@@ -46,33 +46,55 @@ type PrettyTokenSerializer = ESTreeSerializer<TokenConfig, PrettyFormatter>;
4646pub struct EstreeToken < ' a > {
4747 pub token_type : TokenType ,
4848 pub value : & ' a str ,
49- pub regex : Option < EstreeRegExpToken < ' a > > ,
5049 pub span : Span ,
5150}
5251
53- pub struct EstreeRegExpToken < ' a > {
54- pub pattern : & ' a str ,
55- pub flags : & ' a str ,
56- }
57-
5852impl ESTree for EstreeToken < ' _ > {
5953 fn serialize < S : Serializer > ( & self , serializer : S ) {
6054 let mut state = serializer. serialize_struct ( ) ;
6155 state. serialize_field ( "type" , & JsonSafeString ( self . token_type . as_str ( ) ) ) ;
6256 state. serialize_field ( "value" , & self . value ) ;
63- if let Some ( regex) = & self . regex {
64- state. serialize_field ( "regex" , regex) ;
65- }
6657 state. serialize_span ( self . span ) ;
6758 state. end ( ) ;
6859 }
6960}
7061
62+ /// Token type for RegExps.
63+ ///
64+ /// This is a separate type from `EstreeToken` because RegExp tokens have a nested `regex` object
65+ /// containing `flags` and `pattern`, and the token type is always `"RegularExpression"`.
66+ /// Pattern is taken from the AST node (`RegExpLiteral.regex.pattern.text`), and flags are sliced
67+ /// from source text to preserve the original order (the AST stores flags as a bitfield which
68+ /// would alphabetize them).
69+ struct EstreeRegExpToken < ' a > {
70+ value : & ' a str ,
71+ regex : RegExpData < ' a > ,
72+ span : Span ,
73+ }
74+
/// The `regex` sub-object inside a `RegularExpression` token.
struct RegExpData<'a> {
    /// Pattern text between the enclosing `/` delimiters.
    pattern: &'a str,
    /// Flags text after the closing `/`, in source order.
    flags: &'a str,
}
80+
7181impl ESTree for EstreeRegExpToken < ' _ > {
82+ fn serialize < S : Serializer > ( & self , serializer : S ) {
83+ let mut state = serializer. serialize_struct ( ) ;
84+ state. serialize_field ( "type" , & JsonSafeString ( "RegularExpression" ) ) ;
85+ state. serialize_field ( "value" , & self . value ) ;
86+ state. serialize_field ( "regex" , & self . regex ) ;
87+ state. serialize_span ( self . span ) ;
88+ state. end ( ) ;
89+ }
90+ }
91+
92+ impl ESTree for RegExpData < ' _ > {
7293 fn serialize < S : Serializer > ( & self , serializer : S ) {
7394 let mut state = serializer. serialize_struct ( ) ;
7495 state. serialize_field ( "pattern" , & self . pattern ) ;
75- state. serialize_field ( "flags" , & self . flags ) ;
96+ // Flags are single ASCII letters (d, g, i, m, s, u, v, y) — always JSON-safe
97+ state. serialize_field ( "flags" , & JsonSafeString ( self . flags ) ) ;
7698 state. end ( ) ;
7799 }
78100}
@@ -397,33 +419,21 @@ impl<'b, S: SequenceSerializer> EstreeTokenContext<'b, S> {
397419 unreachable ! ( "Expected token at position {start}" ) ;
398420 }
399421
400- /// Serialize a single token.
422+ /// Serialize a single token using its raw source text as the value .
401423 fn emit_token ( & mut self , token : & Token , token_type : TokenType ) {
402424 let value = & self . source_text [ token. start ( ) as usize ..token. end ( ) as usize ] ;
403- let regex = if token. kind ( ) == Kind :: RegExp {
404- regex_parts ( value) . map ( |( pattern, flags) | EstreeRegExpToken { pattern, flags } )
405- } else {
406- None
407- } ;
408- self . serialize_token ( token, token_type, value, regex) ;
425+ self . serialize_token ( token, token_type, value) ;
409426 }
410427
411428 /// Convert span to UTF-16 and serialize token.
412- fn serialize_token (
413- & mut self ,
414- token : & Token ,
415- token_type : TokenType ,
416- value : & str ,
417- regex : Option < EstreeRegExpToken < ' _ > > ,
418- ) {
429+ fn serialize_token ( & mut self , token : & Token , token_type : TokenType , value : & str ) {
419430 // Convert offsets to UTF-16
420431 let mut span = Span :: new ( token. start ( ) , token. end ( ) ) ;
421432 if let Some ( converter) = self . span_converter . as_mut ( ) {
422433 converter. convert_span ( & mut span) ;
423434 }
424435
425- let estree_token = EstreeToken { token_type, value, regex, span } ;
426- self . seq . serialize_element ( & estree_token) ;
436+ self . seq . serialize_element ( & EstreeToken { token_type, value, span } ) ;
427437 }
428438
429439 /// Serialize a token whose value is guaranteed JSON-safe, skipping escape-checking.
@@ -561,12 +571,31 @@ impl<'a, S: SequenceSerializer> Visit<'a> for EstreeTokenContext<'_, S> {
561571 fn emit < S : SequenceSerializer > ( ctx : & mut EstreeTokenContext < ' _ , S > , token : & Token ) {
562572 // Strip leading `#`
563573 let value = & ctx. source_text [ token. start ( ) as usize + 1 ..token. end ( ) as usize ] ;
564- ctx. serialize_token ( token, TokenType :: new ( "PrivateIdentifier" ) , value, None ) ;
574+ ctx. serialize_token ( token, TokenType :: new ( "PrivateIdentifier" ) , value) ;
565575 }
566576 emit ( self , token) ;
567577 }
568578 }
569579
580+ fn visit_reg_exp_literal ( & mut self , regexp : & RegExpLiteral < ' a > ) {
581+ let token = self . advance_to ( regexp. span . start ) ;
582+
583+ let value = regexp. raw . as_deref ( ) . unwrap ( ) ;
584+ let pattern = regexp. regex . pattern . text . as_str ( ) ;
585+
586+ // Flags start after opening `/`, pattern, and closing `/`
587+ let flags = & value[ pattern. len ( ) + 2 ..] ;
588+ let regex = RegExpData { pattern, flags } ;
589+
590+ // Convert offsets to UTF-16
591+ let mut span = Span :: new ( token. start ( ) , token. end ( ) ) ;
592+ if let Some ( converter) = self . span_converter . as_mut ( ) {
593+ converter. convert_span ( & mut span) ;
594+ }
595+
596+ self . seq . serialize_element ( & EstreeRegExpToken { value, regex, span } ) ;
597+ }
598+
570599 fn visit_ts_this_parameter ( & mut self , parameter : & TSThisParameter < ' a > ) {
571600 self . emit_token_at ( parameter. this_span . start , TokenType :: new ( "Identifier" ) ) ;
572601 walk:: walk_ts_this_parameter ( self , parameter) ;
@@ -708,29 +737,3 @@ fn get_token_type(kind: Kind) -> TokenType {
708737 _ => TokenType :: new ( "Punctuator" ) ,
709738 }
710739}
711-
/// Split raw RegExp source text `/pattern/flags` into `(pattern, flags)`.
///
/// Returns `None` when `raw` does not start with `/`, or when no closing `/`
/// is found. A `/` that is backslash-escaped or that appears inside a
/// character class (`[...]`) does not terminate the pattern.
fn regex_parts(raw: &str) -> Option<(&str, &str)> {
    if !raw.starts_with('/') {
        return None;
    }

    let mut skip_next = false;
    let mut in_class = false;
    for (pos, byte) in raw.bytes().enumerate().skip(1) {
        if skip_next {
            skip_next = false;
            continue;
        }
        match byte {
            b'\\' => skip_next = true,
            b'[' if !in_class => in_class = true,
            b']' if in_class => in_class = false,
            // Unescaped `/` outside a class closes the pattern; the rest is flags.
            b'/' if !in_class => return Some((&raw[1..pos], &raw[pos + 1..])),
            _ => {}
        }
    }

    None
}
0 commit comments