@@ -16,6 +16,108 @@ use oxc_estree::{
1616use oxc_parser:: { Kind , Token } ;
1717use oxc_span:: { GetSpan , Span } ;
1818
/// Options for serializing tokens.
///
/// Each flag picks between the token shapes produced by Espree and by TS-ESLint.
/// Espree (`test262`) and TS-ESLint (`typescript`) differ in several ways:
///
/// * `yield`, `let`, `static` used as identifiers (`obj = { yield: 1, let: 2, static: 3 };`)
///   * Espree emits these as `Keyword` tokens.
///   * TS-ESLint emits these as `Identifier` tokens.
/// * Escaped identifiers (e.g. `\u0061`)
///   * Espree decodes escapes in the token `value`.
///   * TS-ESLint preserves the raw source text.
/// * JSX namespaced names (`<ns:tag>`)
///   * Espree emits `JSXIdentifier` tokens for both parts.
///   * TS-ESLint leaves them as their default token type (`Identifier`).
/// * Member expressions in JSX expressions (`<C x={a.b}>`)
///   * Espree emits them as `Identifier` tokens.
///   * TS-ESLint emits `JSXIdentifier` tokens for non-computed member expression identifiers
///     inside JSX expression containers.
///
/// Ready-made combinations are provided by [`ESTreeTokenOptions::test262`],
/// [`ESTreeTokenOptions::typescript`] and [`ESTreeTokenOptions::linter`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ESTreeTokenOptions {
    /// Handling of `yield`, `let` and `static` used as identifiers.
    ///
    /// `true` in the Espree preset (which reports them as `Keyword` tokens),
    /// `false` in the TS-ESLint preset (which reports them as `Identifier` tokens).
    pub exclude_legacy_keyword_identifiers: bool,
    /// Handling of escaped identifiers such as `\u0061`.
    ///
    /// `true` in the Espree preset (escapes are decoded in the token `value`),
    /// `false` in the TS-ESLint preset (raw source text is preserved).
    pub decode_identifier_escapes: bool,
    /// Handling of JSX namespaced names such as `<ns:tag>`.
    ///
    /// `true` in the Espree preset (both parts are `JSXIdentifier` tokens),
    /// `false` in the TS-ESLint preset (both parts keep their default token type).
    pub jsx_namespace_jsx_identifiers: bool,
    /// Handling of member expressions inside JSX expression containers such as `<C x={a.b}>`.
    ///
    /// `true` in the TS-ESLint preset (non-computed member expression identifiers are
    /// `JSXIdentifier` tokens), `false` in the Espree preset (they are `Identifier` tokens).
    pub member_expr_in_jsx_expression_jsx_identifiers: bool,
}

impl ESTreeTokenOptions {
    /// Espree-compatible preset: every Espree behaviour listed on the type is selected.
    #[must_use]
    pub const fn test262() -> Self {
        Self {
            exclude_legacy_keyword_identifiers: true,
            decode_identifier_escapes: true,
            jsx_namespace_jsx_identifiers: true,
            member_expr_in_jsx_expression_jsx_identifiers: false,
        }
    }

    /// TS-ESLint-compatible preset: every TS-ESLint behaviour listed on the type is selected.
    #[must_use]
    pub const fn typescript() -> Self {
        Self {
            exclude_legacy_keyword_identifiers: false,
            decode_identifier_escapes: false,
            jsx_namespace_jsx_identifiers: false,
            member_expr_in_jsx_expression_jsx_identifiers: true,
        }
    }

    /// Linter preset.
    ///
    /// Identical to [`ESTreeTokenOptions::test262`] except that identifier escapes
    /// are not decoded (`decode_identifier_escapes` is `false`).
    #[must_use]
    pub const fn linter() -> Self {
        Self {
            exclude_legacy_keyword_identifiers: true,
            decode_identifier_escapes: false,
            jsx_namespace_jsx_identifiers: true,
            member_expr_in_jsx_expression_jsx_identifiers: false,
        }
    }
}
72+
73+ /// Serialize tokens to JSON.
74+ ///
75+ /// `program` must have unconverted UTF-8 byte offset spans (as returned by the parser).
76+ /// Token span conversion to UTF-16 is handled internally.
77+ ///
78+ /// `source_text` must be the original source text, prior to BOM removal.
79+ /// i.e. if the file has a BOM, it must be present at the start of `source_text`.
80+ pub fn to_estree_tokens_json (
81+ tokens : & [ Token ] ,
82+ program : & Program < ' _ > ,
83+ source_text : & str ,
84+ span_converter : & Utf8ToUtf16 ,
85+ options : ESTreeTokenOptions ,
86+ ) -> String {
87+ // Estimated size of a single token serialized to JSON, in bytes.
88+ // TODO: Estimate this better based on real-world usage.
89+ const BYTES_PER_TOKEN : usize = 64 ;
90+
91+ let mut serializer =
92+ CompactTokenSerializer :: with_capacity ( tokens. len ( ) * BYTES_PER_TOKEN , false ) ;
93+ serialize_tokens ( & mut serializer, tokens, program, source_text, span_converter, options) ;
94+ serializer. into_string ( )
95+ }
96+
97+ /// Serialize tokens to pretty-printed JSON.
98+ ///
99+ /// `program` must have unconverted UTF-8 byte offset spans (as returned by the parser).
100+ /// Token span conversion to UTF-16 is handled internally.
101+ ///
102+ /// `source_text` must be the original source text, prior to BOM removal.
103+ /// i.e. if the file has a BOM, it must be present at the start of `source_text`.
104+ pub fn to_estree_tokens_pretty_json (
105+ tokens : & [ Token ] ,
106+ program : & Program < ' _ > ,
107+ source_text : & str ,
108+ span_converter : & Utf8ToUtf16 ,
109+ options : ESTreeTokenOptions ,
110+ ) -> String {
111+ // Estimated size of a single token serialized to JSON, in bytes.
112+ // TODO: Estimate this better based on real-world usage.
113+ const BYTES_PER_TOKEN : usize = 64 ;
114+
115+ let mut serializer =
116+ PrettyTokenSerializer :: with_capacity ( tokens. len ( ) * BYTES_PER_TOKEN , false ) ;
117+ serialize_tokens ( & mut serializer, tokens, program, source_text, span_converter, options) ;
118+ serializer. into_string ( )
119+ }
120+
19121/// Serializer config for tokens.
20122/// We never include ranges, so use this custom config which returns `false` for `ranges()`.
21123/// This allows compiler to remove the branch which checks whether to print ranges in `serialize_span`.
@@ -234,108 +336,6 @@ mod u32_string {
234336}
235337use u32_string:: U32String ;
236338
237- /// Options for serializing tokens.
238- ///
239- /// Espree (`test262`) and TS-ESLint (`typescript`) differ in several ways:
240- ///
241- /// * `yield`, `let`, `static` used as identifiers (`obj = { yield: 1, let: 2, static: 3 };`)
242- /// * Espree emits these as `Keyword` tokens.
243- /// * TS-ESLint as `Identifier` tokens.
244- /// * Escaped identifiers (e.g. `\u0061`)
245- /// * Espree decodes escapes in the token `value`.
246- /// * TS-ESLint preserves the raw source text.
247- /// * JSX namespaced names (`<ns:tag>`)
248- /// * Espree emits `JSXIdentifier` tokens for both parts,
249- /// * TS-ESLint leaves them as their default token type (`Identifier`).
250- /// * Member expressions in JSX expressions (`<C x={a.b}>`)
251- /// * Espree emits them as `Identifier` tokens.
252- /// * TS-ESLint emits `JSXIdentifier` tokens for non-computed member expression identifiers
253- /// inside JSX expression containers.
254- #[ derive( Debug , Clone , Copy ) ]
255- pub struct ESTreeTokenOptions {
256- pub exclude_legacy_keyword_identifiers : bool ,
257- pub decode_identifier_escapes : bool ,
258- pub jsx_namespace_jsx_identifiers : bool ,
259- pub member_expr_in_jsx_expression_jsx_identifiers : bool ,
260- }
261-
262- impl ESTreeTokenOptions {
263- pub const fn test262 ( ) -> Self {
264- Self {
265- exclude_legacy_keyword_identifiers : true ,
266- decode_identifier_escapes : true ,
267- jsx_namespace_jsx_identifiers : true ,
268- member_expr_in_jsx_expression_jsx_identifiers : false ,
269- }
270- }
271-
272- pub const fn typescript ( ) -> Self {
273- Self {
274- exclude_legacy_keyword_identifiers : false ,
275- decode_identifier_escapes : false ,
276- jsx_namespace_jsx_identifiers : false ,
277- member_expr_in_jsx_expression_jsx_identifiers : true ,
278- }
279- }
280-
281- pub const fn linter ( ) -> Self {
282- Self {
283- exclude_legacy_keyword_identifiers : true ,
284- decode_identifier_escapes : false ,
285- jsx_namespace_jsx_identifiers : true ,
286- member_expr_in_jsx_expression_jsx_identifiers : false ,
287- }
288- }
289- }
290-
291- /// Serialize tokens to JSON.
292- ///
293- /// `program` must have unconverted UTF-8 byte offset spans (as returned by the parser).
294- /// Token span conversion to UTF-16 is handled internally.
295- ///
296- /// `source_text` must be the original source text, prior to BOM removal.
297- /// i.e. if the file has a BOM, it must be present at the start of `source_text`.
298- pub fn to_estree_tokens_json (
299- tokens : & [ Token ] ,
300- program : & Program < ' _ > ,
301- source_text : & str ,
302- span_converter : & Utf8ToUtf16 ,
303- options : ESTreeTokenOptions ,
304- ) -> String {
305- // Estimated size of a single token serialized to JSON, in bytes.
306- // TODO: Estimate this better based on real-world usage.
307- const BYTES_PER_TOKEN : usize = 64 ;
308-
309- let mut serializer =
310- CompactTokenSerializer :: with_capacity ( tokens. len ( ) * BYTES_PER_TOKEN , false ) ;
311- serialize_tokens ( & mut serializer, tokens, program, source_text, span_converter, options) ;
312- serializer. into_string ( )
313- }
314-
315- /// Serialize tokens to pretty-printed JSON.
316- ///
317- /// `program` must have unconverted UTF-8 byte offset spans (as returned by the parser).
318- /// Token span conversion to UTF-16 is handled internally.
319- ///
320- /// `source_text` must be the original source text, prior to BOM removal.
321- /// i.e. if the file has a BOM, it must be present at the start of `source_text`.
322- pub fn to_estree_tokens_pretty_json (
323- tokens : & [ Token ] ,
324- program : & Program < ' _ > ,
325- source_text : & str ,
326- span_converter : & Utf8ToUtf16 ,
327- options : ESTreeTokenOptions ,
328- ) -> String {
329- // Estimated size of a single token serialized to JSON, in bytes.
330- // TODO: Estimate this better based on real-world usage.
331- const BYTES_PER_TOKEN : usize = 64 ;
332-
333- let mut serializer =
334- PrettyTokenSerializer :: with_capacity ( tokens. len ( ) * BYTES_PER_TOKEN , false ) ;
335- serialize_tokens ( & mut serializer, tokens, program, source_text, span_converter, options) ;
336- serializer. into_string ( )
337- }
338-
339339/// Walk AST and serialize each token into the serializer as it's encountered.
340340///
341341/// Tokens are consumed from the `tokens` slice in source order.
0 commit comments