@@ -119,6 +119,20 @@ impl<'a, C: Config> Lexer<'a, C> {
119119 ) -> Self {
120120 let source = Source :: new ( source_text, unique) ;
121121
122+ // If collecting tokens, allocate enough space so that the `Vec<Token>` will not have to grow during parsing.
123+ // `source_text.len()` is almost always a large overestimate of number of tokens, but it's impossible to have
124+ // more than N tokens in a file which is N bytes long, so it'll never be an underestimate.
125+ //
126+ // Our largest benchmark file `binder.ts` is 190 KB, and `Token` is 16 bytes, so the `Vec<Token>`
127+ // would be ~3 MB even in the case of this unusually large file. That's not a huge amount of memory.
128+ //
129+ // However, we should choose a better heuristic based on real-world observation, and bring this usage down.
130+ let tokens = if config. tokens ( ) {
131+ ArenaVec :: with_capacity_in ( source_text. len ( ) , allocator)
132+ } else {
133+ ArenaVec :: new_in ( allocator)
134+ } ;
135+
122136 // The first token is at the start of file, so it is allowed to be on a new line
123137 let token = Token :: new_on_new_line ( ) ;
124138 Self {
@@ -133,7 +147,7 @@ impl<'a, C: Config> Lexer<'a, C> {
133147 escaped_strings : FxHashMap :: default ( ) ,
134148 escaped_templates : FxHashMap :: default ( ) ,
135149 multi_line_comment_end_finder : None ,
136- tokens : ArenaVec :: new_in ( allocator ) ,
150+ tokens,
137151 config,
138152 }
139153 }
0 commit comments