Commit bc6f33d

fix(parser): correct capacity for tokens Vec
1 parent 68eecce commit bc6f33d

File tree

1 file changed: +6 additions, −3 deletions

  • crates/oxc_parser/src/lexer

crates/oxc_parser/src/lexer/mod.rs

Lines changed: 6 additions & 3 deletions
```diff
@@ -131,15 +131,18 @@ impl<'a, C: Config> Lexer<'a, C> {
         let source = Source::new(source_text, unique);

         // If collecting tokens, allocate enough space so that the `Vec<Token>` will not have to grow during parsing.
-        // `source_text.len()` is almost always a large overestimate of number of tokens, but it's impossible to have
-        // more than N tokens in a file which is N bytes long, so it'll never be an underestimate.
+        // `source_text.len() + 1` is almost always a large overestimate of number of tokens, but it's impossible to
+        // have more than N + 1 tokens in a file which is N bytes long, so it'll never be an underestimate.
+        //
+        // + 1 is to account for the final `Eof` token. Without adding 1, the capacity could be too small for
+        // minified files which have no space between any tokens. It would also be too small for empty files.
         //
         // Our largest benchmark file `binder.ts` is 190 KB, and `Token` is 16 bytes, so the `Vec<Token>`
         // would be ~3 MB even in the case of this unusually large file. That's not a huge amount of memory.
         //
         // However, we should choose a better heuristic based on real-world observation, and bring this usage down.
         let tokens = if config.tokens() {
-            ArenaVec::with_capacity_in(source_text.len(), allocator)
+            ArenaVec::with_capacity_in(source_text.len() + 1, allocator)
         } else {
             ArenaVec::new_in(allocator)
         };
```
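The capacity bound the commit relies on can be demonstrated with a plain `Vec`. This is a minimal sketch, not the oxc code: `max_token_count` is a hypothetical helper named here for illustration, and `u32` stands in for the real 16-byte `Token`. It shows that reserving `len + 1` slots covers the worst case (a fully minified file plus the trailing `Eof`) without the buffer ever growing, and that `len` alone would under-allocate for an empty file.

```rust
/// Upper bound on the number of tokens a source of `n` bytes can produce:
/// every token except the final `Eof` consumes at least one byte,
/// so the count can never exceed `n + 1`. (Hypothetical helper, for illustration.)
fn max_token_count(n: usize) -> usize {
    n + 1
}

fn main() {
    // Worst case from the commit message: a fully minified file where
    // every byte is its own token, e.g. `a=b;` -> `a`, `=`, `b`, `;`, Eof.
    let source_text = "a=b;";

    // Reserve `len + 1` up front, as the fixed code does.
    let mut tokens: Vec<u32> = Vec::with_capacity(max_token_count(source_text.len()));
    let ptr_before = tokens.as_ptr();

    for (i, _byte) in source_text.bytes().enumerate() {
        tokens.push(i as u32); // one token per source byte
    }
    tokens.push(u32::MAX); // stand-in for the trailing `Eof` token

    // The buffer never reallocated: the allocation pointer is unchanged.
    assert_eq!(tokens.as_ptr(), ptr_before);
    assert_eq!(tokens.len(), source_text.len() + 1);

    // An empty file still produces one token (`Eof`), which is why
    // `source_text.len()` alone (0 here) would be too small a capacity.
    assert_eq!(max_token_count(0), 1);
}
```

The same reasoning applies to the arena-backed `ArenaVec` in the patch: since the reserved capacity is a hard upper bound on token count, the vector is guaranteed never to grow (and thus never to copy) during parsing.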
