Skip to content

Commit 77cdacc

Browse files
committed
perf(linter/plugins): use array buffer views for tokens (#20301)
Small perf optimization to deserializing tokens. Instead of calculating the position in the buffer of each token with `baseOffset + tokenIndex * TOKEN_SIZE`, create typed arrays which cover just the region of the buffer containing token data. Then it can be indexed into with just `tokenIndex * TOKEN_SIZE`. More importantly, this simplifies the code, which will make feasible another much larger optimization - all tokens methods can search for tokens directly in the buffer, without having to deserialize all the tokens to JS objects eagerly, as they do now.
1 parent a11ecff commit 77cdacc

File tree

1 file changed

+26
-15
lines changed

1 file changed

+26
-15
lines changed

apps/oxlint/src-js/plugins/tokens.ts

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,9 @@ class Token {
155155
// Make `loc` property enumerable so that `for (const key in token) ...` includes `loc` in the keys it iterates over
156156
Object.defineProperty(Token.prototype, "loc", { enumerable: true });
157157

158-
let uint32: Uint32Array | null = null;
158+
// Typed array views over the tokens region of the buffer
159+
let tokensUint8: Uint8Array | null = null;
160+
let tokensUint32: Uint32Array | null = null;
159161

160162
// `ESTreeKind` discriminants (set by Rust side)
161163
const PRIVATE_IDENTIFIER_KIND = 2;
@@ -178,7 +180,7 @@ const TOKEN_TYPES: TokenType["type"][] = [
178180
];
179181

180182
// Details of Rust `Token` type
181-
const TOKEN_SIZE = 16;
183+
const TOKEN_SIZE_SHIFT = 4; // 1 << 4 == 16 bytes, the size of `Token` in Rust
182184
const KIND_FIELD_OFFSET = 8;
183185
const IS_ESCAPED_FIELD_OFFSET = 10;
184186

@@ -193,21 +195,31 @@ export function initTokens() {
193195
debugAssertIsNonNull(sourceText);
194196

195197
debugAssertIsNonNull(buffer);
196-
uint32 = buffer.uint32;
197198

199+
const { uint32 } = buffer;
200+
const tokensPos = uint32[TOKENS_OFFSET_POS_32];
198201
const tokensLen = uint32[TOKENS_LEN_POS_32];
199202

203+
// Create typed array views over just the tokens region of the buffer.
204+
// These are zero-copy views over the same underlying `ArrayBuffer`.
205+
const arrayBuffer = buffer.buffer,
206+
absolutePos = buffer.byteOffset + tokensPos;
207+
tokensUint8 = new Uint8Array(arrayBuffer, absolutePos, tokensLen << TOKEN_SIZE_SHIFT);
208+
tokensUint32 = new Uint32Array(arrayBuffer, absolutePos, tokensLen << (TOKEN_SIZE_SHIFT - 2));
209+
200210
// Grow cache if needed (one-time cost as cache warms up)
201211
while (cachedTokens.length < tokensLen) {
202212
cachedTokens.push(new Token());
203213
}
204214

205215
// Deserialize into cached token objects
206-
const pos = uint32[TOKENS_OFFSET_POS_32];
207216
for (let i = 0; i < tokensLen; i++) {
208-
deserializeTokenInto(cachedTokens[i], pos + i * TOKEN_SIZE);
217+
deserializeTokenInto(cachedTokens[i], i);
209218
}
210219

220+
tokensUint8 = null;
221+
tokensUint32 = null;
222+
211223
// Use `slice` rather than copying tokens one-by-one into a new array.
212224
// V8 implements `slice` with a single `memcpy` of the backing store, which is faster
213225
// than N individual `push` calls with bounds checking and potential resizing.
@@ -222,31 +234,30 @@ export function initTokens() {
222234
tokens = (previousTokens = cachedTokens.slice(0, tokensLen)) as TokenType[];
223235
}
224236

225-
uint32 = null;
226-
227237
// Check `tokens` have valid ranges and are in ascending order
228238
debugCheckValidRanges(tokens, "token");
229239
}
230240

231241
/**
232-
* Deserialize a token from buffer at position `pos` into an existing token object.
242+
* Deserialize token `i` from buffer into an existing token object.
233243
* @param token - Token object to mutate
234-
* @param pos - Position in buffer containing Rust `Token` type
244+
* @param index - Token index
235245
*/
236-
function deserializeTokenInto(token: Token, pos: number): void {
237-
const pos32 = pos >> 2;
238-
const start = uint32![pos32],
239-
end = uint32![pos32 + 1];
246+
function deserializeTokenInto(token: Token, index: number): void {
247+
const pos32 = index << 2;
248+
const start = tokensUint32![pos32],
249+
end = tokensUint32![pos32 + 1];
240250

241-
const kind = buffer![pos + KIND_FIELD_OFFSET];
251+
const pos = pos32 << (TOKEN_SIZE_SHIFT - 2);
252+
const kind = tokensUint8![pos + KIND_FIELD_OFFSET];
242253

243254
// Get `value` as slice of source text `start..end`.
244255
// Slice `start + 1..end` for private identifiers, to strip leading `#`.
245256
let value = sourceText!.slice(start + +(kind === PRIVATE_IDENTIFIER_KIND), end);
246257

247258
if (kind <= PRIVATE_IDENTIFIER_KIND) {
248259
// Unescape if `escaped` flag is set
249-
if (buffer![pos + IS_ESCAPED_FIELD_OFFSET] === 1) {
260+
if (tokensUint8![pos + IS_ESCAPED_FIELD_OFFSET] === 1) {
250261
value = unescapeIdentifier(value);
251262
}
252263
} else if (kind === REGEXP_KIND) {

0 commit comments

Comments
 (0)