Skip to content

Commit c25ef02

Browse files
committed
perf(napi/parser, linter/plugins): simplify branch condition in deserializeStr (#21019)
Follow-on after #20834. Simplify the branch condition in `deserializeStr` for determining whether it can take the fast path of just slicing `sourceText`. There's no need to check `sourceIsAscii`, just compare the end offset of the string to `firstNonAsciiPos` (the position in buffer of first non-ASCII byte in source code). When source is 100% ASCII, `firstNonAsciiPos = sourceEndPos`, so `end <= firstNonAsciiPos` passes for every string located within the source. The implementation is different for parser and for Oxlint, as the source text sits in a different location in buffer - at the start in parser, at the end in Oxlint - but the principle is the same in both. [Benchmarking](https://github.com/overlookmotel/oxc-raw-str-bench) showed this speeds up `deserializeStr` by a small percentage.
1 parent 9f494c3 commit c25ef02

10 files changed

Lines changed: 125 additions & 138 deletions

File tree

apps/oxlint/src-js/generated/deserialize.js

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ let uint8,
88
uint32,
99
float64,
1010
sourceText,
11-
sourceIsAscii,
1211
sourceStartPos,
1312
firstNonAsciiPos,
1413
parent = null,
@@ -42,14 +41,12 @@ function deserializeWith(buffer, sourceTextInput, sourceByteLen, getLocInput, de
4241
uint32 = buffer.uint32;
4342
float64 = buffer.float64;
4443
sourceText = sourceTextInput;
45-
sourceIsAscii = sourceText.length === sourceByteLen;
46-
if (!sourceIsAscii) {
47-
firstNonAsciiPos = sourceByteLen;
48-
for (let i = sourceStartPos, e = sourceStartPos + sourceByteLen; i < e; i++)
49-
if (uint8[i] >= 128) {
50-
firstNonAsciiPos = i - sourceStartPos;
51-
break;
52-
}
44+
if (sourceText.length === sourceByteLen) firstNonAsciiPos = sourceStartPos + sourceByteLen;
45+
else {
46+
let i = sourceStartPos,
47+
sourceEndPos = sourceStartPos + sourceByteLen;
48+
for (; i < sourceEndPos && uint8[i] < 128; i++);
49+
firstNonAsciiPos = i;
5350
}
5451
getLoc = getLocInput;
5552
return deserialize(uint32[536870900]);
@@ -5883,11 +5880,27 @@ function deserializeStr(pos) {
58835880
len = uint32[pos32 + 2];
58845881
if (len === 0) return "";
58855882
pos = uint32[pos32];
5886-
if (pos >= sourceStartPos && (sourceIsAscii || pos - sourceStartPos + len <= firstNonAsciiPos))
5883+
let end = pos + len;
5884+
// Note: Tried reducing this check to a single branch by making the comparison the equivalent of this Rust:
5885+
// `end.wrapping_sub(sourceStartPos) <= firstNonAsciiOffset`.
5886+
//
5887+
// The JS versions tried were:
5888+
// - `((end - sourceStartPos) >>> 0) <= firstNonAsciiOffset`
5889+
// - `((end - sourceStartPos) & 0x7FFF_FFFF) <= firstNonAsciiOffset`
5890+
// But it turned out that these are both slower by 5-10% on files which are all ASCII.
5891+
//
5892+
// `>>>` is slower as V8 can't assume result fits in an SMI (which is a 32-bit *signed* integer),
5893+
// as result could be greater than or equal to `2 ** 31`. So it converts both the comparison's operands to `float64`s
5894+
// and does float compare (which is slower than integer compare).
5895+
//
5896+
// `& 0x7FFF_FFFF` is slower as it has a longer chain of data dependencies than the 2 independent
5897+
// branch comparisons.
5898+
//
5899+
// Both branches are very predictable, so 2 branches wins.
5900+
if (pos >= sourceStartPos && end <= firstNonAsciiPos)
58875901
return sourceText.substr(pos - sourceStartPos, len);
58885902
// Use `TextDecoder` for strings longer than 9 bytes.
58895903
// For shorter strings, the byte-by-byte loop below avoids native call overhead.
5890-
let end = pos + len;
58915904
if (len > 9) return decodeStr(uint8.subarray(pos, end));
58925905
// Shorter strings decode by hand to avoid native call
58935906
let out = "",

napi/parser/src-js/generated/deserialize/js.js

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
// Auto-generated code, DO NOT EDIT DIRECTLY!
22
// To edit this generated file you have to edit `tasks/ast_tools/src/generators/raw_transfer.rs`.
33

4-
let uint8, uint32, float64, sourceText, sourceIsAscii, sourceEndPos, firstNonAsciiPos;
4+
let uint8, uint32, float64, sourceText, firstNonAsciiPos;
55

66
const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }),
77
decodeStr = textDecoder.decode.bind(textDecoder),
88
{ fromCharCode } = String;
99

1010
export function deserialize(buffer, sourceText, sourceByteLen) {
11-
sourceEndPos = sourceByteLen;
1211
let data = deserializeWith(buffer, sourceText, sourceByteLen, null, deserializeRawTransferData);
1312
resetBuffer();
1413
return data;
@@ -19,14 +18,11 @@ function deserializeWith(buffer, sourceTextInput, sourceByteLen, getLocInput, de
1918
uint32 = buffer.uint32;
2019
float64 = buffer.float64;
2120
sourceText = sourceTextInput;
22-
sourceIsAscii = sourceText.length === sourceByteLen;
23-
if (!sourceIsAscii) {
24-
firstNonAsciiPos = sourceByteLen;
25-
for (let i = 0; i < sourceByteLen; i++)
26-
if (uint8[i] >= 128) {
27-
firstNonAsciiPos = i;
28-
break;
29-
}
21+
if (sourceText.length === sourceByteLen) firstNonAsciiPos = sourceByteLen;
22+
else {
23+
let i = 0;
24+
for (; i < sourceByteLen && uint8[i] < 128; i++);
25+
firstNonAsciiPos = i;
3026
}
3127
return deserialize(uint32[536870900]);
3228
}
@@ -4546,11 +4542,10 @@ function deserializeStr(pos) {
45464542
len = uint32[pos32 + 2];
45474543
if (len === 0) return "";
45484544
pos = uint32[pos32];
4549-
if (pos < sourceEndPos && (sourceIsAscii || pos + len <= firstNonAsciiPos))
4550-
return sourceText.substr(pos, len);
4545+
let end = pos + len;
4546+
if (end <= firstNonAsciiPos) return sourceText.substr(pos, len);
45514547
// Use `TextDecoder` for strings longer than 9 bytes.
45524548
// For shorter strings, the byte-by-byte loop below avoids native call overhead.
4553-
let end = pos + len;
45544549
if (len > 9) return decodeStr(uint8.subarray(pos, end));
45554550
// Shorter strings decode by hand to avoid native call
45564551
let out = "",

napi/parser/src-js/generated/deserialize/js_parent.js

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@ let uint8,
55
uint32,
66
float64,
77
sourceText,
8-
sourceIsAscii,
9-
sourceEndPos,
108
firstNonAsciiPos,
119
parent = null;
1210

@@ -15,7 +13,6 @@ const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }),
1513
{ fromCharCode } = String;
1614

1715
export function deserialize(buffer, sourceText, sourceByteLen) {
18-
sourceEndPos = sourceByteLen;
1916
let data = deserializeWith(buffer, sourceText, sourceByteLen, null, deserializeRawTransferData);
2017
resetBuffer();
2118
return data;
@@ -26,14 +23,11 @@ function deserializeWith(buffer, sourceTextInput, sourceByteLen, getLocInput, de
2623
uint32 = buffer.uint32;
2724
float64 = buffer.float64;
2825
sourceText = sourceTextInput;
29-
sourceIsAscii = sourceText.length === sourceByteLen;
30-
if (!sourceIsAscii) {
31-
firstNonAsciiPos = sourceByteLen;
32-
for (let i = 0; i < sourceByteLen; i++)
33-
if (uint8[i] >= 128) {
34-
firstNonAsciiPos = i;
35-
break;
36-
}
26+
if (sourceText.length === sourceByteLen) firstNonAsciiPos = sourceByteLen;
27+
else {
28+
let i = 0;
29+
for (; i < sourceByteLen && uint8[i] < 128; i++);
30+
firstNonAsciiPos = i;
3731
}
3832
return deserialize(uint32[536870900]);
3933
}
@@ -5083,11 +5077,10 @@ function deserializeStr(pos) {
50835077
len = uint32[pos32 + 2];
50845078
if (len === 0) return "";
50855079
pos = uint32[pos32];
5086-
if (pos < sourceEndPos && (sourceIsAscii || pos + len <= firstNonAsciiPos))
5087-
return sourceText.substr(pos, len);
5080+
let end = pos + len;
5081+
if (end <= firstNonAsciiPos) return sourceText.substr(pos, len);
50885082
// Use `TextDecoder` for strings longer than 9 bytes.
50895083
// For shorter strings, the byte-by-byte loop below avoids native call overhead.
5090-
let end = pos + len;
50915084
if (len > 9) return decodeStr(uint8.subarray(pos, end));
50925085
// Shorter strings decode by hand to avoid native call
50935086
let out = "",

napi/parser/src-js/generated/deserialize/js_range.js

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
// Auto-generated code, DO NOT EDIT DIRECTLY!
22
// To edit this generated file you have to edit `tasks/ast_tools/src/generators/raw_transfer.rs`.
33

4-
let uint8, uint32, float64, sourceText, sourceIsAscii, sourceEndPos, firstNonAsciiPos;
4+
let uint8, uint32, float64, sourceText, firstNonAsciiPos;
55

66
const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }),
77
decodeStr = textDecoder.decode.bind(textDecoder),
88
{ fromCharCode } = String;
99

1010
export function deserialize(buffer, sourceText, sourceByteLen) {
11-
sourceEndPos = sourceByteLen;
1211
let data = deserializeWith(buffer, sourceText, sourceByteLen, null, deserializeRawTransferData);
1312
resetBuffer();
1413
return data;
@@ -19,14 +18,11 @@ function deserializeWith(buffer, sourceTextInput, sourceByteLen, getLocInput, de
1918
uint32 = buffer.uint32;
2019
float64 = buffer.float64;
2120
sourceText = sourceTextInput;
22-
sourceIsAscii = sourceText.length === sourceByteLen;
23-
if (!sourceIsAscii) {
24-
firstNonAsciiPos = sourceByteLen;
25-
for (let i = 0; i < sourceByteLen; i++)
26-
if (uint8[i] >= 128) {
27-
firstNonAsciiPos = i;
28-
break;
29-
}
21+
if (sourceText.length === sourceByteLen) firstNonAsciiPos = sourceByteLen;
22+
else {
23+
let i = 0;
24+
for (; i < sourceByteLen && uint8[i] < 128; i++);
25+
firstNonAsciiPos = i;
3026
}
3127
return deserialize(uint32[536870900]);
3228
}
@@ -5088,11 +5084,10 @@ function deserializeStr(pos) {
50885084
len = uint32[pos32 + 2];
50895085
if (len === 0) return "";
50905086
pos = uint32[pos32];
5091-
if (pos < sourceEndPos && (sourceIsAscii || pos + len <= firstNonAsciiPos))
5092-
return sourceText.substr(pos, len);
5087+
let end = pos + len;
5088+
if (end <= firstNonAsciiPos) return sourceText.substr(pos, len);
50935089
// Use `TextDecoder` for strings longer than 9 bytes.
50945090
// For shorter strings, the byte-by-byte loop below avoids native call overhead.
5095-
let end = pos + len;
50965091
if (len > 9) return decodeStr(uint8.subarray(pos, end));
50975092
// Shorter strings decode by hand to avoid native call
50985093
let out = "",

napi/parser/src-js/generated/deserialize/js_range_parent.js

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@ let uint8,
55
uint32,
66
float64,
77
sourceText,
8-
sourceIsAscii,
9-
sourceEndPos,
108
firstNonAsciiPos,
119
parent = null;
1210

@@ -15,7 +13,6 @@ const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }),
1513
{ fromCharCode } = String;
1614

1715
export function deserialize(buffer, sourceText, sourceByteLen) {
18-
sourceEndPos = sourceByteLen;
1916
let data = deserializeWith(buffer, sourceText, sourceByteLen, null, deserializeRawTransferData);
2017
resetBuffer();
2118
return data;
@@ -26,14 +23,11 @@ function deserializeWith(buffer, sourceTextInput, sourceByteLen, getLocInput, de
2623
uint32 = buffer.uint32;
2724
float64 = buffer.float64;
2825
sourceText = sourceTextInput;
29-
sourceIsAscii = sourceText.length === sourceByteLen;
30-
if (!sourceIsAscii) {
31-
firstNonAsciiPos = sourceByteLen;
32-
for (let i = 0; i < sourceByteLen; i++)
33-
if (uint8[i] >= 128) {
34-
firstNonAsciiPos = i;
35-
break;
36-
}
26+
if (sourceText.length === sourceByteLen) firstNonAsciiPos = sourceByteLen;
27+
else {
28+
let i = 0;
29+
for (; i < sourceByteLen && uint8[i] < 128; i++);
30+
firstNonAsciiPos = i;
3731
}
3832
return deserialize(uint32[536870900]);
3933
}
@@ -5628,11 +5622,10 @@ function deserializeStr(pos) {
56285622
len = uint32[pos32 + 2];
56295623
if (len === 0) return "";
56305624
pos = uint32[pos32];
5631-
if (pos < sourceEndPos && (sourceIsAscii || pos + len <= firstNonAsciiPos))
5632-
return sourceText.substr(pos, len);
5625+
let end = pos + len;
5626+
if (end <= firstNonAsciiPos) return sourceText.substr(pos, len);
56335627
// Use `TextDecoder` for strings longer than 9 bytes.
56345628
// For shorter strings, the byte-by-byte loop below avoids native call overhead.
5635-
let end = pos + len;
56365629
if (len > 9) return decodeStr(uint8.subarray(pos, end));
56375630
// Shorter strings decode by hand to avoid native call
56385631
let out = "",

napi/parser/src-js/generated/deserialize/ts.js

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
// Auto-generated code, DO NOT EDIT DIRECTLY!
22
// To edit this generated file you have to edit `tasks/ast_tools/src/generators/raw_transfer.rs`.
33

4-
let uint8, uint32, float64, sourceText, sourceIsAscii, sourceEndPos, firstNonAsciiPos;
4+
let uint8, uint32, float64, sourceText, firstNonAsciiPos;
55

66
const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }),
77
decodeStr = textDecoder.decode.bind(textDecoder),
88
{ fromCharCode } = String;
99

1010
export function deserialize(buffer, sourceText, sourceByteLen) {
11-
sourceEndPos = sourceByteLen;
1211
let data = deserializeWith(buffer, sourceText, sourceByteLen, null, deserializeRawTransferData);
1312
resetBuffer();
1413
return data;
@@ -19,14 +18,11 @@ function deserializeWith(buffer, sourceTextInput, sourceByteLen, getLocInput, de
1918
uint32 = buffer.uint32;
2019
float64 = buffer.float64;
2120
sourceText = sourceTextInput;
22-
sourceIsAscii = sourceText.length === sourceByteLen;
23-
if (!sourceIsAscii) {
24-
firstNonAsciiPos = sourceByteLen;
25-
for (let i = 0; i < sourceByteLen; i++)
26-
if (uint8[i] >= 128) {
27-
firstNonAsciiPos = i;
28-
break;
29-
}
21+
if (sourceText.length === sourceByteLen) firstNonAsciiPos = sourceByteLen;
22+
else {
23+
let i = 0;
24+
for (; i < sourceByteLen && uint8[i] < 128; i++);
25+
firstNonAsciiPos = i;
3026
}
3127
return deserialize(uint32[536870900]);
3228
}
@@ -4855,11 +4851,10 @@ function deserializeStr(pos) {
48554851
len = uint32[pos32 + 2];
48564852
if (len === 0) return "";
48574853
pos = uint32[pos32];
4858-
if (pos < sourceEndPos && (sourceIsAscii || pos + len <= firstNonAsciiPos))
4859-
return sourceText.substr(pos, len);
4854+
let end = pos + len;
4855+
if (end <= firstNonAsciiPos) return sourceText.substr(pos, len);
48604856
// Use `TextDecoder` for strings longer than 9 bytes.
48614857
// For shorter strings, the byte-by-byte loop below avoids native call overhead.
4862-
let end = pos + len;
48634858
if (len > 9) return decodeStr(uint8.subarray(pos, end));
48644859
// Shorter strings decode by hand to avoid native call
48654860
let out = "",

napi/parser/src-js/generated/deserialize/ts_parent.js

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@ let uint8,
55
uint32,
66
float64,
77
sourceText,
8-
sourceIsAscii,
9-
sourceEndPos,
108
firstNonAsciiPos,
119
parent = null;
1210

@@ -15,7 +13,6 @@ const textDecoder = new TextDecoder("utf-8", { ignoreBOM: true }),
1513
{ fromCharCode } = String;
1614

1715
export function deserialize(buffer, sourceText, sourceByteLen) {
18-
sourceEndPos = sourceByteLen;
1916
let data = deserializeWith(buffer, sourceText, sourceByteLen, null, deserializeRawTransferData);
2017
resetBuffer();
2118
return data;
@@ -26,14 +23,11 @@ function deserializeWith(buffer, sourceTextInput, sourceByteLen, getLocInput, de
2623
uint32 = buffer.uint32;
2724
float64 = buffer.float64;
2825
sourceText = sourceTextInput;
29-
sourceIsAscii = sourceText.length === sourceByteLen;
30-
if (!sourceIsAscii) {
31-
firstNonAsciiPos = sourceByteLen;
32-
for (let i = 0; i < sourceByteLen; i++)
33-
if (uint8[i] >= 128) {
34-
firstNonAsciiPos = i;
35-
break;
36-
}
26+
if (sourceText.length === sourceByteLen) firstNonAsciiPos = sourceByteLen;
27+
else {
28+
let i = 0;
29+
for (; i < sourceByteLen && uint8[i] < 128; i++);
30+
firstNonAsciiPos = i;
3731
}
3832
return deserialize(uint32[536870900]);
3933
}
@@ -5419,11 +5413,10 @@ function deserializeStr(pos) {
54195413
len = uint32[pos32 + 2];
54205414
if (len === 0) return "";
54215415
pos = uint32[pos32];
5422-
if (pos < sourceEndPos && (sourceIsAscii || pos + len <= firstNonAsciiPos))
5423-
return sourceText.substr(pos, len);
5416+
let end = pos + len;
5417+
if (end <= firstNonAsciiPos) return sourceText.substr(pos, len);
54245418
// Use `TextDecoder` for strings longer than 9 bytes.
54255419
// For shorter strings, the byte-by-byte loop below avoids native call overhead.
5426-
let end = pos + len;
54275420
if (len > 9) return decodeStr(uint8.subarray(pos, end));
54285421
// Shorter strings decode by hand to avoid native call
54295422
let out = "",

0 commit comments

Comments
 (0)