Skip to content

Commit 3da7560

Browse files
test: cross test our CSS tokenizer (#20601)
1 parent f588954 commit 3da7560

7 files changed

Lines changed: 11164 additions & 3260 deletions

File tree

lib/css/CssParser.js

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ const CC_RIGHT_CURLY = "}".charCodeAt(0);
5050
const CC_HYPHEN_MINUS = "-".charCodeAt(0);
5151
const CC_TILDE = "~".charCodeAt(0);
5252
const CC_EQUAL = "=".charCodeAt(0);
53+
const CC_FULL_STOP = ".".charCodeAt(0);
5354

5455
// https://www.w3.org/TR/css-syntax-3/#newline
5556
// We don't have a `preprocessing` stage, so we need to specify all of them
@@ -2280,7 +2281,11 @@ class CssParser extends Parser {
22802281
return end;
22812282
},
22822283
delim: (input, start, end) => {
2283-
if (isNextRulePrelude && isLocalMode()) {
2284+
if (
2285+
input.charCodeAt(start) === CC_FULL_STOP &&
2286+
isNextRulePrelude &&
2287+
isLocalMode()
2288+
) {
22842289
return processClassSelector(input, start, end);
22852290
}
22862291

lib/css/walkCssTokens.js

Lines changed: 110 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77

88
/**
99
* @typedef {object} CssTokenCallbacks
10-
* @property {((input: string, start: number, end: number, innerStart: number, innerEnd: number) => number)=} url
1110
* @property {((input: string, start: number, end: number) => number)=} comment
11+
* @property {((input: string, start: number, end: number) => number)=} whitespace
1212
* @property {((input: string, start: number, end: number) => number)=} string
1313
* @property {((input: string, start: number, end: number) => number)=} leftCurlyBracket
1414
* @property {((input: string, start: number, end: number) => number)=} rightCurlyBracket
@@ -17,13 +17,21 @@
1717
* @property {((input: string, start: number, end: number) => number)=} leftSquareBracket
1818
* @property {((input: string, start: number, end: number) => number)=} rightSquareBracket
1919
* @property {((input: string, start: number, end: number) => number)=} function
20+
* @property {((input: string, start: number, end: number, innerStart: number, innerEnd: number) => number)=} url
2021
* @property {((input: string, start: number, end: number) => number)=} colon
2122
* @property {((input: string, start: number, end: number) => number)=} atKeyword
2223
* @property {((input: string, start: number, end: number) => number)=} delim
2324
* @property {((input: string, start: number, end: number) => number)=} identifier
25+
* @property {((input: string, start: number, end: number) => number)=} percentage
26+
* @property {((input: string, start: number, end: number) => number)=} number
27+
* @property {((input: string, start: number, end: number) => number)=} dimension
2428
* @property {((input: string, start: number, end: number, isId: boolean) => number)=} hash
2529
* @property {((input: string, start: number, end: number) => number)=} semicolon
2630
* @property {((input: string, start: number, end: number) => number)=} comma
31+
* @property {((input: string, start: number, end: number) => number)=} cdo
32+
* @property {((input: string, start: number, end: number) => number)=} cdc
33+
* @property {((input: string, start: number, end: number) => number)=} badStringToken
34+
* @property {((input: string, start: number, end: number) => number)=} badUrlToken
2735
* @property {(() => boolean)=} needTerminate
2836
*/
2937

@@ -81,13 +89,19 @@ const CC_LESS_THAN_SIGN = "<".charCodeAt(0);
8189
const CC_GREATER_THAN_SIGN = ">".charCodeAt(0);
8290

8391
/** @type {CharHandler} */
84-
const consumeSpace = (input, pos, _callbacks) => {
92+
const consumeSpace = (input, pos, callbacks) => {
93+
const start = pos - 1;
94+
8595
// Consume as much whitespace as possible.
8696
while (_isWhiteSpace(input.charCodeAt(pos))) {
8797
pos++;
8898
}
8999

90100
// Return a <whitespace-token>.
101+
if (callbacks.whitespace !== undefined) {
102+
return callbacks.whitespace(input, start, pos);
103+
}
104+
91105
return pos;
92106
};
93107

@@ -143,9 +157,14 @@ const isIdentStartCodePoint = (cc) =>
143157
cc >= 0x80;
144158

145159
/** @type {CharHandler} */
146-
const consumeDelimToken = (input, pos, _callbacks) =>
160+
const consumeDelimToken = (input, pos, callbacks) => {
147161
// Return a <delim-token> with its value set to the current input code point.
148-
pos;
162+
if (callbacks.delim) {
163+
pos = callbacks.delim(input, pos - 1, pos);
164+
}
165+
166+
return pos;
167+
};
149168

150169
/** @type {CharHandler} */
151170
const consumeComments = (input, pos, callbacks) => {
@@ -286,6 +305,11 @@ const consumeAStringToken = (input, pos, callbacks) => {
286305
// Reconsume the current input code point, create a <bad-string-token>, and return it.
287306
else if (_isNewline(cc)) {
288307
pos--;
308+
309+
if (callbacks.badStringToken !== undefined) {
310+
return callbacks.badStringToken(input, start, pos);
311+
}
312+
289313
// bad string
290314
return pos;
291315
}
@@ -523,6 +547,10 @@ const consumeNumberSign = (input, pos, callbacks) => {
523547
return pos;
524548
}
525549

550+
if (callbacks.delim !== undefined) {
551+
return callbacks.delim(input, start, pos);
552+
}
553+
526554
// Otherwise, return a <delim-token> with its value set to the current input code point.
527555
return pos;
528556
};
@@ -539,6 +567,10 @@ const consumeHyphenMinus = (input, pos, callbacks) => {
539567
input.charCodeAt(pos) === CC_HYPHEN_MINUS &&
540568
input.charCodeAt(pos + 1) === CC_GREATER_THAN_SIGN
541569
) {
570+
if (callbacks.cdc !== undefined) {
571+
return callbacks.cdc(input, pos - 1, pos + 2);
572+
}
573+
542574
return pos + 2;
543575
}
544576
// Otherwise, if the input stream starts with an ident sequence, reconsume the current input code point, consume an ident-like token, and return it.
@@ -547,6 +579,10 @@ const consumeHyphenMinus = (input, pos, callbacks) => {
547579
return consumeAnIdentLikeToken(input, pos, callbacks);
548580
}
549581

582+
if (callbacks.delim !== undefined) {
583+
return callbacks.delim(input, pos - 1, pos);
584+
}
585+
550586
// Otherwise, return a <delim-token> with its value set to the current input code point.
551587
return pos;
552588
};
@@ -571,13 +607,19 @@ const consumeFullStop = (input, pos, callbacks) => {
571607

572608
/** @type {CharHandler} */
573609
const consumePlusSign = (input, pos, callbacks) => {
610+
const start = pos - 1;
611+
574612
// If the input stream starts with a number, reconsume the current input code point, consume a numeric token, and return it.
575613
if (_ifThreeCodePointsWouldStartANumber(input, pos)) {
576614
pos--;
577615
return consumeANumericToken(input, pos, callbacks);
578616
}
579617

580618
// Otherwise, return a <delim-token> with its value set to the current input code point.
619+
if (callbacks.delim !== undefined) {
620+
return callbacks.delim(input, start, pos);
621+
}
622+
581623
return pos;
582624
};
583625

@@ -657,6 +699,8 @@ const consumeANumericToken = (input, pos, callbacks) => {
657699
// This section describes how to consume a numeric token from a stream of code points.
658700
// It returns either a <number-token>, <percentage-token>, or <dimension-token>.
659701

702+
const start = pos;
703+
660704
// Consume a number and let number be the result.
661705
pos = _consumeANumber(input, pos, callbacks);
662706

@@ -679,15 +723,29 @@ const consumeANumericToken = (input, pos, callbacks) => {
679723
third
680724
)
681725
) {
682-
return _consumeAnIdentSequence(input, pos, callbacks);
726+
pos = _consumeAnIdentSequence(input, pos, callbacks);
727+
728+
if (callbacks.dimension !== undefined) {
729+
return callbacks.dimension(input, start, pos);
730+
}
731+
732+
return pos;
683733
}
684734
// Otherwise, if the next input code point is U+0025 PERCENTAGE SIGN (%), consume it.
685735
// Create a <percentage-token> with the same value as number, and return it.
686736
else if (first === CC_PERCENTAGE) {
737+
if (callbacks.percentage !== undefined) {
738+
return callbacks.percentage(input, start, pos + 1);
739+
}
740+
687741
return pos + 1;
688742
}
689743

690744
// Otherwise, create a <number-token> with the same value and type flag as number, and return it.
745+
if (callbacks.number !== undefined) {
746+
return callbacks.number(input, start, pos);
747+
}
748+
691749
return pos;
692750
};
693751

@@ -697,6 +755,7 @@ const consumeColon = (input, pos, callbacks) => {
697755
if (callbacks.colon !== undefined) {
698756
return callbacks.colon(input, pos - 1, pos);
699757
}
758+
700759
return pos;
701760
};
702761

@@ -706,6 +765,7 @@ const consumeLeftParenthesis = (input, pos, callbacks) => {
706765
if (callbacks.leftParenthesis !== undefined) {
707766
return callbacks.leftParenthesis(input, pos - 1, pos);
708767
}
768+
709769
return pos;
710770
};
711771

@@ -715,6 +775,7 @@ const consumeRightParenthesis = (input, pos, callbacks) => {
715775
if (callbacks.rightParenthesis !== undefined) {
716776
return callbacks.rightParenthesis(input, pos - 1, pos);
717777
}
778+
718779
return pos;
719780
};
720781

@@ -724,6 +785,7 @@ const consumeLeftSquareBracket = (input, pos, callbacks) => {
724785
if (callbacks.leftSquareBracket !== undefined) {
725786
return callbacks.leftSquareBracket(input, pos - 1, pos);
726787
}
788+
727789
return pos;
728790
};
729791

@@ -733,6 +795,7 @@ const consumeRightSquareBracket = (input, pos, callbacks) => {
733795
if (callbacks.rightSquareBracket !== undefined) {
734796
return callbacks.rightSquareBracket(input, pos - 1, pos);
735797
}
798+
736799
return pos;
737800
};
738801

@@ -742,6 +805,7 @@ const consumeLeftCurlyBracket = (input, pos, callbacks) => {
742805
if (callbacks.leftCurlyBracket !== undefined) {
743806
return callbacks.leftCurlyBracket(input, pos - 1, pos);
744807
}
808+
745809
return pos;
746810
};
747811

@@ -751,6 +815,7 @@ const consumeRightCurlyBracket = (input, pos, callbacks) => {
751815
if (callbacks.rightCurlyBracket !== undefined) {
752816
return callbacks.rightCurlyBracket(input, pos - 1, pos);
753817
}
818+
754819
return pos;
755820
};
756821

@@ -760,6 +825,7 @@ const consumeSemicolon = (input, pos, callbacks) => {
760825
if (callbacks.semicolon !== undefined) {
761826
return callbacks.semicolon(input, pos - 1, pos);
762827
}
828+
763829
return pos;
764830
};
765831

@@ -769,6 +835,7 @@ const consumeComma = (input, pos, callbacks) => {
769835
if (callbacks.comma !== undefined) {
770836
return callbacks.comma(input, pos - 1, pos);
771837
}
838+
772839
return pos;
773840
};
774841

@@ -935,7 +1002,13 @@ const consumeAUrlToken = (input, pos, fnStart, callbacks) => {
9351002
}
9361003

9371004
// Don't handle bad urls
938-
return consumeTheRemnantsOfABadUrl(input, pos);
1005+
pos = consumeTheRemnantsOfABadUrl(input, pos);
1006+
1007+
if (callbacks.badUrlToken !== undefined) {
1008+
return callbacks.badUrlToken(input, fnStart, pos);
1009+
}
1010+
1011+
return pos;
9391012
}
9401013
// U+0022 QUOTATION MARK (")
9411014
// U+0027 APOSTROPHE (')
@@ -949,7 +1022,13 @@ const consumeAUrlToken = (input, pos, fnStart, callbacks) => {
9491022
_isNonPrintableCodePoint(cc)
9501023
) {
9511024
// Don't handle bad urls
952-
return consumeTheRemnantsOfABadUrl(input, pos);
1025+
pos = consumeTheRemnantsOfABadUrl(input, pos);
1026+
1027+
if (callbacks.badUrlToken !== undefined) {
1028+
return callbacks.badUrlToken(input, fnStart, pos);
1029+
}
1030+
1031+
return pos;
9531032
}
9541033
// U+005C REVERSE SOLIDUS (\)
9551034
// If the stream starts with a valid escape, consume an escaped code point and append the returned code point to the <url-token>’s value.
@@ -959,7 +1038,13 @@ const consumeAUrlToken = (input, pos, fnStart, callbacks) => {
9591038
pos = _consumeAnEscapedCodePoint(input, pos);
9601039
} else {
9611040
// Don't handle bad urls
962-
return consumeTheRemnantsOfABadUrl(input, pos);
1041+
pos = consumeTheRemnantsOfABadUrl(input, pos);
1042+
1043+
if (callbacks.badUrlToken !== undefined) {
1044+
return callbacks.badUrlToken(input, fnStart, pos);
1045+
}
1046+
1047+
return pos;
9631048
}
9641049
}
9651050
// anything else
@@ -1033,12 +1118,20 @@ const consumeAnIdentLikeToken = (input, pos, callbacks) => {
10331118
};
10341119

10351120
/** @type {CharHandler} */
1036-
const consumeLessThan = (input, pos, _callbacks) => {
1121+
const consumeLessThan = (input, pos, callbacks) => {
10371122
// If the next 3 input code points are U+0021 EXCLAMATION MARK U+002D HYPHEN-MINUS U+002D HYPHEN-MINUS (!--), consume them and return a <CDO-token>.
10381123
if (input.slice(pos, pos + 3) === "!--") {
1124+
if (callbacks.cdo !== undefined) {
1125+
return callbacks.cdo(input, pos - 1, pos + 3);
1126+
}
1127+
10391128
return pos + 3;
10401129
}
10411130

1131+
if (callbacks.delim !== undefined) {
1132+
return callbacks.delim(input, pos - 1, pos);
1133+
}
1134+
10421135
// Otherwise, return a <delim-token> with its value set to the current input code point.
10431136
return pos;
10441137
};
@@ -1067,6 +1160,10 @@ const consumeCommercialAt = (input, pos, callbacks) => {
10671160
}
10681161

10691162
// Otherwise, return a <delim-token> with its value set to the current input code point.
1163+
if (callbacks.delim !== undefined) {
1164+
return callbacks.delim(input, start, pos);
1165+
}
1166+
10701167
return pos;
10711168
};
10721169

@@ -1079,6 +1176,10 @@ const consumeReverseSolidus = (input, pos, callbacks) => {
10791176
}
10801177

10811178
// Otherwise, this is a parse error. Return a <delim-token> with its value set to the current input code point.
1179+
if (callbacks.delim !== undefined) {
1180+
return callbacks.delim(input, pos - 1, pos);
1181+
}
1182+
10821183
return pos;
10831184
};
10841185

0 commit comments

Comments
 (0)