Skip to content

Commit ac0dbc0

Browse files
committed
fix: align identifier and string parsing and rendering with CSS standards, closes #36, closes #37
1 parent 33413e8 commit ac0dbc0

File tree

5 files changed

+117
-45
lines changed

5 files changed

+117
-45
lines changed

src/parser.ts

+37-10
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ import {
2929
getXmlOptions,
3030
SyntaxDefinition
3131
} from './syntax-definitions.js';
32-
import {digitsChars, isHex, isIdent, isIdentStart, quoteChars, stringEscapeChars, whitespaceChars} from './utils.js';
32+
import {digitsChars, isHex, isIdent, isIdentStart, maxHexLength, quoteChars, whitespaceChars} from './utils.js';
3333

3434
/**
3535
* This error is thrown when parser encounters problems in CSS string.
@@ -225,17 +225,23 @@ export function createParser(
225225
}
226226
}
227227

228+
/**
229+
* @see https://www.w3.org/TR/css-syntax/#hex-digit-diagram
230+
*/
228231
function parseHex() {
229232
let hex = readAndNext();
230-
while (isHex(chr)) {
233+
let count = 1;
234+
while (isHex(chr) && count < maxHexLength) {
231235
hex += readAndNext();
236+
count++;
232237
}
233-
if (is(' ')) {
234-
next();
235-
}
238+
skipSingleWhitespace();
236239
return String.fromCharCode(parseInt(hex, 16));
237240
}
238241

242+
/**
243+
* @see https://www.w3.org/TR/css-syntax/#string-token-diagram
244+
*/
239245
function parseString(quote: string): string {
240246
let result = '';
241247
pass(quote);
@@ -245,25 +251,33 @@ export function createParser(
245251
return result;
246252
} else if (is('\\')) {
247253
next();
248-
let esc;
249254
if (is(quote)) {
250255
result += quote;
251-
} else if ((esc = stringEscapeChars[chr]) !== undefined) {
252-
result += esc;
256+
next();
257+
} else if (chr === '\n' || chr === '\f') {
258+
next();
259+
} else if (chr === '\r') {
260+
next();
261+
if (is('\n')) {
262+
next();
263+
}
253264
} else if (isHex(chr)) {
254265
result += parseHex();
255-
continue;
256266
} else {
257267
result += chr;
268+
next();
258269
}
259270
} else {
260271
result += chr;
272+
next();
261273
}
262-
next();
263274
}
264275
return result;
265276
}
266277

278+
/**
279+
* @see https://www.w3.org/TR/css-syntax/#ident-token-diagram
280+
*/
267281
function parseIdentifier(): string | null {
268282
if (!isIdentStart(chr)) {
269283
return null;
@@ -321,6 +335,19 @@ export function createParser(
321335
return result.trim();
322336
}
323337

338+
function skipSingleWhitespace() {
339+
if (chr === ' ' || chr === '\t' || chr === '\f' || chr === '\n') {
340+
next();
341+
return;
342+
}
343+
if (chr === '\r') {
344+
next();
345+
}
346+
if (chr === '\n') {
347+
next();
348+
}
349+
}
350+
324351
function skipWhitespace() {
325352
while (whitespaceChars[chr]) {
326353
next();

src/render.ts

+2-2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import {AstEntity, AstNamespaceName, AstNoNamespace, AstSubstitution, AstWildcardNamespace} from './ast.js';
2-
import {escapeIdentifier, escapeStr} from './utils.js';
2+
import {escapeIdentifier, escapeString} from './utils.js';
33

44
const errorPrefix = `css-selector-parser render error: `;
55

@@ -96,7 +96,7 @@ export function render(entity: AstEntity): string {
9696
if (operator && value) {
9797
result += operator;
9898
if (value.type === 'String') {
99-
result += escapeStr(value.value);
99+
result += escapeString(value.value);
100100
} else if (value.type === 'Substitution') {
101101
result += renderSubstitution(value);
102102
} else {

src/utils.ts

+20-29
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ export function isHex(c: string) {
1717
return (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || (c >= '0' && c <= '9');
1818
}
1919

20-
export const identSpecialChars: Record<string, boolean> = {
20+
export const identEscapeChars: Record<string, boolean> = {
2121
'!': true,
2222
'"': true,
2323
'#': true,
@@ -49,20 +49,12 @@ export const identSpecialChars: Record<string, boolean> = {
4949
'~': true
5050
};
5151

52-
export const strReplacementsRev: Record<string, string> = {
53-
'\n': '\\n',
54-
'\r': '\\r',
55-
'\t': '\\t',
56-
'\f': '\\f',
57-
'\v': '\\v'
58-
};
59-
60-
export const stringEscapeChars: Record<string, string> = {
61-
n: '\n',
62-
r: '\r',
63-
t: '\t',
64-
f: '\f',
65-
'\\': '\\'
52+
export const stringRenderEscapeChars: Record<string, boolean> = {
53+
'\n': true,
54+
'\r': true,
55+
'\t': true,
56+
'\f': true,
57+
'\v': true
6658
};
6759

6860
export const whitespaceChars: Record<string, boolean> = {
@@ -91,24 +83,26 @@ export const digitsChars: Record<string, boolean> = {
9183
9: true
9284
};
9385

86+
export const maxHexLength = 6;
87+
9488
export function escapeIdentifier(s: string) {
9589
const len = s.length;
9690
let result = '';
9791
let i = 0;
9892
while (i < len) {
9993
const chr = s.charAt(i);
100-
if (identSpecialChars[chr]) {
94+
if (identEscapeChars[chr] || (chr === '-' && i === 1 && s.charAt(0) === '-')) {
10195
result += '\\' + chr;
10296
} else {
10397
if (
104-
!(
105-
chr === '_' ||
106-
chr === '-' ||
107-
(chr >= 'A' && chr <= 'Z') ||
108-
(chr >= 'a' && chr <= 'z') ||
109-
(i !== 0 && chr >= '0' && chr <= '9')
110-
)
98+
chr === '-' ||
99+
chr === '_' ||
100+
(chr >= 'A' && chr <= 'Z') ||
101+
(chr >= 'a' && chr <= 'z') ||
102+
(chr >= '0' && chr <= '9' && i !== 0 && !(i === 1 && s.charAt(0) === '-'))
111103
) {
104+
result += chr;
105+
} else {
112106
let charCode = chr.charCodeAt(0);
113107
if ((charCode & 0xf800) === 0xd800) {
114108
const extraCharCode = s.charCodeAt(i++);
@@ -118,28 +112,25 @@ export function escapeIdentifier(s: string) {
118112
charCode = ((charCode & 0x3ff) << 10) + (extraCharCode & 0x3ff) + 0x10000;
119113
}
120114
result += '\\' + charCode.toString(16) + ' ';
121-
} else {
122-
result += chr;
123115
}
124116
}
125117
i++;
126118
}
127119
return result.trim();
128120
}
129121

130-
export function escapeStr(s: string) {
122+
export function escapeString(s: string) {
131123
const len = s.length;
132124
let result = '';
133125
let i = 0;
134-
let replacement: string;
135126
while (i < len) {
136127
let chr = s.charAt(i);
137128
if (chr === '"') {
138129
chr = '\\"';
139130
} else if (chr === '\\') {
140131
chr = '\\\\';
141-
} else if ((replacement = strReplacementsRev[chr]) !== undefined) {
142-
chr = replacement;
132+
} else if (stringRenderEscapeChars[chr]) {
133+
chr = '\\' + chr.charCodeAt(0).toString(16) + (i === len - 1 ? '' : ' ');
143134
}
144135
result += chr;
145136
i++;

test/parser.test.ts

+46
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,24 @@ describe('parse()', () => {
2828
})
2929
);
3030
});
31+
it('should parse an identifier with hex-encoded characters', () => {
32+
const astSelector = ast.selector({
33+
rules: [
34+
ast.rule({
35+
items: [ast.id({name: 'hello\nworld'})]
36+
})
37+
]
38+
});
39+
expect(parse('#hello\\aworld')).toEqual(astSelector);
40+
expect(parse('#hello\\a world')).toEqual(astSelector);
41+
expect(parse('#hello\\a\tworld')).toEqual(astSelector);
42+
expect(parse('#hello\\a\fworld')).toEqual(astSelector);
43+
expect(parse('#hello\\a\nworld')).toEqual(astSelector);
44+
expect(parse('#hello\\a\nworld')).toEqual(astSelector);
45+
expect(parse('#hello\\a\rworld')).toEqual(astSelector);
46+
expect(parse('#hello\\a\r\nworld')).toEqual(astSelector);
47+
expect(parse('#hello\\00000aworld')).toEqual(astSelector);
48+
});
3149
it('should fail on an identifier starting with multiple hyphens', () => {
3250
expect(() => parse('#--id')).toThrow('Identifiers cannot start with two hyphens with strict mode on.');
3351
});
@@ -584,6 +602,34 @@ describe('parse()', () => {
584602
})
585603
);
586604
});
605+
it('should properly parse escapes', () => {
606+
const astSelector = ast.selector({
607+
rules: [
608+
ast.rule({
609+
items: [
610+
ast.attribute({
611+
name: 'attr',
612+
operator: '=',
613+
value: ast.string({
614+
value: 'hello\nworld'
615+
})
616+
})
617+
]
618+
})
619+
]
620+
});
621+
expect(parse('[attr="hello\\aworld"]')).toEqual(astSelector);
622+
expect(parse('[attr="hell\\o\\aworld"]')).toEqual(astSelector);
623+
expect(parse('[attr="hell\\\no\\aworld"]')).toEqual(astSelector);
624+
expect(parse('[attr="hello\\a world"]')).toEqual(astSelector);
625+
expect(parse('[attr="hello\\a\tworld"]')).toEqual(astSelector);
626+
expect(parse('[attr="hello\\a\fworld"]')).toEqual(astSelector);
627+
expect(parse('[attr="hello\\a\nworld"]')).toEqual(astSelector);
628+
expect(parse('[attr="hello\\a\nworld"]')).toEqual(astSelector);
629+
expect(parse('[attr="hello\\a\rworld"]')).toEqual(astSelector);
630+
expect(parse('[attr="hello\\a\r\nworld"]')).toEqual(astSelector);
631+
expect(parse('[attr="hello\\00000aworld"]')).toEqual(astSelector);
632+
});
587633
it('should properly parse single quotes', () => {
588634
expect(parse("[ attr = 'val\\'\\ue\\20' i ]")).toEqual(
589635
ast.selector({

test/render.test.ts

+12-4
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,8 @@ const testCases = {
2828
'tagname[x="y \\""]': 'tagname[x="y \\""]',
2929
'tagname[x="y\'"]': 'tagname[x="y\'"]',
3030
"tagname[x='y \\'']": 'tagname[x="y \'"]',
31+
"div[role='a\\00000ab']": 'div[role="a\\a b"]',
32+
"div[role='\\a']": 'div[role="\\a"]',
3133
'tag1 tag2': 'tag1 tag2',
3234
'ns1|tag1': 'ns1|tag1',
3335
'|tag1': '|tag1',
@@ -85,7 +87,7 @@ const testCases = {
8587
'.cls\\n\\\\name\\.\\[': '.clsn\\\\name\\.\\[',
8688
'[attr\\n\\\\name\\.\\[=a1]': '[attrn\\\\name\\.\\[="a1"]',
8789
':pseudo\\n\\\\name\\.\\[\\((123)': ':pseudon\\\\name\\.\\[\\((\\31 23)',
88-
'[attr="val\nval"]': '[attr="val\\nval"]',
90+
'[attr="val\nval"]': '[attr="val\\a val"]',
8991
'[attr="val\\"val"]': '[attr="val\\"val"]',
9092
'[attr="val\\00a0val"]': '[attr="val val"]',
9193
'tag\\00a0 tag': 'tag\\a0 tag',
@@ -97,9 +99,15 @@ const testCases = {
9799
'#\\3123': '#\\3123',
98100
'#\\31 23': '#\\31 23',
99101
'#\\00031 23': '#\\31 23',
100-
'#\\0003123': '#\\3123',
101-
'#\\0004123': '#\\4123',
102-
'#\\0o': '#\\0 o'
102+
'#\\003123': '#\\3123',
103+
'#\\0003123': '#\\312 3',
104+
'#\\004123': '#\\4123',
105+
'#\\0004123': '#\\412 3',
106+
'#\\0o': '#\\0 o',
107+
'#--a': '#-\\-a',
108+
'#--\\30': '#-\\-0',
109+
'#-\\30': '#-\\30',
110+
'#\\30': '#\\30'
103111
};
104112

105113
describe('render()', () => {

0 commit comments

Comments
 (0)