Skip to content

Commit 1a2f966

Browse files
authored
feat: no-useless-escape support v flag (#17420)
* feat: `no-useless-escape` support `v` flag * fix: false positives for `/[\^]/v` * fix: remove unnecessary `if` statement * fix: incorrect suggestion
1 parent 0aa0bc3 commit 1a2f966

2 files changed

Lines changed: 888 additions & 81 deletions

File tree

lib/rules/no-useless-escape.js

Lines changed: 155 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,12 @@
66
"use strict";
77

88
const astUtils = require("./utils/ast-utils");
9+
const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");
910

11+
/**
12+
* @typedef {import('@eslint-community/regexpp').AST.CharacterClass} CharacterClass
13+
* @typedef {import('@eslint-community/regexpp').AST.ExpressionCharacterClass} ExpressionCharacterClass
14+
*/
1015
//------------------------------------------------------------------------------
1116
// Rule Definition
1217
//------------------------------------------------------------------------------
@@ -28,55 +33,17 @@ const VALID_STRING_ESCAPES = union(new Set("\\nrvtbfux"), astUtils.LINEBREAKS);
2833
const REGEX_GENERAL_ESCAPES = new Set("\\bcdDfnpPrsStvwWxu0123456789]");
2934
const REGEX_NON_CHARCLASS_ESCAPES = union(REGEX_GENERAL_ESCAPES, new Set("^/.$*+?[{}|()Bk"));
3035

31-
/**
32-
* Parses a regular expression into a list of characters with character class info.
33-
* @param {string} regExpText The raw text used to create the regular expression
34-
* @returns {Object[]} A list of characters, each with info on escaping and whether they're in a character class.
35-
* @example
36-
*
37-
* parseRegExp("a\\b[cd-]");
38-
*
39-
* // returns:
40-
* [
41-
* { text: "a", index: 0, escaped: false, inCharClass: false, startsCharClass: false, endsCharClass: false },
42-
* { text: "b", index: 2, escaped: true, inCharClass: false, startsCharClass: false, endsCharClass: false },
43-
* { text: "c", index: 4, escaped: false, inCharClass: true, startsCharClass: true, endsCharClass: false },
44-
* { text: "d", index: 5, escaped: false, inCharClass: true, startsCharClass: false, endsCharClass: false },
45-
* { text: "-", index: 6, escaped: false, inCharClass: true, startsCharClass: false, endsCharClass: false }
46-
* ];
47-
*
36+
/*
37+
* Set of characters whose escapes are valid inside character classes in `unicodeSets` mode: the general escapes, `q`, and the characters that must be escaped there.
38+
* ( ) [ ] { } / - \ | are ClassSetSyntaxCharacter
4839
*/
49-
function parseRegExp(regExpText) {
50-
const charList = [];
40+
const REGEX_CLASSSET_CHARACTER_ESCAPES = union(REGEX_GENERAL_ESCAPES, new Set("q/[{}|()-"));
5141

52-
regExpText.split("").reduce((state, char, index) => {
53-
if (!state.escapeNextChar) {
54-
if (char === "\\") {
55-
return Object.assign(state, { escapeNextChar: true });
56-
}
57-
if (char === "[" && !state.inCharClass) {
58-
return Object.assign(state, { inCharClass: true, startingCharClass: true });
59-
}
60-
if (char === "]" && state.inCharClass) {
61-
if (charList.length && charList[charList.length - 1].inCharClass) {
62-
charList[charList.length - 1].endsCharClass = true;
63-
}
64-
return Object.assign(state, { inCharClass: false, startingCharClass: false });
65-
}
66-
}
67-
charList.push({
68-
text: char,
69-
index,
70-
escaped: state.escapeNextChar,
71-
inCharClass: state.inCharClass,
72-
startsCharClass: state.startingCharClass,
73-
endsCharClass: false
74-
});
75-
return Object.assign(state, { escapeNextChar: false, startingCharClass: false });
76-
}, { escapeNextChar: false, inCharClass: false, startingCharClass: false });
77-
78-
return charList;
79-
}
42+
/*
43+
* The set of single characters that, when doubled, form a ClassSetReservedDoublePunctuator.
44+
* && !! ## $$ %% ** ++ ,, .. :: ;; << == >> ?? @@ ^^ `` ~~ are ClassSetReservedDoublePunctuator
45+
*/
46+
const REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR = new Set("!#$%&*+,.:;<=>?@^`~");
8047

8148
/** @type {import('../shared/types').Rule} */
8249
module.exports = {
@@ -103,15 +70,17 @@ module.exports = {
10370

10471
create(context) {
10572
const sourceCode = context.sourceCode;
73+
const parser = new RegExpParser();
10674

10775
/**
10876
* Reports a node
10977
* @param {ASTNode} node The node to report
11078
* @param {number} startOffset The backslash's offset from the start of the node
11179
* @param {string} character The uselessly escaped character (not including the backslash)
80+
* @param {boolean} [disableEscapeBackslashSuggest] `true` if escapeBackslash suggestion should be turned off.
11281
* @returns {void}
11382
*/
114-
function report(node, startOffset, character) {
83+
function report(node, startOffset, character, disableEscapeBackslashSuggest) {
11584
const rangeStart = node.range[0] + startOffset;
11685
const range = [rangeStart, rangeStart + 1];
11786
const start = sourceCode.getLocFromIndex(rangeStart);
@@ -134,12 +103,16 @@ module.exports = {
134103
return fixer.removeRange(range);
135104
}
136105
},
137-
{
138-
messageId: "escapeBackslash",
139-
fix(fixer) {
140-
return fixer.insertTextBeforeRange(range, "\\");
141-
}
142-
}
106+
...disableEscapeBackslashSuggest
107+
? []
108+
: [
109+
{
110+
messageId: "escapeBackslash",
111+
fix(fixer) {
112+
return fixer.insertTextBeforeRange(range, "\\");
113+
}
114+
}
115+
]
143116
]
144117
});
145118
}
@@ -182,6 +155,133 @@ module.exports = {
182155
}
183156
}
184157

158+
/**
 * Walks the pattern of a regular expression literal and reports every backslash
 * that escapes a character which does not need escaping at that position.
 * @private
 * @param {ASTNode} node The regular expression literal to validate.
 * @returns {void}
 */
function validateRegExp(node) {
    const { pattern, flags } = node.regex;
    const unicode = flags.includes("u");
    const unicodeSets = flags.includes("v");
    let patternNode;

    try {
        patternNode = parser.parsePattern(pattern, 0, pattern.length, { unicode, unicodeSets });
    } catch {

        // Patterns that fail to parse are not checked.
        return;
    }

    /**
     * Character classes currently being visited, innermost first.
     * @type {(CharacterClass | ExpressionCharacterClass)[]}
     */
    const characterClassStack = [];
    const enterCharacterClass = classNode => characterClassStack.unshift(classNode);
    const leaveCharacterClass = () => characterClassStack.shift();

    visitRegExpAST(patternNode, {
        onCharacterClassEnter: enterCharacterClass,
        onCharacterClassLeave: leaveCharacterClass,
        onExpressionCharacterClassEnter: enterCharacterClass,
        onExpressionCharacterClassLeave: leaveCharacterClass,
        onCharacterEnter(characterNode) {
            const { raw, start, end } = characterNode;

            // Only characters written with a leading backslash are of interest.
            if (raw.charAt(0) !== "\\") {
                return;
            }

            const escapedChar = raw.slice(1);

            // When the escape produces a different character (e.g. `\n`), it is meaningful.
            if (String.fromCodePoint(characterNode.value) !== escapedChar) {
                return;
            }

            const insideCharacterClass = characterClassStack.length > 0;
            let allowedEscapes = REGEX_NON_CHARCLASS_ESCAPES;

            if (insideCharacterClass) {
                allowedEscapes = unicodeSets ? REGEX_CLASSSET_CHARACTER_ESCAPES : REGEX_GENERAL_ESCAPES;
            }
            if (allowedEscapes.has(escapedChar)) {
                return;
            }

            // Offset of the backslash from the start of the literal (`+ 1` skips the opening `/`).
            const reportedIndex = start + 1;

            if (!insideCharacterClass) {
                report(node, reportedIndex, escapedChar, false);
                return;
            }

            const [characterClassNode] = characterClassStack;
            const firstIndexInClass = characterClassNode.start + 1;

            /*
             * '^' must always be escaped outside of character classes, but inside one it only needs
             * escaping when it is the very first character. It is therefore treated as a valid escape
             * outside of classes, and an escaped '^' at the start of a class is accepted here.
             */
            if (escapedChar === "^" && firstIndexInClass === start) {
                return;
            }

            if (!unicodeSets) {

                /*
                 * '-' is only worth escaping when it sits inside a character class and is not at
                 * either edge of it. It is not a generally valid escape, so an escaped '-' in the
                 * middle of a class is accepted here.
                 */
                const isAtClassEdge = firstIndexInClass === start || end === characterClassNode.end - 1;

                if (escapedChar === "-" && !isAtClassEdge) {
                    return;
                }

                report(node, reportedIndex, escapedChar, false);
                return;
            }

            // From here on: `unicodeSets` mode.
            if (REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has(escapedChar)) {

                // The escape is needed when the same punctuator follows, since the pair is reserved.
                if (pattern[end] === escapedChar) {
                    return;
                }

                if (pattern[start - 1] === escapedChar) {

                    /*
                     * The same punctuator precedes, so the escape is needed, with one exception:
                     * when the preceding character is the negating caret of the class, escaping
                     * the caret is unnecessary.
                     */
                    const followsNegateCaret =
                        escapedChar === "^" &&
                        characterClassNode.negate &&
                        firstIndexInClass >= start - 1;

                    if (!followsNegateCaret) {
                        return;
                    }
                }
            }

            // The escapeBackslash suggestion is turned off for operands of `&&` and `--`.
            const parentType = characterNode.parent.type;
            const disableEscapeBackslashSuggest =
                parentType === "ClassIntersection" || parentType === "ClassSubtraction";

            report(
                node,
                reportedIndex,
                escapedChar,
                disableEscapeBackslashSuggest
            );
        }
    });
}
284+
185285
/**
186286
* Checks if a node has an escape.
187287
* @param {ASTNode} node node to check.
@@ -220,32 +320,7 @@ module.exports = {
220320
validateString(node, match);
221321
}
222322
} else if (node.regex) {
223-
parseRegExp(node.regex.pattern)
224-
225-
/*
226-
* The '-' character is a special case, because it's only valid to escape it if it's in a character
227-
* class, and is not at either edge of the character class. To account for this, don't consider '-'
228-
* characters to be valid in general, and filter out '-' characters that appear in the middle of a
229-
* character class.
230-
*/
231-
.filter(charInfo => !(charInfo.text === "-" && charInfo.inCharClass && !charInfo.startsCharClass && !charInfo.endsCharClass))
232-
233-
/*
234-
* The '^' character is also a special case; it must always be escaped outside of character classes, but
235-
* it only needs to be escaped in character classes if it's at the beginning of the character class. To
236-
* account for this, consider it to be a valid escape character outside of character classes, and filter
237-
* out '^' characters that appear at the start of a character class.
238-
*/
239-
.filter(charInfo => !(charInfo.text === "^" && charInfo.startsCharClass))
240-
241-
// Filter out characters that aren't escaped.
242-
.filter(charInfo => charInfo.escaped)
243-
244-
// Filter out characters that are valid to escape, based on their position in the regular expression.
245-
.filter(charInfo => !(charInfo.inCharClass ? REGEX_GENERAL_ESCAPES : REGEX_NON_CHARCLASS_ESCAPES).has(charInfo.text))
246-
247-
// Report all the remaining characters.
248-
.forEach(charInfo => report(node, charInfo.index, charInfo.text));
323+
validateRegExp(node);
249324
}
250325

251326
}

0 commit comments

Comments
 (0)