66"use strict" ;
77
88const astUtils = require ( "./utils/ast-utils" ) ;
9+ const { RegExpParser, visitRegExpAST } = require ( "@eslint-community/regexpp" ) ;
910
11+ /**
12+ * @typedef {import('@eslint-community/regexpp').AST.CharacterClass } CharacterClass
13+ * @typedef {import('@eslint-community/regexpp').AST.ExpressionCharacterClass } ExpressionCharacterClass
14+ */
1015//------------------------------------------------------------------------------
1116// Rule Definition
1217//------------------------------------------------------------------------------
@@ -28,55 +33,17 @@ const VALID_STRING_ESCAPES = union(new Set("\\nrvtbfux"), astUtils.LINEBREAKS);
// Characters whose escapes are always accepted in a regular expression. The rule uses this set on
// its own as the allowed escapes inside character classes when the `v` flag is not set.
2833const REGEX_GENERAL_ESCAPES = new Set ( "\\bcdDfnpPrsStvwWxu0123456789]" ) ;
// Allowed escapes outside character classes: the general escapes combined (via `union`, defined
// outside this view) with the additional characters listed here.
2934const REGEX_NON_CHARCLASS_ESCAPES = union ( REGEX_GENERAL_ESCAPES , new Set ( "^/.$*+?[{}|()Bk" ) ) ;
3035
31- /**
32- * Parses a regular expression into a list of characters with character class info.
33- * @param {string } regExpText The raw text used to create the regular expression
34- * @returns {Object[] } A list of characters, each with info on escaping and whether they're in a character class.
35- * @example
36- *
37- * parseRegExp("a\\b[cd-]");
38- *
39- * // returns:
40- * [
41- * { text: "a", index: 0, escaped: false, inCharClass: false, startsCharClass: false, endsCharClass: false },
42- * { text: "b", index: 2, escaped: true, inCharClass: false, startsCharClass: false, endsCharClass: false },
43- * { text: "c", index: 4, escaped: false, inCharClass: true, startsCharClass: true, endsCharClass: false },
44- * { text: "d", index: 5, escaped: false, inCharClass: true, startsCharClass: false, endsCharClass: false },
45- * { text: "-", index: 6, escaped: false, inCharClass: true, startsCharClass: false, endsCharClass: false }
46- * ];
47- *
36+ /*
37+ * Set of characters whose escapes are accepted (never reported as useless) inside character classes in `unicodeSets` mode.
38+ * ( ) [ ] { } / - \ | are ClassSetSyntaxCharacter
4839 */
49- function parseRegExp ( regExpText ) {
50- const charList = [ ] ;
40+ const REGEX_CLASSSET_CHARACTER_ESCAPES = union ( REGEX_GENERAL_ESCAPES , new Set ( "q/[{}|()-" ) ) ;
5141
52- regExpText . split ( "" ) . reduce ( ( state , char , index ) => {
53- if ( ! state . escapeNextChar ) {
54- if ( char === "\\" ) {
55- return Object . assign ( state , { escapeNextChar : true } ) ;
56- }
57- if ( char === "[" && ! state . inCharClass ) {
58- return Object . assign ( state , { inCharClass : true , startingCharClass : true } ) ;
59- }
60- if ( char === "]" && state . inCharClass ) {
61- if ( charList . length && charList [ charList . length - 1 ] . inCharClass ) {
62- charList [ charList . length - 1 ] . endsCharClass = true ;
63- }
64- return Object . assign ( state , { inCharClass : false , startingCharClass : false } ) ;
65- }
66- }
67- charList . push ( {
68- text : char ,
69- index,
70- escaped : state . escapeNextChar ,
71- inCharClass : state . inCharClass ,
72- startsCharClass : state . startingCharClass ,
73- endsCharClass : false
74- } ) ;
75- return Object . assign ( state , { escapeNextChar : false , startingCharClass : false } ) ;
76- } , { escapeNextChar : false , inCharClass : false , startingCharClass : false } ) ;
77-
78- return charList ;
79- }
42+ /*
43+ * Set of the single characters that form a ClassSetReservedDoublePunctuator when doubled.
44+ * && !! ## $$ %% ** ++ ,, .. :: ;; << == >> ?? @@ ^^ `` ~~ are ClassSetReservedDoublePunctuator
45+ */
46+ const REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR = new Set ( "!#$%&*+,.:;<=>?@^`~" ) ;
8047
8148/** @type {import('../shared/types').Rule } */
8249module . exports = {
@@ -103,15 +70,17 @@ module.exports = {
10370
10471 create ( context ) {
10572 const sourceCode = context . sourceCode ;
73+ const parser = new RegExpParser ( ) ;
10674
10775 /**
10876 * Reports a node
10977 * @param {ASTNode } node The node to report
11078 * @param {number } startOffset The backslash's offset from the start of the node
11179 * @param {string } character The uselessly escaped character (not including the backslash)
80+ * @param {boolean } [disableEscapeBackslashSuggest] `true` if escapeBackslash suggestion should be turned off.
11281 * @returns {void }
11382 */
114- function report ( node , startOffset , character ) {
83+ function report ( node , startOffset , character , disableEscapeBackslashSuggest ) {
11584 const rangeStart = node . range [ 0 ] + startOffset ;
11685 const range = [ rangeStart , rangeStart + 1 ] ;
11786 const start = sourceCode . getLocFromIndex ( rangeStart ) ;
@@ -134,12 +103,16 @@ module.exports = {
134103 return fixer . removeRange ( range ) ;
135104 }
136105 } ,
137- {
138- messageId : "escapeBackslash" ,
139- fix ( fixer ) {
140- return fixer . insertTextBeforeRange ( range , "\\" ) ;
141- }
142- }
106+ ...disableEscapeBackslashSuggest
107+ ? [ ]
108+ : [
109+ {
110+ messageId : "escapeBackslash" ,
111+ fix ( fixer ) {
112+ return fixer . insertTextBeforeRange ( range , "\\" ) ;
113+ }
114+ }
115+ ]
143116 ]
144117 } ) ;
145118 }
@@ -182,6 +155,133 @@ module.exports = {
182155 }
183156 }
184157
/**
 * Reports every unnecessary escape found in the pattern of a regular expression literal.
 * The pattern is parsed with the shared regexpp parser; patterns that fail to parse are skipped.
 * @private
 * @param {ASTNode} node node to validate.
 * @returns {void}
 */
function validateRegExp(node) {
    const { pattern, flags } = node.regex;
    const unicode = flags.includes("u");
    const unicodeSets = flags.includes("v");
    let patternNode;

    try {
        patternNode = parser.parsePattern(pattern, 0, pattern.length, { unicode, unicodeSets });
    } catch {

        // Ignore regular expressions with syntax errors
        return;
    }

    /**
     * Character classes currently being visited, innermost last.
     * @type {(CharacterClass | ExpressionCharacterClass)[]}
     */
    const openClasses = [];
    const enterClass = classNode => openClasses.push(classNode);
    const leaveClass = () => openClasses.pop();

    visitRegExpAST(patternNode, {
        onCharacterClassEnter: enterClass,
        onCharacterClassLeave: leaveClass,
        onExpressionCharacterClassEnter: enterClass,
        onExpressionCharacterClassLeave: leaveClass,
        onCharacterEnter(characterNode) {
            const { raw, start, end } = characterNode;

            // Characters written without a leading backslash are not escapes.
            if (!raw.startsWith("\\")) {
                return;
            }

            const escapedChar = raw.slice(1);

            // If the escape does not simply stand for the escaped character itself, it is a valid escape.
            if (String.fromCodePoint(characterNode.value) !== escapedChar) {
                return;
            }

            const insideClass = openClasses.length > 0;
            let allowedEscapes;

            if (!insideClass) {
                allowedEscapes = REGEX_NON_CHARCLASS_ESCAPES;
            } else if (unicodeSets) {
                allowedEscapes = REGEX_CLASSSET_CHARACTER_ESCAPES;
            } else {
                allowedEscapes = REGEX_GENERAL_ESCAPES;
            }

            if (allowedEscapes.has(escapedChar)) {
                return;
            }

            let disableEscapeBackslashSuggest = false;

            if (insideClass) {
                const classNode = openClasses[openClasses.length - 1];
                const isFirstInClass = classNode.start + 1 === start;

                /*
                 * '^' is treated as a valid escape outside of character classes. Inside a class it only
                 * needs escaping when it is the first character, so that position is exempt here.
                 */
                if (escapedChar === "^" && isFirstInClass) {
                    return;
                }

                if (unicodeSets) {
                    if (REGEX_CLASS_SET_RESERVED_DOUBLE_PUNCTUATOR.has(escapedChar)) {

                        // The escape is needed when the same character follows immediately.
                        if (pattern[end] === escapedChar) {
                            return;
                        }

                        /*
                         * The escape is also needed when the same character precedes it, except when that
                         * preceding character is the `negate` caret (`^`) that opens the class.
                         */
                        if (
                            pattern[start - 1] === escapedChar &&
                            (escapedChar !== "^" || !classNode.negate || classNode.start + 1 < start - 1)
                        ) {
                            return;
                        }
                    }

                    const parentType = characterNode.parent.type;

                    disableEscapeBackslashSuggest =
                        parentType === "ClassIntersection" || parentType === "ClassSubtraction";
                } else if (escapedChar === "-" && !isFirstInClass && end !== classNode.end - 1) {

                    /*
                     * '-' is not a valid escape in general; it is only accepted inside a character class
                     * when it sits at neither edge of the class.
                     */
                    return;
                }
            }

            /*
             * `report` takes the backslash's offset from the start of the node. The pattern offset is
             * shifted by one, presumably to account for the literal's opening `/`.
             */
            report(node, start + 1, escapedChar, disableEscapeBackslashSuggest);
        }
    });
}
284+
185285 /**
186286 * Checks if a node has an escape.
187287 * @param {ASTNode } node node to check.
@@ -220,32 +320,7 @@ module.exports = {
220320 validateString ( node , match ) ;
221321 }
222322 } else if ( node . regex ) {
223- parseRegExp ( node . regex . pattern )
224-
225- /*
226- * The '-' character is a special case, because it's only valid to escape it if it's in a character
227- * class, and is not at either edge of the character class. To account for this, don't consider '-'
228- * characters to be valid in general, and filter out '-' characters that appear in the middle of a
229- * character class.
230- */
231- . filter ( charInfo => ! ( charInfo . text === "-" && charInfo . inCharClass && ! charInfo . startsCharClass && ! charInfo . endsCharClass ) )
232-
233- /*
234- * The '^' character is also a special case; it must always be escaped outside of character classes, but
235- * it only needs to be escaped in character classes if it's at the beginning of the character class. To
236- * account for this, consider it to be a valid escape character outside of character classes, and filter
237- * out '^' characters that appear at the start of a character class.
238- */
239- . filter ( charInfo => ! ( charInfo . text === "^" && charInfo . startsCharClass ) )
240-
241- // Filter out characters that aren't escaped.
242- . filter ( charInfo => charInfo . escaped )
243-
244- // Filter out characters that are valid to escape, based on their position in the regular expression.
245- . filter ( charInfo => ! ( charInfo . inCharClass ? REGEX_GENERAL_ESCAPES : REGEX_NON_CHARCLASS_ESCAPES ) . has ( charInfo . text ) )
246-
247- // Report all the remaining characters.
248- . forEach ( charInfo => report ( node , charInfo . index , charInfo . text ) ) ;
323+ validateRegExp ( node ) ;
249324 }
250325
251326 }
0 commit comments