Skip to content

Commit cab0c22

Browse files
feat: add Unicode flag suggestion in no-misleading-character-class (#15867)
* refactor: Simplify kinds tracking with a Set
* feat: Add fixer for missing regex unicode flag
* test: Update tests
* refactor: Change fix to suggestion
* Revert "test: Update tests"
* Address review comments
* Add tests
* Update lib/rules/no-misleading-character-class.js

  Co-authored-by: Milos Djermanovic <[email protected]>
* add suggestions:null assertions
* update docs
* add more tests
* fix edge cases when inserting flags argument
* validate pattern for regex literals
* validate pattern for regex constructor calls

Co-authored-by: Mathias Rasmussen <[email protected]>
1 parent c686e4c commit cab0c22

3 files changed

Lines changed: 472 additions & 71 deletions

File tree

docs/src/rules/no-misleading-character-class.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ rule_type: problem
77

88
<!--RECOMMENDED-->
99

10+
<!--SUGGESTIONS-->
11+
1012
Disallows characters which are made with multiple code points in character class syntax.
1113

1214
Unicode includes the characters which are made with multiple code points.

lib/rules/no-misleading-character-class.js

Lines changed: 90 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,16 @@
44
"use strict";
55

66
const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("eslint-utils");
7-
const { RegExpParser, visitRegExpAST } = require("regexpp");
7+
const { RegExpValidator, RegExpParser, visitRegExpAST } = require("regexpp");
88
const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
9+
const astUtils = require("./utils/ast-utils.js");
910

1011
//------------------------------------------------------------------------------
1112
// Helpers
1213
//------------------------------------------------------------------------------
1314

15+
const REGEXPP_LATEST_ECMA_VERSION = 2022;
16+
1417
/**
1518
* Iterate character sequences of a given nodes.
1619
*
@@ -109,35 +112,32 @@ module.exports = {
109112
url: "https://eslint.org/docs/rules/no-misleading-character-class"
110113
},
111114

115+
hasSuggestions: true,
116+
112117
schema: [],
113118

114119
messages: {
115120
surrogatePairWithoutUFlag: "Unexpected surrogate pair in character class. Use 'u' flag.",
116121
combiningClass: "Unexpected combined character in character class.",
117122
emojiModifier: "Unexpected modified Emoji in character class.",
118123
regionalIndicatorSymbol: "Unexpected national flag in character class.",
119-
zwj: "Unexpected joined character sequence in character class."
124+
zwj: "Unexpected joined character sequence in character class.",
125+
suggestUnicodeFlag: "Add unicode 'u' flag to regex."
120126
}
121127
},
122128
create(context) {
129+
const sourceCode = context.getSourceCode();
123130
const parser = new RegExpParser();
124131

125132
/**
126133
* Verify a given regular expression.
127134
* @param {Node} node The node to report.
128135
* @param {string} pattern The regular expression pattern to verify.
129136
* @param {string} flags The flags of the regular expression.
137+
* @param {Function} unicodeFixer Fixer for missing "u" flag.
130138
* @returns {void}
131139
*/
132-
function verify(node, pattern, flags) {
133-
const has = {
134-
surrogatePairWithoutUFlag: false,
135-
combiningClass: false,
136-
variationSelector: false,
137-
emojiModifier: false,
138-
regionalIndicatorSymbol: false,
139-
zwj: false
140-
};
140+
function verify(node, pattern, flags, unicodeFixer) {
141141
let patternNode;
142142

143143
try {
@@ -153,26 +153,75 @@ module.exports = {
153153
return;
154154
}
155155

156+
const foundKinds = new Set();
157+
156158
visitRegExpAST(patternNode, {
157159
onCharacterClassEnter(ccNode) {
158160
for (const chars of iterateCharacterSequence(ccNode.elements)) {
159161
for (const kind of kinds) {
160-
has[kind] = has[kind] || hasCharacterSequence[kind](chars);
162+
if (hasCharacterSequence[kind](chars)) {
163+
foundKinds.add(kind);
164+
}
161165
}
162166
}
163167
}
164168
});
165169

166-
for (const kind of kinds) {
167-
if (has[kind]) {
168-
context.report({ node, messageId: kind });
170+
for (const kind of foundKinds) {
171+
let suggest;
172+
173+
if (kind === "surrogatePairWithoutUFlag") {
174+
suggest = [{
175+
messageId: "suggestUnicodeFlag",
176+
fix: unicodeFixer
177+
}];
169178
}
179+
180+
context.report({
181+
node,
182+
messageId: kind,
183+
suggest
184+
});
170185
}
171186
}
172187

/**
 * Checks if the given regular expression pattern would be valid with the `u` flag.
 *
 * The language version is read from `context.parserOptions.ecmaVersion`. Both
 * edition-style values (e.g. `6`) and year-style values (e.g. `2015`) are accepted;
 * the resulting year is capped at `REGEXPP_LATEST_ECMA_VERSION` before it is
 * handed to the validator.
 * @param {string} pattern The regular expression pattern to verify.
 * @returns {boolean} `true` if the pattern would be valid with the `u` flag.
 * `false` if the pattern would be invalid with the `u` flag or the configured
 * ecmaVersion doesn't support the `u` flag.
 */
function isValidWithUnicodeFlag(pattern) {
    const { ecmaVersion } = context.parserOptions;

    // ecmaVersion is unknown or it doesn't support the 'u' flag
    if (typeof ecmaVersion !== "number" || ecmaVersion <= 5) {
        return false;
    }

    /*
     * Edition numbers (6, 7, ...) differ from their release year by 2009.
     * A value that is already a year (2015 or later) must be used as-is;
     * adding 2009 to it would always overshoot and be clamped to the latest version.
     */
    const ecmaYear = ecmaVersion >= 2015 ? ecmaVersion : ecmaVersion + 2009;

    const validator = new RegExpValidator({
        ecmaVersion: Math.min(ecmaYear, REGEXPP_LATEST_ECMA_VERSION)
    });

    try {

        // The validator throws on an invalid pattern; only success/failure matters here.
        validator.validatePattern(pattern, void 0, void 0, /* uFlag = */ true);
    } catch {
        return false;
    }

    return true;
}
215+
173216
return {
174217
"Literal[regex]"(node) {
175-
verify(node, node.regex.pattern, node.regex.flags);
218+
verify(node, node.regex.pattern, node.regex.flags, fixer => {
219+
if (!isValidWithUnicodeFlag(node.regex.pattern)) {
220+
return null;
221+
}
222+
223+
return fixer.insertTextAfter(node, "u");
224+
});
176225
},
177226
"Program"() {
178227
const scope = context.getScope();
@@ -191,7 +240,31 @@ module.exports = {
191240
const flags = getStringIfConstant(flagsNode, scope);
192241

193242
if (typeof pattern === "string") {
194-
verify(node, pattern, flags || "");
243+
verify(node, pattern, flags || "", fixer => {
244+
245+
if (!isValidWithUnicodeFlag(pattern)) {
246+
return null;
247+
}
248+
249+
if (node.arguments.length === 1) {
250+
const penultimateToken = sourceCode.getLastToken(node, { skip: 1 }); // skip closing parenthesis
251+
252+
return fixer.insertTextAfter(
253+
penultimateToken,
254+
astUtils.isCommaToken(penultimateToken)
255+
? ' "u",'
256+
: ', "u"'
257+
);
258+
}
259+
260+
if ((flagsNode.type === "Literal" && typeof flagsNode.value === "string") || flagsNode.type === "TemplateLiteral") {
261+
const range = [flagsNode.range[0], flagsNode.range[1] - 1];
262+
263+
return fixer.insertTextAfterRange(range, "u");
264+
}
265+
266+
return null;
267+
});
195268
}
196269
}
197270
}

0 commit comments

Comments
 (0)