44"use strict" ;
55
66const { CALL , CONSTRUCT , ReferenceTracker, getStringIfConstant } = require ( "eslint-utils" ) ;
7- const { RegExpParser, visitRegExpAST } = require ( "regexpp" ) ;
7+ const { RegExpValidator , RegExpParser, visitRegExpAST } = require ( "regexpp" ) ;
88const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require ( "./utils/unicode" ) ;
9+ const astUtils = require ( "./utils/ast-utils.js" ) ;
910
1011//------------------------------------------------------------------------------
1112// Helpers
1213//------------------------------------------------------------------------------
1314
// Upper bound for the `ecmaVersion` option handed to regexpp's `RegExpValidator` in this file.
// NOTE(review): presumably the newest ECMAScript year the bundled regexpp supports — confirm, and bump when regexpp is upgraded.
15+ const REGEXPP_LATEST_ECMA_VERSION = 2022 ;
16+
1417/**
1518 * Iterate character sequences of a given nodes.
1619 *
@@ -109,35 +112,32 @@ module.exports = {
109112 url : "https://eslint.org/docs/rules/no-misleading-character-class"
110113 } ,
111114
115+ hasSuggestions : true ,
116+
112117 schema : [ ] ,
113118
114119 messages : {
115120 surrogatePairWithoutUFlag : "Unexpected surrogate pair in character class. Use 'u' flag." ,
116121 combiningClass : "Unexpected combined character in character class." ,
117122 emojiModifier : "Unexpected modified Emoji in character class." ,
118123 regionalIndicatorSymbol : "Unexpected national flag in character class." ,
119- zwj : "Unexpected joined character sequence in character class."
124+ zwj : "Unexpected joined character sequence in character class." ,
125+ suggestUnicodeFlag : "Add unicode 'u' flag to regex."
120126 }
121127 } ,
122128 create ( context ) {
129+ const sourceCode = context . getSourceCode ( ) ;
123130 const parser = new RegExpParser ( ) ;
124131
125132 /**
126133 * Verify a given regular expression.
127134 * @param {Node } node The node to report.
128135 * @param {string } pattern The regular expression pattern to verify.
129136 * @param {string } flags The flags of the regular expression.
137+ * @param {Function } unicodeFixer Fixer for missing "u" flag.
130138 * @returns {void }
131139 */
132- function verify ( node , pattern , flags ) {
133- const has = {
134- surrogatePairWithoutUFlag : false ,
135- combiningClass : false ,
136- variationSelector : false ,
137- emojiModifier : false ,
138- regionalIndicatorSymbol : false ,
139- zwj : false
140- } ;
140+ function verify ( node , pattern , flags , unicodeFixer ) {
141141 let patternNode ;
142142
143143 try {
@@ -153,26 +153,75 @@ module.exports = {
153153 return ;
154154 }
155155
156+ const foundKinds = new Set ( ) ;
157+
156158 visitRegExpAST ( patternNode , {
157159 onCharacterClassEnter ( ccNode ) {
158160 for ( const chars of iterateCharacterSequence ( ccNode . elements ) ) {
159161 for ( const kind of kinds ) {
160- has [ kind ] = has [ kind ] || hasCharacterSequence [ kind ] ( chars ) ;
162+ if ( hasCharacterSequence [ kind ] ( chars ) ) {
163+ foundKinds . add ( kind ) ;
164+ }
161165 }
162166 }
163167 }
164168 } ) ;
165169
166- for ( const kind of kinds ) {
167- if ( has [ kind ] ) {
168- context . report ( { node, messageId : kind } ) ;
170+ for ( const kind of foundKinds ) {
171+ let suggest ;
172+
173+ if ( kind === "surrogatePairWithoutUFlag" ) {
174+ suggest = [ {
175+ messageId : "suggestUnicodeFlag" ,
176+ fix : unicodeFixer
177+ } ] ;
169178 }
179+
180+ context . report ( {
181+ node,
182+ messageId : kind ,
183+ suggest
184+ } ) ;
170185 }
171186 }
172187
/**
 * Checks if the given regular expression pattern would be valid with the `u` flag.
 * @param {string} pattern The regular expression pattern to verify.
 * @returns {boolean} `true` if the pattern would be valid with the `u` flag.
 * `false` if the pattern would be invalid with the `u` flag or the configured
 * ecmaVersion doesn't support the `u` flag.
 */
function isValidWithUnicodeFlag(pattern) {
    const { ecmaVersion } = context.parserOptions;

    // ecmaVersion is unknown or it doesn't support the 'u' flag
    if (typeof ecmaVersion !== "number" || ecmaVersion <= 5) {
        return false;
    }

    /*
     * `ecmaVersion` may be an edition number (6, 7, ...) or a year (2015, 2016, ...).
     * Only edition numbers need the 2009 offset; adding it to a year would overshoot
     * and get clamped to the latest version, validating against newer syntax than configured.
     */
    const ecmaYear = ecmaVersion >= 2015 ? ecmaVersion : ecmaVersion + 2009;

    const validator = new RegExpValidator({

        // never ask the validator for a version newer than the clamp constant
        ecmaVersion: Math.min(ecmaYear, REGEXPP_LATEST_ECMA_VERSION)
    });

    try {
        validator.validatePattern(pattern, void 0, void 0, /* uFlag = */ true);
    } catch {

        // the validator throws when the pattern is not valid with the `u` flag
        return false;
    }

    return true;
}
215+
173216 return {
174217 "Literal[regex]" ( node ) {
175- verify ( node , node . regex . pattern , node . regex . flags ) ;
218+ verify ( node , node . regex . pattern , node . regex . flags , fixer => {
219+ if ( ! isValidWithUnicodeFlag ( node . regex . pattern ) ) {
220+ return null ;
221+ }
222+
223+ return fixer . insertTextAfter ( node , "u" ) ;
224+ } ) ;
176225 } ,
177226 "Program" ( ) {
178227 const scope = context . getScope ( ) ;
@@ -191,7 +240,31 @@ module.exports = {
191240 const flags = getStringIfConstant ( flagsNode , scope ) ;
192241
193242 if ( typeof pattern === "string" ) {
194- verify ( node , pattern , flags || "" ) ;
243+ verify ( node , pattern , flags || "" , fixer => {
244+
245+ if ( ! isValidWithUnicodeFlag ( pattern ) ) {
246+ return null ;
247+ }
248+
249+ if ( node . arguments . length === 1 ) {
250+ const penultimateToken = sourceCode . getLastToken ( node , { skip : 1 } ) ; // skip closing parenthesis
251+
252+ return fixer . insertTextAfter (
253+ penultimateToken ,
254+ astUtils . isCommaToken ( penultimateToken )
255+ ? ' "u",'
256+ : ', "u"'
257+ ) ;
258+ }
259+
260+ if ( ( flagsNode . type === "Literal" && typeof flagsNode . value === "string" ) || flagsNode . type === "TemplateLiteral" ) {
261+ const range = [ flagsNode . range [ 0 ] , flagsNode . range [ 1 ] - 1 ] ;
262+
263+ return fixer . insertTextAfterRange ( range , "u" ) ;
264+ }
265+
266+ return null ;
267+ } ) ;
195268 }
196269 }
197270 }
0 commit comments