@@ -13,27 +13,34 @@ const { isValidWithUnicodeFlag } = require("./utils/regular-expressions");
1313// Helpers
1414//------------------------------------------------------------------------------
1515
16+ /**
17+ * @typedef {import('@eslint-community/regexpp').AST.Character } Character
18+ * @typedef {import('@eslint-community/regexpp').AST.CharacterClassElement } CharacterClassElement
19+ */
20+
1621/**
1722 * Iterate character sequences of the given nodes.
1823 *
1924 * CharacterClassRange syntax can steal a part of a character sequence,
2025 * so this function reverts CharacterClassRange syntax and restores the sequence.
21- * @param {import('@eslint-community/regexpp').AST. CharacterClassElement[] } nodes The node list to iterate character sequences.
22- * @returns {IterableIterator<number []> } The list of character sequences.
26+ * @param {CharacterClassElement[] } nodes The node list to iterate character sequences.
27+ * @returns {IterableIterator<Character []> } The list of character sequences.
2328 */
2429function * iterateCharacterSequence ( nodes ) {
30+
31+ /** @type {Character[] } */
2532 let seq = [ ] ;
2633
2734 for ( const node of nodes ) {
2835 switch ( node . type ) {
2936 case "Character" :
30- seq . push ( node . value ) ;
37+ seq . push ( node ) ;
3138 break ;
3239
3340 case "CharacterClassRange" :
34- seq . push ( node . min . value ) ;
41+ seq . push ( node . min ) ;
3542 yield seq ;
36- seq = [ node . max . value ] ;
43+ seq = [ node . max ] ;
3744 break ;
3845
3946 case "CharacterSet" :
@@ -55,32 +62,74 @@ function *iterateCharacterSequence(nodes) {
5562 }
5663}
5764
65+
/**
 * Checks whether the given character node is a Unicode code point escape or not.
 *
 * A code point escape is the braced form `\u{...}`: a backslash, `u`, and one
 * or more hexadecimal digits enclosed in curly braces. The check is
 * case-insensitive and anchored, so the node's whole `raw` text must be the escape.
 * @param {Character} char the character node to check; only its `raw` text is read.
 * @returns {boolean} `true` if the character node is a Unicode code point escape.
 */
function isUnicodeCodePointEscape(char) {

    // The pattern must not contain whitespace: a stray space inside the literal
    // is matched literally and makes the test fail for every real escape.
    return /^\\u\{[\da-f]+\}$/iu.test(char.raw);
}
74+
75+ /**
76+ * Each function returns `true` if it detects that kind of problem.
77+ * @type {Record<string, (chars: Character[]) => boolean> }
78+ */
5879const hasCharacterSequence = {
5980 surrogatePairWithoutUFlag ( chars ) {
60- return chars . some ( ( c , i ) => i !== 0 && isSurrogatePair ( chars [ i - 1 ] , c ) ) ;
81+ return chars . some ( ( c , i ) => {
82+ if ( i === 0 ) {
83+ return false ;
84+ }
85+ const c1 = chars [ i - 1 ] ;
86+
87+ return (
88+ isSurrogatePair ( c1 . value , c . value ) &&
89+ ! isUnicodeCodePointEscape ( c1 ) &&
90+ ! isUnicodeCodePointEscape ( c )
91+ ) ;
92+ } ) ;
93+ } ,
94+
95+ surrogatePair ( chars ) {
96+ return chars . some ( ( c , i ) => {
97+ if ( i === 0 ) {
98+ return false ;
99+ }
100+ const c1 = chars [ i - 1 ] ;
101+
102+ return (
103+ isSurrogatePair ( c1 . value , c . value ) &&
104+ (
105+ isUnicodeCodePointEscape ( c1 ) ||
106+ isUnicodeCodePointEscape ( c )
107+ )
108+ ) ;
109+ } ) ;
61110 } ,
62111
63112 combiningClass ( chars ) {
64113 return chars . some ( ( c , i ) => (
65114 i !== 0 &&
66- isCombiningCharacter ( c ) &&
67- ! isCombiningCharacter ( chars [ i - 1 ] )
115+ isCombiningCharacter ( c . value ) &&
116+ ! isCombiningCharacter ( chars [ i - 1 ] . value )
68117 ) ) ;
69118 } ,
70119
71120 emojiModifier ( chars ) {
72121 return chars . some ( ( c , i ) => (
73122 i !== 0 &&
74- isEmojiModifier ( c ) &&
75- ! isEmojiModifier ( chars [ i - 1 ] )
123+ isEmojiModifier ( c . value ) &&
124+ ! isEmojiModifier ( chars [ i - 1 ] . value )
76125 ) ) ;
77126 } ,
78127
79128 regionalIndicatorSymbol ( chars ) {
80129 return chars . some ( ( c , i ) => (
81130 i !== 0 &&
82- isRegionalIndicatorSymbol ( c ) &&
83- isRegionalIndicatorSymbol ( chars [ i - 1 ] )
131+ isRegionalIndicatorSymbol ( c . value ) &&
132+ isRegionalIndicatorSymbol ( chars [ i - 1 ] . value )
84133 ) ) ;
85134 } ,
86135
@@ -90,9 +139,9 @@ const hasCharacterSequence = {
90139 return chars . some ( ( c , i ) => (
91140 i !== 0 &&
92141 i !== lastIndex &&
93- c === 0x200d &&
94- chars [ i - 1 ] !== 0x200d &&
95- chars [ i + 1 ] !== 0x200d
142+ c . value === 0x200d &&
143+ chars [ i - 1 ] . value !== 0x200d &&
144+ chars [ i + 1 ] . value !== 0x200d
96145 ) ) ;
97146 }
98147} ;
@@ -120,6 +169,7 @@ module.exports = {
120169
121170 messages : {
122171 surrogatePairWithoutUFlag : "Unexpected surrogate pair in character class. Use 'u' flag." ,
172+ surrogatePair : "Unexpected surrogate pair in character class." ,
123173 combiningClass : "Unexpected combined character in character class." ,
124174 emojiModifier : "Unexpected modified Emoji in character class." ,
125175 regionalIndicatorSymbol : "Unexpected national flag in character class." ,
0 commit comments