Skip to content

Commit 3c05b81

Browse files
Dan Rubel authored and commit-bot@chromium.org committed
move identifier recovery into scanner
Change-Id: I1e4f4fa900b72e4fd405ae8dc86dfa9bc13f370f
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/110183
Commit-Queue: Dan Rubel <[email protected]>
Reviewed-by: Brian Wilkerson <[email protected]>
1 parent c62bd8f commit 3c05b81

13 files changed

+133
-137
lines changed

pkg/front_end/lib/src/fasta/scanner/abstract_scanner.dart

Lines changed: 40 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,6 @@ import '../../scanner/token.dart' as analyzer show StringToken;
1515

1616
import '../fasta_codes.dart'
1717
show
18-
Message,
1918
messageExpectedHexDigit,
2019
messageMissingExponent,
2120
messageUnexpectedDollarInString,
@@ -28,14 +27,16 @@ import '../util/link.dart' show Link;
2827

2928
import 'error_token.dart'
3029
show
30+
NonAsciiIdentifierToken,
3131
UnmatchedToken,
3232
UnsupportedOperator,
3333
UnterminatedString,
3434
UnterminatedToken;
3535

3636
import 'keyword_state.dart' show KeywordState;
3737

38-
import 'token.dart' show CommentToken, DartDocToken, LanguageVersionToken;
38+
import 'token.dart'
39+
show CommentToken, DartDocToken, LanguageVersionToken, StringToken;
3940

4041
import 'token_constants.dart';
4142

@@ -421,7 +422,10 @@ abstract class AbstractScanner implements Scanner {
421422
}
422423

423424
/// Append [token] to the token stream.
425+
/// DEPRECATED: Use prependErrorToken instead.
424426
void appendErrorToken(ErrorToken token) {
427+
// TODO(danrubel): Update scanner to use prependErrorToken everywhere
428+
// then remove this method
425429
hasErrors = true;
426430
appendToken(token);
427431
}
@@ -1497,11 +1501,7 @@ abstract class AbstractScanner implements Scanner {
14971501
*/
14981502
int tokenizeIdentifier(int next, int start, bool allowDollar) {
14991503
while (true) {
1500-
if (($a <= next && next <= $z) ||
1501-
($A <= next && next <= $Z) ||
1502-
($0 <= next && next <= $9) ||
1503-
identical(next, $_) ||
1504-
(identical(next, $$) && allowDollar)) {
1504+
if (_isIdentifierChar(next, allowDollar)) {
15051505
next = advance();
15061506
} else {
15071507
// Identifier ends here.
@@ -1751,8 +1751,31 @@ abstract class AbstractScanner implements Scanner {
17511751
}
17521752

17531753
int unexpected(int character) {
1754-
appendErrorToken(buildUnexpectedCharacterToken(character, tokenStart));
1755-
return advanceAfterError(true);
1754+
var errorToken = buildUnexpectedCharacterToken(character, tokenStart);
1755+
if (errorToken is NonAsciiIdentifierToken) {
1756+
int charOffset;
1757+
List<int> codeUnits = <int>[];
1758+
if (tail.type == TokenType.IDENTIFIER && tail.charEnd == tokenStart) {
1759+
charOffset = tail.charOffset;
1760+
codeUnits.addAll(tail.lexeme.codeUnits);
1761+
tail = tail.previous;
1762+
} else {
1763+
charOffset = errorToken.charOffset;
1764+
}
1765+
codeUnits.add(errorToken.character);
1766+
prependErrorToken(errorToken);
1767+
int next = advanceAfterError(true);
1768+
while (_isIdentifierChar(next, true)) {
1769+
codeUnits.add(next);
1770+
next = advance();
1771+
}
1772+
appendToken(StringToken.fromString(TokenType.IDENTIFIER,
1773+
new String.fromCharCodes(codeUnits), charOffset));
1774+
return next;
1775+
} else {
1776+
prependErrorToken(errorToken);
1777+
return advanceAfterError(true);
1778+
}
17561779
}
17571780

17581781
void unterminatedString(int quoteChar, int quoteStart, int start,
@@ -1891,3 +1914,11 @@ class ScannerConfiguration {
18911914
this.enableNonNullable = enableNonNullable ?? false,
18921915
this.enableTripleShift = enableTripleShift ?? false;
18931916
}
1917+
1918+
bool _isIdentifierChar(int next, bool allowDollar) {
1919+
return ($a <= next && next <= $z) ||
1920+
($A <= next && next <= $Z) ||
1921+
($0 <= next && next <= $9) ||
1922+
identical(next, $_) ||
1923+
(identical(next, $$) && allowDollar);
1924+
}

pkg/front_end/lib/src/fasta/scanner/recover.dart

Lines changed: 2 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -55,74 +55,8 @@ Token scannerRecovery(List<int> bytes, Token tokens, List<int> lineStarts) {
5555
/// Used for appending to [good].
5656
Token goodTail;
5757

58-
/// The previous token appended to [good]. Since tokens are single linked
59-
/// lists, this allows us to rewrite the current token without scanning all
60-
/// of [good]. This is supposed to be the token immediately before
61-
/// [goodTail], that is, `beforeGoodTail.next == goodTail`.
62-
Token beforeGoodTail;
63-
6458
recoverIdentifier(NonAsciiIdentifierToken first) {
65-
List<int> codeUnits = <int>[];
66-
67-
// True if the previous good token is an identifier and ends right where
68-
// [first] starts. This is the case for input like `blåbærgrød`. In this
69-
// case, the scanner produces this sequence of tokens:
70-
//
71-
// [
72-
// StringToken("bl"),
73-
// NonAsciiIdentifierToken("å"),
74-
// StringToken("b"),
75-
// NonAsciiIdentifierToken("æ"),
76-
// StringToken("rgr"),
77-
// NonAsciiIdentifierToken("ø"),
78-
// StringToken("d"),
79-
// EOF,
80-
// ]
81-
bool prepend = false;
82-
83-
// True if following token is also an identifier that starts right where
84-
// [errorTail] ends. This is the case for "b" above.
85-
bool append = false;
86-
if (goodTail != null) {
87-
if (goodTail.type == TokenType.IDENTIFIER &&
88-
goodTail.charEnd == first.charOffset) {
89-
prepend = true;
90-
}
91-
}
92-
Token next = errorTail.next;
93-
if (next.type == TokenType.IDENTIFIER &&
94-
errorTail.charOffset + 1 == next.charOffset) {
95-
append = true;
96-
}
97-
if (prepend) {
98-
codeUnits.addAll(goodTail.lexeme.codeUnits);
99-
}
100-
NonAsciiIdentifierToken current = first;
101-
while (current != errorTail) {
102-
codeUnits.add(current.character);
103-
current = current.next;
104-
}
105-
codeUnits.add(errorTail.character);
106-
int charOffset = first.charOffset;
107-
if (prepend) {
108-
charOffset = goodTail.charOffset;
109-
if (beforeGoodTail == null) {
110-
// We're prepending the first good token, so the new token will become
111-
// the first good token.
112-
good = null;
113-
goodTail = null;
114-
beforeGoodTail = null;
115-
} else {
116-
goodTail = beforeGoodTail;
117-
}
118-
}
119-
if (append) {
120-
codeUnits.addAll(next.lexeme.codeUnits);
121-
next = next.next;
122-
}
123-
String value = new String.fromCharCodes(codeUnits);
124-
return synthesizeToken(charOffset, value, TokenType.IDENTIFIER)
125-
..setNext(next);
59+
throw "Internal error: Identifier error token should have been prepended";
12660
}
12761

12862
recoverExponent() {
@@ -153,13 +87,7 @@ Token scannerRecovery(List<int> bytes, Token tokens, List<int> lineStarts) {
15387

15488
// All unmatched error tokens should have been prepended
15589
Token current = tokens;
156-
while (current is ErrorToken &&
157-
(current.errorCode == codeExpectedHexDigit ||
158-
current.errorCode == codeUnexpectedDollarInString ||
159-
current.errorCode == codeMissingExponent ||
160-
current.errorCode == codeUnmatchedToken ||
161-
current.errorCode == codeUnterminatedComment ||
162-
current.errorCode == codeUnterminatedString)) {
90+
while (current is ErrorToken) {
16391
if (errorTail == null) {
16492
error = current;
16593
}
@@ -217,7 +145,6 @@ Token scannerRecovery(List<int> bytes, Token tokens, List<int> lineStarts) {
217145
} else {
218146
goodTail.setNext(current);
219147
}
220-
beforeGoodTail = goodTail;
221148
goodTail = current;
222149
}
223150

pkg/front_end/test/scanner_test.dart

Lines changed: 41 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -316,12 +316,50 @@ abstract class ScannerTestBase {
316316
}
317317

318318
void test_illegalChar_cyrillicLetter_middle() {
319-
_assertError(
320-
ScannerErrorCode.ILLEGAL_CHARACTER, 5, "Shche\u0433lov", [0x433]);
319+
final identifier = "Shche\u0433lov";
320+
final token = _assertError(
321+
ScannerErrorCode.ILLEGAL_CHARACTER, 5, identifier, [0x433]);
322+
expect(token.type, TokenType.IDENTIFIER);
323+
expect(token.lexeme, identifier);
324+
}
325+
326+
void test_illegalChar_cyrillicLetter_multiple() {
327+
ErrorListener listener = new ErrorListener();
328+
var tokens = scanWithListener("a = Shche\u0433lov\u0429x;", listener);
329+
listener.assertErrors([
330+
new TestError(9, ScannerErrorCode.ILLEGAL_CHARACTER, [0x433]),
331+
new TestError(13, ScannerErrorCode.ILLEGAL_CHARACTER, [0x429]),
332+
]);
333+
var token = tokens;
334+
expect(token.lexeme, 'a');
335+
token = token.next;
336+
expect(token.lexeme, '=');
337+
token = token.next;
338+
expect(token.type, TokenType.IDENTIFIER);
339+
expect(token.lexeme, "Shche\u0433lov\u0429x");
340+
token = token.next;
341+
expect(token.lexeme, ';');
321342
}
322343

323344
void test_illegalChar_cyrillicLetter_start() {
324-
_assertError(ScannerErrorCode.ILLEGAL_CHARACTER, 0, "\u0429", [0x429]);
345+
final identifier = "\u0429";
346+
final token = _assertError(
347+
ScannerErrorCode.ILLEGAL_CHARACTER, 0, identifier, [0x429]);
348+
expect(token.type, TokenType.IDENTIFIER);
349+
expect(token.lexeme, identifier);
350+
}
351+
352+
void test_illegalChar_cyrillicLetter_start_expression() {
353+
var token = _assertError(
354+
ScannerErrorCode.ILLEGAL_CHARACTER, 4, 'a = \u0429;', [0x429]);
355+
expect(token.lexeme, 'a');
356+
token = token.next;
357+
expect(token.lexeme, '=');
358+
token = token.next;
359+
expect(token.type, TokenType.IDENTIFIER);
360+
expect(token.lexeme, "\u0429");
361+
token = token.next;
362+
expect(token.lexeme, ';');
325363
}
326364

327365
void test_illegalChar_nbsp() {

pkg/front_end/testcases/regress/issue_29976.dart.legacy.expect

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,11 @@ library;
22
//
33
// Problems in library:
44
//
5+
// pkg/front_end/testcases/regress/issue_29976.dart:9:14: Error: The non-ASCII character 'é' (U+00E9) can't be used in identifiers, only in strings and comments.
6+
// Try using an US-ASCII letter, a digit, '_' (an underscore), or '$' (a dollar sign).
7+
// "x${x*"'"é'}x
8+
// ^
9+
//
510
// pkg/front_end/testcases/regress/issue_29976.dart:9:15: Error: String starting with ' must end with '.
611
// "x${x*"'"é'}x
712
// ^^^
@@ -14,11 +19,6 @@ library;
1419
// "x${x*"'"é'}x
1520
// ^^^^^^^^^^^^^^...
1621
//
17-
// pkg/front_end/testcases/regress/issue_29976.dart:9:14: Error: The non-ASCII character 'é' (U+00E9) can't be used in identifiers, only in strings and comments.
18-
// Try using an US-ASCII letter, a digit, '_' (an underscore), or '$' (a dollar sign).
19-
// "x${x*"'"é'}x
20-
// ^
21-
//
2222
// pkg/front_end/testcases/regress/issue_29976.dart:12:1: Error: Expected a declaration, but got ''.
2323
//
2424
// pkg/front_end/testcases/regress/issue_29976.dart:9:14: Error: Expected '}' before this.

pkg/front_end/testcases/regress/issue_29976.dart.legacy.transformed.expect

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,11 @@ library;
22
//
33
// Problems in library:
44
//
5+
// pkg/front_end/testcases/regress/issue_29976.dart:9:14: Error: The non-ASCII character 'é' (U+00E9) can't be used in identifiers, only in strings and comments.
6+
// Try using an US-ASCII letter, a digit, '_' (an underscore), or '$' (a dollar sign).
7+
// "x${x*"'"é'}x
8+
// ^
9+
//
510
// pkg/front_end/testcases/regress/issue_29976.dart:9:15: Error: String starting with ' must end with '.
611
// "x${x*"'"é'}x
712
// ^^^
@@ -14,11 +19,6 @@ library;
1419
// "x${x*"'"é'}x
1520
// ^^^^^^^^^^^^^^...
1621
//
17-
// pkg/front_end/testcases/regress/issue_29976.dart:9:14: Error: The non-ASCII character 'é' (U+00E9) can't be used in identifiers, only in strings and comments.
18-
// Try using an US-ASCII letter, a digit, '_' (an underscore), or '$' (a dollar sign).
19-
// "x${x*"'"é'}x
20-
// ^
21-
//
2222
// pkg/front_end/testcases/regress/issue_29976.dart:12:1: Error: Expected a declaration, but got ''.
2323
//
2424
// pkg/front_end/testcases/regress/issue_29976.dart:9:14: Error: Expected '}' before this.

pkg/front_end/testcases/regress/issue_29976.dart.outline.expect

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,11 @@ library;
22
//
33
// Problems in library:
44
//
5+
// pkg/front_end/testcases/regress/issue_29976.dart:9:14: Error: The non-ASCII character 'é' (U+00E9) can't be used in identifiers, only in strings and comments.
6+
// Try using an US-ASCII letter, a digit, '_' (an underscore), or '$' (a dollar sign).
7+
// "x${x*"'"é'}x
8+
// ^
9+
//
510
// pkg/front_end/testcases/regress/issue_29976.dart:9:15: Error: String starting with ' must end with '.
611
// "x${x*"'"é'}x
712
// ^^^
@@ -14,11 +19,6 @@ library;
1419
// "x${x*"'"é'}x
1520
// ^^^^^^^^^^^^^^...
1621
//
17-
// pkg/front_end/testcases/regress/issue_29976.dart:9:14: Error: The non-ASCII character 'é' (U+00E9) can't be used in identifiers, only in strings and comments.
18-
// Try using an US-ASCII letter, a digit, '_' (an underscore), or '$' (a dollar sign).
19-
// "x${x*"'"é'}x
20-
// ^
21-
//
2222
// pkg/front_end/testcases/regress/issue_29976.dart:12:1: Error: Expected a declaration, but got ''.
2323
//
2424
import self as self;

pkg/front_end/testcases/regress/issue_29976.dart.strong.expect

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,11 @@ library;
22
//
33
// Problems in library:
44
//
5+
// pkg/front_end/testcases/regress/issue_29976.dart:9:14: Error: The non-ASCII character 'é' (U+00E9) can't be used in identifiers, only in strings and comments.
6+
// Try using an US-ASCII letter, a digit, '_' (an underscore), or '$' (a dollar sign).
7+
// "x${x*"'"é'}x
8+
// ^
9+
//
510
// pkg/front_end/testcases/regress/issue_29976.dart:9:15: Error: String starting with ' must end with '.
611
// "x${x*"'"é'}x
712
// ^^^
@@ -14,11 +19,6 @@ library;
1419
// "x${x*"'"é'}x
1520
// ^^^^^^^^^^^^^^...
1621
//
17-
// pkg/front_end/testcases/regress/issue_29976.dart:9:14: Error: The non-ASCII character 'é' (U+00E9) can't be used in identifiers, only in strings and comments.
18-
// Try using an US-ASCII letter, a digit, '_' (an underscore), or '$' (a dollar sign).
19-
// "x${x*"'"é'}x
20-
// ^
21-
//
2222
// pkg/front_end/testcases/regress/issue_29976.dart:12:1: Error: Expected a declaration, but got ''.
2323
//
2424
// pkg/front_end/testcases/regress/issue_29976.dart:9:14: Error: Expected '}' before this.

pkg/front_end/testcases/regress/issue_29976.dart.strong.transformed.expect

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,11 @@ library;
22
//
33
// Problems in library:
44
//
5+
// pkg/front_end/testcases/regress/issue_29976.dart:9:14: Error: The non-ASCII character 'é' (U+00E9) can't be used in identifiers, only in strings and comments.
6+
// Try using an US-ASCII letter, a digit, '_' (an underscore), or '$' (a dollar sign).
7+
// "x${x*"'"é'}x
8+
// ^
9+
//
510
// pkg/front_end/testcases/regress/issue_29976.dart:9:15: Error: String starting with ' must end with '.
611
// "x${x*"'"é'}x
712
// ^^^
@@ -14,11 +19,6 @@ library;
1419
// "x${x*"'"é'}x
1520
// ^^^^^^^^^^^^^^...
1621
//
17-
// pkg/front_end/testcases/regress/issue_29976.dart:9:14: Error: The non-ASCII character 'é' (U+00E9) can't be used in identifiers, only in strings and comments.
18-
// Try using an US-ASCII letter, a digit, '_' (an underscore), or '$' (a dollar sign).
19-
// "x${x*"'"é'}x
20-
// ^
21-
//
2222
// pkg/front_end/testcases/regress/issue_29976.dart:12:1: Error: Expected a declaration, but got ''.
2323
//
2424
// pkg/front_end/testcases/regress/issue_29976.dart:9:14: Error: Expected '}' before this.

pkg/front_end/testcases/regress/issue_29982.dart.legacy.expect

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,11 @@ library;
22
//
33
// Problems in library:
44
//
5+
// pkg/front_end/testcases/regress/issue_29982.dart:7:15: Error: The non-ASCII character 'é' (U+00E9) can't be used in identifiers, only in strings and comments.
6+
// Try using an US-ASCII letter, a digit, '_' (an underscore), or '$' (a dollar sign).
7+
// print('${eh[éh']}');
8+
// ^
9+
//
510
// pkg/front_end/testcases/regress/issue_29982.dart:7:14: Error: Can't find ']' to match '['.
611
// print('${eh[éh']}');
712
// ^
@@ -14,11 +19,6 @@ library;
1419
// print('${eh[éh']}');
1520
// ^^^^^^^^^^^^^...
1621
//
17-
// pkg/front_end/testcases/regress/issue_29982.dart:7:15: Error: The non-ASCII character 'é' (U+00E9) can't be used in identifiers, only in strings and comments.
18-
// Try using an US-ASCII letter, a digit, '_' (an underscore), or '$' (a dollar sign).
19-
// print('${eh[éh']}');
20-
// ^
21-
//
2222
// pkg/front_end/testcases/regress/issue_29982.dart:9:1: Error: Expected a declaration, but got ''.
2323
//
2424
// pkg/front_end/testcases/regress/issue_29982.dart:7:17: Error: Expected ']' before this.

0 commit comments

Comments
 (0)