Skip to content

Commit d5ed922

Browse files
committed
Auto merge of #123752 - estebank:emoji-prefix, r=<try>
Properly handle emojis as literal prefix in macros

Do not accept the following:

```rust
macro_rules! lexes {($($_:tt)*) => {}}
lexes!(🐛"foo");
```

Before, invalid emoji identifiers were gated during parsing instead of lexing in all cases, but this didn't account for macro expansion of literal prefixes.

Fix #123696.
2 parents b3bd705 + 92debb1 commit d5ed922

File tree

4 files changed

+31
-2
lines changed

4 files changed

+31
-2
lines changed

compiler/rustc_lexer/src/lib.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ pub enum TokenKind {
8888
/// tokens.
8989
UnknownPrefix,
9090

91+
/// Similar to the above, but *always* an error on every edition. This is used
92+
/// for emoji identifier recovery, as those are never meant to be accepted.
93+
InvalidPrefix,
94+
9195
/// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
9296
/// suffix, but may be present here on string and float literals. Users of
9397
/// this type will need to check for and reject that case.
@@ -528,7 +532,7 @@ impl Cursor<'_> {
528532
// Known prefixes must have been handled earlier. So if
529533
// we see a prefix here, it is definitely an unknown prefix.
530534
match self.first() {
531-
'#' | '"' | '\'' => UnknownPrefix,
535+
'#' | '"' | '\'' => InvalidPrefix,
532536
_ => InvalidIdent,
533537
}
534538
}

compiler/rustc_parse/src/lexer/mod.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
204204
self.ident(start)
205205
}
206206
rustc_lexer::TokenKind::InvalidIdent
207+
| rustc_lexer::TokenKind::InvalidPrefix
207208
// Do not recover an identifier with emoji if the codepoint is a confusable
208209
// with a recoverable substitution token, like `➖`.
209210
if !UNICODE_ARRAY
@@ -301,7 +302,9 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
301302
rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
302303
rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
303304

304-
rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => {
305+
rustc_lexer::TokenKind::Unknown
306+
| rustc_lexer::TokenKind::InvalidIdent
307+
| rustc_lexer::TokenKind::InvalidPrefix => {
305308
// Don't emit diagnostics for sequences of the same invalid token
306309
if swallow_next_invalid > 0 {
307310
swallow_next_invalid -= 1;
+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
macro_rules! lexes {($($_:tt)*) => {}}
2+
3+
lexes!(🐛#); //~ ERROR identifiers cannot contain emoji
4+
lexes!(🐛"foo");
5+
lexes!(🐛'q');
6+
lexes!(🐛'q);
7+
8+
fn main() {}
+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
error: identifiers cannot contain emoji: `🐛`
2+
--> $DIR/emoji-literal-prefix.rs:3:8
3+
|
4+
LL | lexes!(🐛#);
5+
| ^^
6+
LL | lexes!(🐛"foo");
7+
| ^^
8+
LL | lexes!(🐛'q');
9+
| ^^
10+
LL | lexes!(🐛'q);
11+
| ^^
12+
13+
error: aborting due to 1 previous error
14+

0 commit comments

Comments
 (0)