Skip to content

Commit 23d8678

Browse files
authored
Unrolled build for rust-lang#123752
Rollup merge of rust-lang#123752 - estebank:emoji-prefix, r=wesleywiser

Properly handle emojis as literal prefix in macros

Do not accept the following

```rust
macro_rules! lexes {($($_:tt)*) => {}}
lexes!(🐛"foo");
```

Before, invalid emoji identifiers were gated during parsing instead of lexing in all cases, but this didn't account for macro pre-expansion of literal prefixes.

Fix rust-lang#123696.
2 parents 13e63f7 + 19821ad commit 23d8678

File tree

6 files changed

+36
-6
lines changed

6 files changed

+36
-6
lines changed

compiler/rustc_lexer/src/lib.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ pub enum TokenKind {
8888
/// tokens.
8989
UnknownPrefix,
9090

91+
/// Similar to the above, but *always* an error on every edition. This is used
92+
/// for emoji identifier recovery, as those are not meant to be ever accepted.
93+
InvalidPrefix,
94+
9195
/// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
9296
/// suffix, but may be present here on string and float literals. Users of
9397
/// this type will need to check for and reject that case.
@@ -528,7 +532,7 @@ impl Cursor<'_> {
528532
// Known prefixes must have been handled earlier. So if
529533
// we see a prefix here, it is definitely an unknown prefix.
530534
match self.first() {
531-
'#' | '"' | '\'' => UnknownPrefix,
535+
'#' | '"' | '\'' => InvalidPrefix,
532536
_ => InvalidIdent,
533537
}
534538
}

compiler/rustc_parse/src/lexer/mod.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
204204
self.ident(start)
205205
}
206206
rustc_lexer::TokenKind::InvalidIdent
207+
| rustc_lexer::TokenKind::InvalidPrefix
207208
// Do not recover an identifier with emoji if the codepoint is a confusable
208209
// with a recoverable substitution token, like `➖`.
209210
if !UNICODE_ARRAY
@@ -301,7 +302,9 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
301302
rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
302303
rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
303304

304-
rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => {
305+
rustc_lexer::TokenKind::Unknown
306+
| rustc_lexer::TokenKind::InvalidIdent
307+
| rustc_lexer::TokenKind::InvalidPrefix => {
305308
// Don't emit diagnostics for sequences of the same invalid token
306309
if swallow_next_invalid > 0 {
307310
swallow_next_invalid -= 1;

src/librustdoc/html/highlight.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -876,9 +876,10 @@ impl<'src> Classifier<'src> {
876876
},
877877
Some(c) => c,
878878
},
879-
TokenKind::RawIdent | TokenKind::UnknownPrefix | TokenKind::InvalidIdent => {
880-
Class::Ident(self.new_span(before, text))
881-
}
879+
TokenKind::RawIdent
880+
| TokenKind::UnknownPrefix
881+
| TokenKind::InvalidPrefix
882+
| TokenKind::InvalidIdent => Class::Ident(self.new_span(before, text)),
882883
TokenKind::Lifetime { .. } => Class::Lifetime,
883884
TokenKind::Eof => panic!("Eof in advance"),
884885
};

src/tools/rust-analyzer/crates/parser/src/lexed_str.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ impl<'a> Converter<'a> {
178178
rustc_lexer::TokenKind::Ident => {
179179
SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
180180
}
181-
rustc_lexer::TokenKind::InvalidIdent => {
181+
rustc_lexer::TokenKind::InvalidPrefix | rustc_lexer::TokenKind::InvalidIdent => {
182182
err = "Ident contains invalid characters";
183183
IDENT
184184
}

tests/ui/lexer/emoji-literal-prefix.rs

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
macro_rules! lexes {($($_:tt)*) => {}}
2+
3+
lexes!(🐛#); //~ ERROR identifiers cannot contain emoji
4+
lexes!(🐛"foo");
5+
lexes!(🐛'q');
6+
lexes!(🐛'q);
7+
8+
fn main() {}

tests/ui/lexer/emoji-literal-prefix.stderr

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
error: identifiers cannot contain emoji: `🐛`
2+
--> $DIR/emoji-literal-prefix.rs:3:8
3+
|
4+
LL | lexes!(🐛#);
5+
| ^^
6+
LL | lexes!(🐛"foo");
7+
| ^^
8+
LL | lexes!(🐛'q');
9+
| ^^
10+
LL | lexes!(🐛'q);
11+
| ^^
12+
13+
error: aborting due to 1 previous error
14+

0 commit comments

Comments
 (0)