Skip to content

Commit 32e1f87

Browse files
authored
Unrolled build for rust-lang#128865
Rollup merge of rust-lang#128865 - jieyouxu:unicurd, r=Urgau Ensure let stmt compound assignment removal suggestion respect codepoint boundaries Previously we would try to issue a suggestion for `let x <op>= 1`, i.e. a compound assignment within a `let` binding, to remove the `<op>`. The suggestion code unfortunately incorrectly assumed that the `<op>` is an exactly-1-byte ASCII character, but this assumption is incorrect because we also recover Unicode-confusables like `➖=` as `-=`. In this example, the suggestion code used a `+ BytePos(1)` to calculate the span of the `<op>` codepoint that looks like `-` but the multi-byte Unicode look-alike would cause the suggested removal span to be inside a multi-byte codepoint boundary, triggering a codepoint boundary assertion. The fix is to use `SourceMap::start_point(token_span)` which properly accounts for codepoint boundaries. Fixes rust-lang#128845. cc rust-lang#128790 r? ````@fmease````
2 parents 899eb03 + d65f131 commit 32e1f87

File tree

3 files changed

+48
-2
lines changed

3 files changed

+48
-2
lines changed

compiler/rustc_parse/src/parser/stmt.rs

+6-2
Original file line numberDiff line numberDiff line change
@@ -408,10 +408,14 @@ impl<'a> Parser<'a> {
408408
fn parse_initializer(&mut self, eq_optional: bool) -> PResult<'a, Option<P<Expr>>> {
409409
let eq_consumed = match self.token.kind {
410410
token::BinOpEq(..) => {
411-
// Recover `let x <op>= 1` as `let x = 1`
411+
// Recover `let x <op>= 1` as `let x = 1`. We must not use `+ BytePos(1)` here
412+
// because `<op>` can be a multi-byte lookalike that was recovered, e.g. `➖=` (the
413+
// `➖` is a U+2796 Heavy Minus Sign Unicode Character) that was recovered as a
414+
// `-=`.
415+
let extra_op_span = self.psess.source_map().start_point(self.token.span);
412416
self.dcx().emit_err(errors::CompoundAssignmentExpressionInLet {
413417
span: self.token.span,
414-
suggestion: self.token.span.with_hi(self.token.span.lo() + BytePos(1)),
418+
suggestion: extra_op_span,
415419
});
416420
self.bump();
417421
true
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
//! Previously we would try to issue a suggestion for `let x <op>= 1`, i.e. a compound assignment
2+
//! within a `let` binding, to remove the `<op>`. The suggestion code unfortunately incorrectly
3+
//! assumed that the `<op>` is an exactly-1-byte ASCII character, but this assumption is incorrect
4+
//! because we also recover Unicode-confusables like `➖=` as `-=`. In this example, the suggestion
5+
//! code used a `+ BytePos(1)` to calculate the span of the `<op>` codepoint that looks like `-` but
6+
//! the multi-byte Unicode look-alike would cause the suggested removal span to be inside a
7+
//! multi-byte codepoint boundary, triggering a codepoint boundary assertion.
8+
//!
9+
//! issue: rust-lang/rust#128845
10+
11+
fn main() {
12+
// Adapted from #128845 but with irrelevant components removed and simplified.
13+
let x ➖= 1;
14+
//~^ ERROR unknown start of token: \u{2796}
15+
//~| ERROR: can't reassign to an uninitialized variable
16+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
error: unknown start of token: \u{2796}
2+
--> $DIR/suggest-remove-compount-assign-let-ice.rs:13:11
3+
|
4+
LL | let x ➖= 1;
5+
| ^^
6+
|
7+
help: Unicode character '➖' (Heavy Minus Sign) looks like '-' (Minus/Hyphen), but it is not
8+
|
9+
LL | let x -= 1;
10+
| ~
11+
12+
error: can't reassign to an uninitialized variable
13+
--> $DIR/suggest-remove-compount-assign-let-ice.rs:13:11
14+
|
15+
LL | let x ➖= 1;
16+
| ^^^
17+
|
18+
= help: if you meant to overwrite, remove the `let` binding
19+
help: initialize the variable
20+
|
21+
LL - let x ➖= 1;
22+
LL + let x = 1;
23+
|
24+
25+
error: aborting due to 2 previous errors
26+

0 commit comments

Comments
 (0)