Skip to content

Commit 6d05f12

Browse files
committed
Auto merge of #129346 - nnethercote:fix-double-handling-in-collect_tokens, r=petrochenkov
Fix double handling in `collect_tokens` Double handling of AST nodes can occur in `collect_tokens`. This is when an inner call to `collect_tokens` produces an AST node, and then an outer call to `collect_tokens` produces the same AST node. This can happen in a few places, e.g. expression statements where the statement delegates `HasTokens` and `HasAttrs` to the expression. It will also happen more after #124141. This PR fixes some double handling cases that cause problems, including #129166. r? `@petrochenkov`
2 parents 7f4b270 + d4bf28c commit 6d05f12

File tree

10 files changed

+196
-174
lines changed

10 files changed

+196
-174
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -4167,6 +4167,7 @@ dependencies = [
41674167
"rustc_errors",
41684168
"rustc_feature",
41694169
"rustc_fluent_macro",
4170+
"rustc_index",
41704171
"rustc_lexer",
41714172
"rustc_macros",
41724173
"rustc_session",

compiler/rustc_index/src/interval.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ mod tests;
1818
#[derive(Debug, Clone)]
1919
pub struct IntervalSet<I> {
2020
// Start, end
21-
map: SmallVec<[(u32, u32); 4]>,
21+
map: SmallVec<[(u32, u32); 2]>,
2222
domain: usize,
2323
_data: PhantomData<I>,
2424
}

compiler/rustc_parse/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ rustc_data_structures = { path = "../rustc_data_structures" }
1212
rustc_errors = { path = "../rustc_errors" }
1313
rustc_feature = { path = "../rustc_feature" }
1414
rustc_fluent_macro = { path = "../rustc_fluent_macro" }
15+
rustc_index = { path = "../rustc_index" }
1516
rustc_lexer = { path = "../rustc_lexer" }
1617
rustc_macros = { path = "../rustc_macros" }
1718
rustc_session = { path = "../rustc_session" }

compiler/rustc_parse/src/parser/attr_wrapper.rs

+79-54
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::borrow::Cow;
12
use std::{iter, mem};
23

34
use rustc_ast::token::{Delimiter, Token, TokenKind};
@@ -6,6 +7,7 @@ use rustc_ast::tokenstream::{
67
Spacing, ToAttrTokenStream,
78
};
89
use rustc_ast::{self as ast, AttrVec, Attribute, HasAttrs, HasTokens};
10+
use rustc_data_structures::fx::FxHashSet;
911
use rustc_errors::PResult;
1012
use rustc_session::parse::ParseSess;
1113
use rustc_span::{sym, Span, DUMMY_SP};
@@ -134,30 +136,17 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
134136
node_replacements.array_windows()
135137
{
136138
assert!(
137-
node_range.0.end <= next_node_range.0.start
138-
|| node_range.0.end >= next_node_range.0.end,
139-
"Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
139+
node_range.0.end <= next_node_range.0.start,
140+
"Node ranges should be disjoint: ({:?}, {:?}) ({:?}, {:?})",
140141
node_range,
141142
tokens,
142143
next_node_range,
143144
next_tokens,
144145
);
145146
}
146147

147-
// Process the replace ranges, starting from the highest start
148-
// position and working our way back. If have tokens like:
149-
//
150-
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
151-
//
152-
// Then we will generate replace ranges for both
153-
// the `#[cfg(FALSE)] field: bool` and the entire
154-
// `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
155-
//
156-
// By starting processing from the replace range with the greatest
157-
// start position, we ensure that any (outer) replace range which
158-
// encloses another (inner) replace range will fully overwrite the
159-
// inner range's replacement.
160-
for (node_range, target) in node_replacements.into_iter().rev() {
148+
// Process the replace ranges.
149+
for (node_range, target) in node_replacements.into_iter() {
161150
assert!(
162151
!node_range.0.is_empty(),
163152
"Cannot replace an empty node range: {:?}",
@@ -234,6 +223,8 @@ impl<'a> Parser<'a> {
234223
force_collect: ForceCollect,
235224
f: impl FnOnce(&mut Self, AttrVec) -> PResult<'a, (R, Trailing, UsePreAttrPos)>,
236225
) -> PResult<'a, R> {
226+
let possible_capture_mode = self.capture_cfg;
227+
237228
// We must collect if anything could observe the collected tokens, i.e.
238229
// if any of the following conditions hold.
239230
// - We are force collecting tokens (because force collection requires
@@ -244,9 +235,9 @@ impl<'a> Parser<'a> {
244235
// - Our target supports custom inner attributes (custom
245236
// inner attribute invocation might require token capturing).
246237
|| R::SUPPORTS_CUSTOM_INNER_ATTRS
247-
// - We are in `capture_cfg` mode (which requires tokens if
238+
// - We are in "possible capture mode" (which requires tokens if
248239
// the parsed node has `#[cfg]` or `#[cfg_attr]` attributes).
249-
|| self.capture_cfg;
240+
|| possible_capture_mode;
250241
if !needs_collection {
251242
return Ok(f(self, attrs.attrs)?.0);
252243
}
@@ -267,18 +258,48 @@ impl<'a> Parser<'a> {
267258
res?
268259
};
269260

270-
// When we're not in `capture_cfg` mode, then skip collecting and
271-
// return early if either of the following conditions hold.
272261
// - `None`: Our target doesn't support tokens at all (e.g. `NtIdent`).
262+
// - `Some(None)`: Our target supports tokens and has none.
273263
// - `Some(Some(_))`: Our target already has tokens set (e.g. we've
274-
// parsed something like `#[my_attr] $item`). The actual parsing code
275-
// takes care of prepending any attributes to the nonterminal, so we
276-
// don't need to modify the already captured tokens.
264+
// parsed something like `#[my_attr] $item`).
265+
let ret_can_hold_tokens = matches!(ret.tokens_mut(), Some(None));
266+
267+
// Ignore any attributes we've previously processed. This happens when
268+
// an inner call to `collect_tokens` returns an AST node and then an
269+
// outer call ends up with the same AST node without any additional
270+
// wrapping layer.
271+
let mut seen_indices = FxHashSet::default();
272+
for (i, attr) in ret.attrs().iter().enumerate() {
273+
let is_unseen = self.capture_state.seen_attrs.insert(attr.id);
274+
if !is_unseen {
275+
seen_indices.insert(i);
276+
}
277+
}
278+
let ret_attrs: Cow<'_, [Attribute]> =
279+
if seen_indices.is_empty() {
280+
Cow::Borrowed(ret.attrs())
281+
} else {
282+
let ret_attrs =
283+
ret.attrs()
284+
.iter()
285+
.enumerate()
286+
.filter_map(|(i, attr)| {
287+
if seen_indices.contains(&i) { None } else { Some(attr.clone()) }
288+
})
289+
.collect();
290+
Cow::Owned(ret_attrs)
291+
};
292+
293+
// When we're not in "definite capture mode", then skip collecting and
294+
// return early if `ret` doesn't support tokens or already has some.
277295
//
278296
// Note that this check is independent of `force_collect`. There's no
279297
// need to collect tokens when we don't support tokens or already have
280298
// tokens.
281-
if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) {
299+
let definite_capture_mode = self.capture_cfg
300+
&& matches!(self.capture_state.capturing, Capturing::Yes)
301+
&& has_cfg_or_cfg_attr(&ret_attrs);
302+
if !definite_capture_mode && !ret_can_hold_tokens {
282303
return Ok(ret);
283304
}
284305

@@ -297,12 +318,12 @@ impl<'a> Parser<'a> {
297318
// outer and inner attributes. So this check is more precise than
298319
// the earlier `needs_tokens` check, and we don't need to
299320
// check `R::SUPPORTS_CUSTOM_INNER_ATTRS`.)
300-
|| needs_tokens(ret.attrs())
301-
// - We are in `capture_cfg` mode and there are `#[cfg]` or
302-
// `#[cfg_attr]` attributes. (During normal non-`capture_cfg`
303-
// parsing, we don't need any special capturing for those
304-
// attributes, because they're builtin.)
305-
|| (self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs()));
321+
|| needs_tokens(&ret_attrs)
322+
// - We are in "definite capture mode", which requires that there
323+
// are `#[cfg]` or `#[cfg_attr]` attributes. (During normal
324+
// non-`capture_cfg` parsing, we don't need any special capturing
325+
// for those attributes, because they're builtin.)
326+
|| definite_capture_mode;
306327
if !needs_collection {
307328
return Ok(ret);
308329
}
@@ -336,7 +357,7 @@ impl<'a> Parser<'a> {
336357
// `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`,
337358
// which means the relevant tokens will be removed. (More details below.)
338359
let mut inner_attr_parser_replacements = Vec::new();
339-
for attr in ret.attrs() {
360+
for attr in ret_attrs.iter() {
340361
if attr.style == ast::AttrStyle::Inner {
341362
if let Some(inner_attr_parser_range) =
342363
self.capture_state.inner_attr_parser_ranges.remove(&attr.id)
@@ -359,11 +380,10 @@ impl<'a> Parser<'a> {
359380
// from `ParserRange` form to `NodeRange` form. We will perform the actual
360381
// replacement only when we convert the `LazyAttrTokenStream` to an
361382
// `AttrTokenStream`.
362-
self.capture_state.parser_replacements
363-
[parser_replacements_start..parser_replacements_end]
364-
.iter()
365-
.cloned()
366-
.chain(inner_attr_parser_replacements.iter().cloned())
383+
self.capture_state
384+
.parser_replacements
385+
.drain(parser_replacements_start..parser_replacements_end)
386+
.chain(inner_attr_parser_replacements.into_iter())
367387
.map(|(parser_range, data)| {
368388
(NodeRange::new(parser_range, collect_pos.start_pos), data)
369389
})
@@ -399,20 +419,12 @@ impl<'a> Parser<'a> {
399419
break_last_token: self.break_last_token,
400420
node_replacements,
401421
});
422+
let mut tokens_used = false;
402423

403-
// If we support tokens and don't already have them, store the newly captured tokens.
404-
if let Some(target_tokens @ None) = ret.tokens_mut() {
405-
*target_tokens = Some(tokens.clone());
406-
}
407-
408-
// If `capture_cfg` is set and we're inside a recursive call to
409-
// `collect_tokens`, then we need to register a replace range if we
410-
// have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager
411-
// cfg-expansion on the captured token stream.
412-
if self.capture_cfg
413-
&& matches!(self.capture_state.capturing, Capturing::Yes)
414-
&& has_cfg_or_cfg_attr(ret.attrs())
415-
{
424+
// If in "definite capture mode" we need to register a replace range
425+
// for the `#[cfg]` and/or `#[cfg_attr]` attrs. This allows us to run
426+
// eager cfg-expansion on the captured token stream.
427+
if definite_capture_mode {
416428
assert!(!self.break_last_token, "Should not have unglued last token with cfg attr");
417429

418430
// What is the status here when parsing the example code at the top of this method?
@@ -429,7 +441,9 @@ impl<'a> Parser<'a> {
429441
// cfg-expand this AST node.
430442
let start_pos =
431443
if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos };
432-
let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };
444+
let target =
445+
AttrsTarget { attrs: ret_attrs.iter().cloned().collect(), tokens: tokens.clone() };
446+
tokens_used = true;
433447
self.capture_state
434448
.parser_replacements
435449
.push((ParserRange(start_pos..end_pos), Some(target)));
@@ -438,7 +452,16 @@ impl<'a> Parser<'a> {
438452
// the outermost call to this method.
439453
self.capture_state.parser_replacements.clear();
440454
self.capture_state.inner_attr_parser_ranges.clear();
455+
self.capture_state.seen_attrs.clear();
441456
}
457+
458+
// If we support tokens and don't already have them, store the newly captured tokens.
459+
if let Some(target_tokens @ None) = ret.tokens_mut() {
460+
tokens_used = true;
461+
*target_tokens = Some(tokens);
462+
}
463+
464+
assert!(tokens_used); // check we didn't create `tokens` unnecessarily
442465
Ok(ret)
443466
}
444467
}
@@ -510,9 +533,11 @@ fn make_attr_token_stream(
510533
}
511534

512535
/// Tokens are needed if:
513-
/// - any non-single-segment attributes (other than doc comments) are present; or
514-
/// - any `cfg_attr` attributes are present;
515-
/// - any single-segment, non-builtin attributes are present.
536+
/// - any non-single-segment attributes (other than doc comments) are present,
537+
/// e.g. `rustfmt::skip`; or
538+
/// - any `cfg_attr` attributes are present; or
539+
/// - any single-segment, non-builtin attributes are present, e.g. `derive`,
540+
/// `test`, `global_allocator`.
516541
fn needs_tokens(attrs: &[ast::Attribute]) -> bool {
517542
attrs.iter().any(|attr| match attr.ident() {
518543
None => !attr.is_doc_comment(),

compiler/rustc_parse/src/parser/mod.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ use rustc_ast_pretty::pprust;
3535
use rustc_data_structures::fx::FxHashMap;
3636
use rustc_data_structures::sync::Lrc;
3737
use rustc_errors::{Applicability, Diag, FatalError, MultiSpan, PResult};
38+
use rustc_index::interval::IntervalSet;
3839
use rustc_session::parse::ParseSess;
3940
use rustc_span::symbol::{kw, sym, Ident, Symbol};
4041
use rustc_span::{Span, DUMMY_SP};
@@ -183,7 +184,7 @@ pub struct Parser<'a> {
183184
// This type is used a lot, e.g. it's cloned when matching many declarative macro rules with nonterminals. Make sure
184185
// it doesn't unintentionally get bigger.
185186
#[cfg(target_pointer_width = "64")]
186-
rustc_data_structures::static_assert_size!(Parser<'_>, 256);
187+
rustc_data_structures::static_assert_size!(Parser<'_>, 288);
187188

188189
/// Stores span information about a closure.
189190
#[derive(Clone, Debug)]
@@ -238,7 +239,8 @@ impl NodeRange {
238239
// is the position of the function's start token. This gives
239240
// `NodeRange(10..15)`.
240241
fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange {
241-
assert!(parser_range.start >= start_pos && parser_range.end >= start_pos);
242+
assert!(!parser_range.is_empty());
243+
assert!(parser_range.start >= start_pos);
242244
NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos))
243245
}
244246
}
@@ -260,6 +262,9 @@ struct CaptureState {
260262
capturing: Capturing,
261263
parser_replacements: Vec<ParserReplacement>,
262264
inner_attr_parser_ranges: FxHashMap<AttrId, ParserRange>,
265+
// `IntervalSet` is good for perf because attrs are mostly added to this
266+
// set in contiguous ranges.
267+
seen_attrs: IntervalSet<AttrId>,
263268
}
264269

265270
/// Iterator over a `TokenStream` that produces `Token`s. It's a bit odd that
@@ -457,6 +462,7 @@ impl<'a> Parser<'a> {
457462
capturing: Capturing::No,
458463
parser_replacements: Vec::new(),
459464
inner_attr_parser_ranges: Default::default(),
465+
seen_attrs: IntervalSet::new(u32::MAX as usize),
460466
},
461467
current_closure: None,
462468
recovery: Recovery::Allowed,

tests/crashes/129166.rs

-7
This file was deleted.
New file added: invalid-node-range-issue-129166.rs (filename per the `$DIR/invalid-node-range-issue-129166.rs` path in the stderr output below; exact directory under tests/ui/ not shown in this capture — it replaces the deleted crash test above)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// This was triggering an assertion failure in `NodeRange::new`.
2+
3+
#![feature(cfg_eval)]
4+
#![feature(stmt_expr_attributes)]
5+
6+
fn f() -> u32 {
7+
#[cfg_eval] #[cfg(not(FALSE))] 0
8+
//~^ ERROR removing an expression is not supported in this position
9+
}
10+
11+
fn main() {}
New file added: invalid-node-range-issue-129166.stderr (expected-error output for the test file above; filename inferred from the `--> $DIR/invalid-node-range-issue-129166.rs` line below)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
error: removing an expression is not supported in this position
2+
--> $DIR/invalid-node-range-issue-129166.rs:7:17
3+
|
4+
LL | #[cfg_eval] #[cfg(not(FALSE))] 0
5+
| ^^^^^^^^^^^^^^^^^^
6+
7+
error: aborting due to 1 previous error
8+

tests/ui/proc-macro/macro-rules-derive-cfg.rs

+5-7
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,15 @@ extern crate test_macros;
1414
macro_rules! produce_it {
1515
($expr:expr) => {
1616
#[derive(Print)]
17-
struct Foo {
18-
val: [bool; {
19-
let a = #[cfg_attr(not(FALSE), rustc_dummy(first))] $expr;
20-
0
21-
}]
22-
}
17+
struct Foo(
18+
[bool; #[cfg_attr(not(FALSE), rustc_dummy(first))] $expr]
19+
);
2320
}
2421
}
2522

2623
produce_it!(#[cfg_attr(not(FALSE), rustc_dummy(second))] {
27-
#![cfg_attr(not(FALSE), allow(unused))]
24+
#![cfg_attr(not(FALSE), rustc_dummy(third))]
25+
#[cfg_attr(not(FALSE), rustc_dummy(fourth))]
2826
30
2927
});
3028

0 commit comments

Comments
 (0)