Skip to content

Commit 8a0c550

Browse files
committed
Auto merge of #95259 - nnethercote:more-macro-expansion-optimizations, r=petrochenkov
More macro expansion optimizations. A few nice wins for macro-heavy crates. r? `@petrochenkov`
2 parents 4ce257f + fdec26d commit 8a0c550

File tree

3 files changed

+101
-86
lines changed

3 files changed

+101
-86
lines changed

compiler/rustc_expand/src/mbe/macro_parser.rs

+51-36
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,14 @@ struct MatcherTtFrame<'tt> {
101101
idx: usize,
102102
}
103103

104-
type NamedMatchVec = SmallVec<[NamedMatch; 4]>;
104+
// One element is enough to cover 95-99% of vectors for most benchmarks. Also,
105+
// vectors longer than one frequently have many elements, not just two or
106+
// three.
107+
type NamedMatchVec = SmallVec<[NamedMatch; 1]>;
108+
109+
// This type is used a lot. Make sure it doesn't unintentionally get bigger.
110+
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
111+
rustc_data_structures::static_assert_size!(NamedMatchVec, 48);
105112

106113
/// Represents a single "position" (aka "matcher position", aka "item"), as
107114
/// described in the module documentation.
@@ -153,7 +160,7 @@ struct MatcherPos<'tt> {
153160

154161
// This type is used a lot. Make sure it doesn't unintentionally get bigger.
155162
#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))]
156-
rustc_data_structures::static_assert_size!(MatcherPos<'_>, 136);
163+
rustc_data_structures::static_assert_size!(MatcherPos<'_>, 112);
157164

158165
impl<'tt> MatcherPos<'tt> {
159166
/// `len` `Vec`s (initially shared and empty) that will store matches of metavars.
@@ -202,11 +209,7 @@ impl<'tt> MatcherPos<'tt> {
202209
match_lo: up.match_cur,
203210
match_cur: up.match_cur,
204211
match_hi: up.match_cur + seq.num_captures,
205-
repetition: Some(MatcherPosRepetition {
206-
up,
207-
sep: seq.separator.clone(),
208-
seq_op: seq.kleene.op,
209-
}),
212+
repetition: Some(MatcherPosRepetition { up, seq }),
210213
stack: smallvec![],
211214
}
212215
}
@@ -220,15 +223,12 @@ impl<'tt> MatcherPos<'tt> {
220223

221224
#[derive(Clone)]
222225
struct MatcherPosRepetition<'tt> {
223-
/// The KleeneOp of this sequence.
224-
seq_op: mbe::KleeneOp,
225-
226-
/// The separator.
227-
sep: Option<Token>,
228-
229226
/// The "parent" matcher position. That is, the matcher position just before we enter the
230227
/// sequence.
231228
up: Box<MatcherPos<'tt>>,
229+
230+
/// The sequence itself.
231+
seq: &'tt SequenceRepetition,
232232
}
233233

234234
enum EofItems<'tt> {
@@ -274,22 +274,20 @@ pub(super) fn count_names(ms: &[TokenTree]) -> usize {
274274
})
275275
}
276276

277-
/// `NamedMatch` is a pattern-match result for a single `token::MATCH_NONTERMINAL`:
278-
/// so it is associated with a single ident in a parse, and all
279-
/// `MatchedNonterminal`s in the `NamedMatch` have the same non-terminal type
280-
/// (expr, item, etc). Each leaf in a single `NamedMatch` corresponds to a
281-
/// single `token::MATCH_NONTERMINAL` in the `TokenTree` that produced it.
277+
/// `NamedMatch` is a pattern-match result for a single metavar. All
278+
/// `MatchedNtNonTt`s in the `NamedMatch` have the same non-terminal type
279+
/// (expr, item, etc).
282280
///
283281
/// The in-memory structure of a particular `NamedMatch` represents the match
284282
/// that occurred when a particular subset of a matcher was applied to a
285283
/// particular token tree.
286284
///
287285
/// The width of each `MatchedSeq` in the `NamedMatch`, and the identity of
288-
/// the `MatchedNonterminal`s, will depend on the token tree it was applied
289-
/// to: each `MatchedSeq` corresponds to a single `TTSeq` in the originating
286+
/// the `MatchedNtNonTt`s, will depend on the token tree it was applied
287+
/// to: each `MatchedSeq` corresponds to a single repetition in the originating
290288
/// token tree. The depth of the `NamedMatch` structure will therefore depend
291-
/// only on the nesting depth of `ast::TTSeq`s in the originating
292-
/// token tree it was derived from.
289+
/// only on the nesting depth of repetitions in the originating token tree it
290+
/// was derived from.
293291
///
294292
/// In layman's terms: `NamedMatch` will form a tree representing nested matches of a particular
295293
/// meta variable. For example, if we are matching the following macro against the following
@@ -308,24 +306,32 @@ pub(super) fn count_names(ms: &[TokenTree]) -> usize {
308306
/// ```rust
309307
/// MatchedSeq([
310308
/// MatchedSeq([
311-
/// MatchedNonterminal(a),
312-
/// MatchedNonterminal(b),
313-
/// MatchedNonterminal(c),
314-
/// MatchedNonterminal(d),
309+
/// MatchedNtNonTt(a),
310+
/// MatchedNtNonTt(b),
311+
/// MatchedNtNonTt(c),
312+
/// MatchedNtNonTt(d),
315313
/// ]),
316314
/// MatchedSeq([
317-
/// MatchedNonterminal(a),
318-
/// MatchedNonterminal(b),
319-
/// MatchedNonterminal(c),
320-
/// MatchedNonterminal(d),
321-
/// MatchedNonterminal(e),
315+
/// MatchedNtNonTt(a),
316+
/// MatchedNtNonTt(b),
317+
/// MatchedNtNonTt(c),
318+
/// MatchedNtNonTt(d),
319+
/// MatchedNtNonTt(e),
322320
/// ])
323321
/// ])
324322
/// ```
325323
#[derive(Debug, Clone)]
326324
crate enum NamedMatch {
327325
MatchedSeq(Lrc<NamedMatchVec>),
328-
MatchedNonterminal(Lrc<Nonterminal>),
326+
327+
// This variant should never hold an `NtTT`. `MatchedNtTt` should be used
328+
// for that case.
329+
MatchedNtNonTt(Lrc<Nonterminal>),
330+
331+
// `NtTT` is handled without any cloning when transcribing, unlike other
332+
// nonterminals. Therefore, an `Lrc` isn't helpful and causes unnecessary
333+
// allocations. Hence this separate variant.
334+
MatchedNtTt(rustc_ast::tokenstream::TokenTree),
329335
}
330336

331337
/// Takes a slice of token trees `ms` representing a matcher which successfully matched input
@@ -546,14 +552,19 @@ impl<'tt> TtParser<'tt> {
546552
self.cur_items.push(new_pos);
547553
}
548554

549-
if idx == len && repetition.sep.is_some() {
550-
if repetition.sep.as_ref().map_or(false, |sep| token_name_eq(token, sep)) {
555+
if idx == len && repetition.seq.separator.is_some() {
556+
if repetition
557+
.seq
558+
.separator
559+
.as_ref()
560+
.map_or(false, |sep| token_name_eq(token, sep))
561+
{
551562
// The matcher has a separator, and it matches the current token. We can
552563
// advance past the separator token.
553564
item.idx += 1;
554565
self.next_items.push(item);
555566
}
556-
} else if repetition.seq_op != mbe::KleeneOp::ZeroOrOne {
567+
} else if repetition.seq.kleene.op != mbe::KleeneOp::ZeroOrOne {
557568
// We don't need a separator. Move the "dot" back to the beginning of the
558569
// matcher and try to match again UNLESS we are only allowed to have _one_
559570
// repetition.
@@ -665,7 +676,11 @@ impl<'tt> TtParser<'tt> {
665676
}
666677
Ok(nt) => nt,
667678
};
668-
item.push_match(match_cur, MatchedNonterminal(Lrc::new(nt)));
679+
let m = match nt {
680+
Nonterminal::NtTT(tt) => MatchedNtTt(tt),
681+
_ => MatchedNtNonTt(Lrc::new(nt)),
682+
};
683+
item.push_match(match_cur, m);
669684
item.idx += 1;
670685
item.match_cur += 1;
671686
} else {

compiler/rustc_expand/src/mbe/macro_rules.rs

+28-32
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstF
44
use crate::mbe;
55
use crate::mbe::macro_check;
66
use crate::mbe::macro_parser::{Error, ErrorReported, Failure, Success, TtParser};
7-
use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq};
7+
use crate::mbe::macro_parser::{MatchedNtTt, MatchedSeq};
88
use crate::mbe::transcribe::transcribe;
99

1010
use rustc_ast as ast;
11-
use rustc_ast::token::{self, NonterminalKind, NtTT, Token, TokenKind::*};
11+
use rustc_ast::token::{self, NonterminalKind, Token, TokenKind::*};
1212
use rustc_ast::tokenstream::{DelimSpan, TokenStream};
1313
use rustc_ast::{NodeId, DUMMY_NODE_ID};
1414
use rustc_ast_pretty::pprust;
@@ -470,22 +470,20 @@ pub fn compile_declarative_macro(
470470
MatchedSeq(ref s) => s
471471
.iter()
472472
.map(|m| {
473-
if let MatchedNonterminal(ref nt) = *m {
474-
if let NtTT(ref tt) = **nt {
475-
let mut tts = vec![];
476-
mbe::quoted::parse(
477-
tt.clone().into(),
478-
true,
479-
&sess.parse_sess,
480-
def.id,
481-
features,
482-
edition,
483-
&mut tts,
484-
);
485-
let tt = tts.pop().unwrap();
486-
valid &= check_lhs_nt_follows(&sess.parse_sess, features, &def, &tt);
487-
return tt;
488-
}
473+
if let MatchedNtTt(ref tt) = *m {
474+
let mut tts = vec![];
475+
mbe::quoted::parse(
476+
tt.clone().into(),
477+
true,
478+
&sess.parse_sess,
479+
def.id,
480+
features,
481+
edition,
482+
&mut tts,
483+
);
484+
let tt = tts.pop().unwrap();
485+
valid &= check_lhs_nt_follows(&sess.parse_sess, features, &def, &tt);
486+
return tt;
489487
}
490488
sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs")
491489
})
@@ -497,20 +495,18 @@ pub fn compile_declarative_macro(
497495
MatchedSeq(ref s) => s
498496
.iter()
499497
.map(|m| {
500-
if let MatchedNonterminal(ref nt) = *m {
501-
if let NtTT(ref tt) = **nt {
502-
let mut tts = vec![];
503-
mbe::quoted::parse(
504-
tt.clone().into(),
505-
false,
506-
&sess.parse_sess,
507-
def.id,
508-
features,
509-
edition,
510-
&mut tts,
511-
);
512-
return tts.pop().unwrap();
513-
}
498+
if let MatchedNtTt(ref tt) = *m {
499+
let mut tts = vec![];
500+
mbe::quoted::parse(
501+
tt.clone().into(),
502+
false,
503+
&sess.parse_sess,
504+
def.id,
505+
features,
506+
edition,
507+
&mut tts,
508+
);
509+
return tts.pop().unwrap();
514510
}
515511
sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs")
516512
})

compiler/rustc_expand/src/mbe/transcribe.rs

+22-18
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
use crate::base::ExtCtxt;
2-
use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq, NamedMatch};
2+
use crate::mbe::macro_parser::{MatchedNtNonTt, MatchedNtTt, MatchedSeq, NamedMatch};
33
use crate::mbe::{self, MetaVarExpr};
44
use rustc_ast::mut_visit::{self, MutVisitor};
5-
use rustc_ast::token::{self, NtTT, Token, TokenKind};
5+
use rustc_ast::token::{self, Nonterminal, Token, TokenKind};
66
use rustc_ast::tokenstream::{DelimSpan, TokenStream, TokenTree, TreeAndSpacing};
77
use rustc_data_structures::fx::FxHashMap;
88
use rustc_data_structures::sync::Lrc;
@@ -233,25 +233,29 @@ pub(super) fn transcribe<'a>(
233233
// the meta-var.
234234
let ident = MacroRulesNormalizedIdent::new(orignal_ident);
235235
if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) {
236-
if let MatchedNonterminal(nt) = cur_matched {
237-
let token = if let NtTT(tt) = &**nt {
236+
match cur_matched {
237+
MatchedNtTt(ref tt) => {
238238
// `tt`s are emitted into the output stream directly as "raw tokens",
239239
// without wrapping them into groups.
240-
tt.clone()
241-
} else {
240+
let token = tt.clone();
241+
result.push(token.into());
242+
}
243+
MatchedNtNonTt(ref nt) => {
242244
// Other variables are emitted into the output stream as groups with
243245
// `Delimiter::None` to maintain parsing priorities.
244246
// `Interpolated` is currently used for such groups in rustc parser.
247+
debug_assert!(!matches!(**nt, Nonterminal::NtTT(_)));
245248
marker.visit_span(&mut sp);
246-
TokenTree::token(token::Interpolated(nt.clone()), sp)
247-
};
248-
result.push(token.into());
249-
} else {
250-
// We were unable to descend far enough. This is an error.
251-
return Err(cx.struct_span_err(
252-
sp, /* blame the macro writer */
253-
&format!("variable '{}' is still repeating at this depth", ident),
254-
));
249+
let token = TokenTree::token(token::Interpolated(nt.clone()), sp);
250+
result.push(token.into());
251+
}
252+
MatchedSeq(..) => {
253+
// We were unable to descend far enough. This is an error.
254+
return Err(cx.struct_span_err(
255+
sp, /* blame the macro writer */
256+
&format!("variable '{}' is still repeating at this depth", ident),
257+
));
258+
}
255259
}
256260
} else {
257261
// If we aren't able to match the meta-var, we push it back into the result but
@@ -308,7 +312,7 @@ fn lookup_cur_matched<'a>(
308312
let mut matched = matched;
309313
for &(idx, _) in repeats {
310314
match matched {
311-
MatchedNonterminal(_) => break,
315+
MatchedNtTt(_) | MatchedNtNonTt(_) => break,
312316
MatchedSeq(ref ads) => matched = ads.get(idx).unwrap(),
313317
}
314318
}
@@ -398,7 +402,7 @@ fn lockstep_iter_size(
398402
let name = MacroRulesNormalizedIdent::new(name);
399403
match lookup_cur_matched(name, interpolations, repeats) {
400404
Some(matched) => match matched {
401-
MatchedNonterminal(_) => LockstepIterSize::Unconstrained,
405+
MatchedNtTt(_) | MatchedNtNonTt(_) => LockstepIterSize::Unconstrained,
402406
MatchedSeq(ref ads) => LockstepIterSize::Constraint(ads.len(), name),
403407
},
404408
_ => LockstepIterSize::Unconstrained,
@@ -445,7 +449,7 @@ fn count_repetitions<'a>(
445449
sp: &DelimSpan,
446450
) -> PResult<'a, usize> {
447451
match matched {
448-
MatchedNonterminal(_) => {
452+
MatchedNtTt(_) | MatchedNtNonTt(_) => {
449453
if declared_lhs_depth == 0 {
450454
return Err(cx.struct_span_err(
451455
sp.entire(),

0 commit comments

Comments
 (0)