Skip to content

Commit 4b2bdf7

Browse files
committed
rustc: Don't inline in CGUs at -O0
This commit tweaks the behavior of inlining functions into multiple codegen units when rustc is compiling in debug mode. Today rustc will unconditionally treat `#[inline]` functions by translating them into all codegen units that they're needed within, marking the linkage as `internal`. This commit changes the behavior so that in debug mode (compiling at `-O0`) rustc will instead only translate `#[inline]` functions into *one* codegen unit, forcing all other codegen units to reference this one copy. The goal here is to improve debug compile times by reducing the amount of translation that happens on behalf of multiple codegen units. It was discovered in #44941 that increasing the number of codegen units had the adverse side effect of increasing the overal work done by the compiler, and the suspicion here was that the compiler was inlining, translating, and codegen'ing more functions with more codegen units (for example `String` would be basically inlined into all codegen units if used). The strategy in this commit should reduce the cost of `#[inline]` functions to being equivalent to one codegen unit, which is only translating and codegen'ing inline functions once. Collected [data] shows that this does indeed improve the situation from [before] as the overall cpu-clock time increases at a much slower rate and when pinned to one core rustc does not consume significantly more wall clock time than with one codegen unit. One caveat of this commit is that the symbol names for inlined functions that are only translated once needed some slight tweaking. These inline functions could be translated into multiple crates and we need to make sure the symbols don't collideA so the crate name/disambiguator is mixed in to the symbol name hash in these situations. [data]: #44941 (comment) [before]: #44941 (comment)
1 parent ac76206 commit 4b2bdf7

21 files changed

+202
-83
lines changed

src/librustc/session/config.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1097,6 +1097,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
10971097
"generate a graphical HTML report of time spent in trans and LLVM"),
10981098
thinlto: bool = (false, parse_bool, [TRACKED],
10991099
"enable ThinLTO when possible"),
1100+
inline_in_all_cgus: Option<bool> = (None, parse_opt_bool, [TRACKED],
1101+
"control whether #[inline] functions are in all cgus"),
11001102
}
11011103

11021104
pub fn default_lib_output() -> CrateType {

src/librustc_trans/back/symbol_names.rs

+38-16
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,10 @@
9898
//! DefPaths which are much more robust in the face of changes to the code base.
9999
100100
use monomorphize::Instance;
101+
use trans_item::{TransItemExt, InstantiationMode};
101102

102103
use rustc::middle::weak_lang_items;
104+
use rustc::middle::trans::TransItem;
103105
use rustc::hir::def_id::DefId;
104106
use rustc::hir::map as hir_map;
105107
use rustc::ty::{self, Ty, TyCtxt, TypeFoldable};
@@ -150,7 +152,10 @@ pub fn provide(providers: &mut Providers) {
150152
fn get_symbol_hash<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
151153

152154
// the DefId of the item this name is for
153-
def_id: Option<DefId>,
155+
def_id: DefId,
156+
157+
// instance this name will be for
158+
instance: Instance<'tcx>,
154159

155160
// type of the item, without any generic
156161
// parameters substituted; this is
@@ -160,7 +165,7 @@ fn get_symbol_hash<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
160165

161166
// values for generic type parameters,
162167
// if any.
163-
substs: Option<&'tcx Substs<'tcx>>)
168+
substs: &'tcx Substs<'tcx>)
164169
-> u64 {
165170
debug!("get_symbol_hash(def_id={:?}, parameters={:?})", def_id, substs);
166171

@@ -170,7 +175,7 @@ fn get_symbol_hash<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
170175
// the main symbol name is not necessarily unique; hash in the
171176
// compiler's internal def-path, guaranteeing each symbol has a
172177
// truly unique path
173-
hasher.hash(def_id.map(|def_id| tcx.def_path_hash(def_id)));
178+
hasher.hash(tcx.def_path_hash(def_id));
174179

175180
// Include the main item-type. Note that, in this case, the
176181
// assertions about `needs_subst` may not hold, but this item-type
@@ -186,19 +191,36 @@ fn get_symbol_hash<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
186191
}
187192

188193
// also include any type parameters (for generic items)
189-
if let Some(substs) = substs {
190-
assert!(!substs.has_erasable_regions());
191-
assert!(!substs.needs_subst());
192-
substs.visit_with(&mut hasher);
193-
194-
// If this is an instance of a generic function, we also hash in
195-
// the ID of the instantiating crate. This avoids symbol conflicts
196-
// in case the same instances is emitted in two crates of the same
197-
// project.
198-
if substs.types().next().is_some() {
199-
hasher.hash(tcx.crate_name.as_str());
200-
hasher.hash(tcx.sess.local_crate_disambiguator().as_str());
194+
assert!(!substs.has_erasable_regions());
195+
assert!(!substs.needs_subst());
196+
substs.visit_with(&mut hasher);
197+
198+
let mut avoid_cross_crate_conflicts = false;
199+
200+
// If this is an instance of a generic function, we also hash in
201+
// the ID of the instantiating crate. This avoids symbol conflicts
202+
// in case the same instances is emitted in two crates of the same
203+
// project.
204+
if substs.types().next().is_some() {
205+
avoid_cross_crate_conflicts = true;
206+
}
207+
208+
// If we're dealing with an instance of a function that's inlined from
209+
// another crate but we're marking it as globally shared to our
210+
// compliation (aka we're not making an internal copy in each of our
211+
// codegen units) then this symbol may become an exported (but hidden
212+
// visibility) symbol. This means that multiple crates may do the same
213+
// and we want to be sure to avoid any symbol conflicts here.
214+
match TransItem::Fn(instance).instantiation_mode(tcx) {
215+
InstantiationMode::GloballyShared { may_conflict: true } => {
216+
avoid_cross_crate_conflicts = true;
201217
}
218+
_ => {}
219+
}
220+
221+
if avoid_cross_crate_conflicts {
222+
hasher.hash(tcx.crate_name.as_str());
223+
hasher.hash(tcx.sess.local_crate_disambiguator().as_str());
202224
}
203225
});
204226

@@ -309,7 +331,7 @@ fn compute_symbol_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, instance: Instance
309331
// and should not matter anyhow.
310332
let instance_ty = tcx.erase_regions(&instance_ty);
311333

312-
let hash = get_symbol_hash(tcx, Some(def_id), instance_ty, Some(substs));
334+
let hash = get_symbol_hash(tcx, def_id, instance, instance_ty, substs);
313335

314336
SymbolPathBuffer::from_interned(tcx.def_symbol_name(def_id)).finish(hash)
315337
}

src/librustc_trans/collector.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -401,9 +401,9 @@ fn collect_items_rec<'a, 'tcx: 'a>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
401401
}
402402

403403
fn record_accesses<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
404-
caller: TransItem<'tcx>,
405-
callees: &[TransItem<'tcx>],
406-
inlining_map: &mut InliningMap<'tcx>) {
404+
caller: TransItem<'tcx>,
405+
callees: &[TransItem<'tcx>],
406+
inlining_map: &mut InliningMap<'tcx>) {
407407
let is_inlining_candidate = |trans_item: &TransItem<'tcx>| {
408408
trans_item.instantiation_mode(tcx) == InstantiationMode::LocalCopy
409409
};

src/librustc_trans/partitioning.rs

+56-57
Original file line numberDiff line numberDiff line change
@@ -279,75 +279,74 @@ fn place_root_translation_items<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
279279
let mut internalization_candidates = FxHashSet();
280280

281281
for trans_item in trans_items {
282-
let is_root = trans_item.instantiation_mode(tcx) == InstantiationMode::GloballyShared;
282+
match trans_item.instantiation_mode(tcx) {
283+
InstantiationMode::GloballyShared { .. } => {}
284+
InstantiationMode::LocalCopy => continue,
285+
}
283286

284-
if is_root {
285-
let characteristic_def_id = characteristic_def_id_of_trans_item(tcx, trans_item);
286-
let is_volatile = is_incremental_build &&
287-
trans_item.is_generic_fn();
287+
let characteristic_def_id = characteristic_def_id_of_trans_item(tcx, trans_item);
288+
let is_volatile = is_incremental_build &&
289+
trans_item.is_generic_fn();
288290

289-
let codegen_unit_name = match characteristic_def_id {
290-
Some(def_id) => compute_codegen_unit_name(tcx, def_id, is_volatile),
291-
None => Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str(),
292-
};
291+
let codegen_unit_name = match characteristic_def_id {
292+
Some(def_id) => compute_codegen_unit_name(tcx, def_id, is_volatile),
293+
None => Symbol::intern(FALLBACK_CODEGEN_UNIT).as_str(),
294+
};
293295

294-
let make_codegen_unit = || {
295-
CodegenUnit::new(codegen_unit_name.clone())
296-
};
296+
let make_codegen_unit = || {
297+
CodegenUnit::new(codegen_unit_name.clone())
298+
};
297299

298-
let codegen_unit = codegen_units.entry(codegen_unit_name.clone())
299-
.or_insert_with(make_codegen_unit);
300-
301-
let (linkage, visibility) = match trans_item.explicit_linkage(tcx) {
302-
Some(explicit_linkage) => (explicit_linkage, Visibility::Default),
303-
None => {
304-
match trans_item {
305-
TransItem::Fn(ref instance) => {
306-
let visibility = match instance.def {
307-
InstanceDef::Item(def_id) => {
308-
if def_id.is_local() {
309-
if tcx.is_exported_symbol(def_id) {
310-
Visibility::Default
311-
} else {
312-
internalization_candidates.insert(trans_item);
313-
Visibility::Hidden
314-
}
300+
let codegen_unit = codegen_units.entry(codegen_unit_name.clone())
301+
.or_insert_with(make_codegen_unit);
302+
303+
let (linkage, visibility) = match trans_item.explicit_linkage(tcx) {
304+
Some(explicit_linkage) => (explicit_linkage, Visibility::Default),
305+
None => {
306+
match trans_item {
307+
TransItem::Fn(ref instance) => {
308+
let visibility = match instance.def {
309+
InstanceDef::Item(def_id) => {
310+
if def_id.is_local() {
311+
if tcx.is_exported_symbol(def_id) {
312+
Visibility::Default
315313
} else {
316-
internalization_candidates.insert(trans_item);
317314
Visibility::Hidden
318315
}
316+
} else {
317+
Visibility::Hidden
319318
}
320-
InstanceDef::FnPtrShim(..) |
321-
InstanceDef::Virtual(..) |
322-
InstanceDef::Intrinsic(..) |
323-
InstanceDef::ClosureOnceShim { .. } |
324-
InstanceDef::DropGlue(..) |
325-
InstanceDef::CloneShim(..) => {
326-
bug!("partitioning: Encountered unexpected
327-
root translation item: {:?}",
328-
trans_item)
329-
}
330-
};
331-
(Linkage::External, visibility)
332-
}
333-
TransItem::Static(node_id) |
334-
TransItem::GlobalAsm(node_id) => {
335-
let def_id = tcx.hir.local_def_id(node_id);
336-
let visibility = if tcx.is_exported_symbol(def_id) {
337-
Visibility::Default
338-
} else {
339-
internalization_candidates.insert(trans_item);
319+
}
320+
InstanceDef::FnPtrShim(..) |
321+
InstanceDef::Virtual(..) |
322+
InstanceDef::Intrinsic(..) |
323+
InstanceDef::ClosureOnceShim { .. } |
324+
InstanceDef::DropGlue(..) |
325+
InstanceDef::CloneShim(..) => {
340326
Visibility::Hidden
341-
};
342-
(Linkage::External, visibility)
343-
}
327+
}
328+
};
329+
(Linkage::External, visibility)
330+
}
331+
TransItem::Static(node_id) |
332+
TransItem::GlobalAsm(node_id) => {
333+
let def_id = tcx.hir.local_def_id(node_id);
334+
let visibility = if tcx.is_exported_symbol(def_id) {
335+
Visibility::Default
336+
} else {
337+
Visibility::Hidden
338+
};
339+
(Linkage::External, visibility)
344340
}
345341
}
346-
};
347-
348-
codegen_unit.items_mut().insert(trans_item, (linkage, visibility));
349-
roots.insert(trans_item);
342+
}
343+
};
344+
if visibility == Visibility::Hidden {
345+
internalization_candidates.insert(trans_item);
350346
}
347+
348+
codegen_unit.items_mut().insert(trans_item, (linkage, visibility));
349+
roots.insert(trans_item);
351350
}
352351

353352
// always ensure we have at least one CGU; otherwise, if we have a

src/librustc_trans/trans_item.rs

+32-5
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ use monomorphize::Instance;
2626
use rustc::hir;
2727
use rustc::hir::def_id::DefId;
2828
use rustc::middle::trans::{Linkage, Visibility};
29+
use rustc::session::config::OptLevel;
2930
use rustc::traits;
3031
use rustc::ty::{self, Ty, TyCtxt, TypeFoldable};
3132
use rustc::ty::subst::{Subst, Substs};
@@ -44,7 +45,20 @@ pub use rustc::middle::trans::TransItem;
4445
pub enum InstantiationMode {
4546
/// There will be exactly one instance of the given TransItem. It will have
4647
/// external linkage so that it can be linked to from other codegen units.
47-
GloballyShared,
48+
GloballyShared {
49+
/// In some compilation scenarios we may decide to take functions that
50+
/// are typically `LocalCopy` and instead move them to `GloballyShared`
51+
/// to avoid translating them a bunch of times. In this situation,
52+
/// however, our local copy may conflict with other crates also
53+
/// inlining the same function.
54+
///
55+
/// This flag indicates that this situation is occuring, and informs
56+
/// symbol name calculation that some extra mangling is needed to
57+
/// avoid conflicts. Note that this may eventually go away entirely if
58+
/// ThinLTO enables us to *always* have a globally shared instance of a
59+
/// function within one crate's compilation.
60+
may_conflict: bool,
61+
},
4862

4963
/// Each codegen unit containing a reference to the given TransItem will
5064
/// have its own private copy of the function (with internal linkage).
@@ -154,18 +168,31 @@ pub trait TransItemExt<'a, 'tcx>: fmt::Debug {
154168
fn instantiation_mode(&self,
155169
tcx: TyCtxt<'a, 'tcx, 'tcx>)
156170
-> InstantiationMode {
171+
let inline_in_all_cgus =
172+
tcx.sess.opts.debugging_opts.inline_in_all_cgus.unwrap_or_else(|| {
173+
tcx.sess.opts.optimize != OptLevel::No
174+
});
175+
157176
match *self.as_trans_item() {
158177
TransItem::Fn(ref instance) => {
159178
if self.explicit_linkage(tcx).is_none() &&
160179
common::requests_inline(tcx, instance)
161180
{
162-
InstantiationMode::LocalCopy
181+
if inline_in_all_cgus {
182+
InstantiationMode::LocalCopy
183+
} else {
184+
InstantiationMode::GloballyShared { may_conflict: true }
185+
}
163186
} else {
164-
InstantiationMode::GloballyShared
187+
InstantiationMode::GloballyShared { may_conflict: false }
165188
}
166189
}
167-
TransItem::Static(..) => InstantiationMode::GloballyShared,
168-
TransItem::GlobalAsm(..) => InstantiationMode::GloballyShared,
190+
TransItem::Static(..) => {
191+
InstantiationMode::GloballyShared { may_conflict: false }
192+
}
193+
TransItem::GlobalAsm(..) => {
194+
InstantiationMode::GloballyShared { may_conflict: false }
195+
}
169196
}
170197
}
171198

src/test/codegen-units/item-collection/drop_in_place_intrinsic.rs

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
// ignore-tidy-linelength
1212
// compile-flags:-Zprint-trans-items=eager
13+
// compile-flags:-Zinline-in-all-cgus
1314

1415
//~ TRANS_ITEM fn core::ptr[0]::drop_in_place[0]<drop_in_place_intrinsic::StructWithDtor[0]> @@ drop_in_place_intrinsic0[Internal]
1516
struct StructWithDtor(u32);

src/test/codegen-units/item-collection/generic-drop-glue.rs

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
// ignore-tidy-linelength
1212
// compile-flags:-Zprint-trans-items=eager
13+
// compile-flags:-Zinline-in-all-cgus
1314

1415
#![deny(dead_code)]
1516

src/test/codegen-units/item-collection/instantiation-through-vtable.rs

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
// ignore-tidy-linelength
1212
// compile-flags:-Zprint-trans-items=eager
13+
// compile-flags:-Zinline-in-all-cgus
1314

1415
#![deny(dead_code)]
1516

src/test/codegen-units/item-collection/non-generic-drop-glue.rs

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
// ignore-tidy-linelength
1212
// compile-flags:-Zprint-trans-items=eager
13+
// compile-flags:-Zinline-in-all-cgus
1314

1415
#![deny(dead_code)]
1516

src/test/codegen-units/item-collection/transitive-drop-glue.rs

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
// ignore-tidy-linelength
1212
// compile-flags:-Zprint-trans-items=eager
13+
// compile-flags:-Zinline-in-all-cgus
1314

1415
#![deny(dead_code)]
1516

src/test/codegen-units/item-collection/tuple-drop-glue.rs

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
// ignore-tidy-linelength
1212
// compile-flags:-Zprint-trans-items=eager
13+
// compile-flags:-Zinline-in-all-cgus
1314

1415
#![deny(dead_code)]
1516

src/test/codegen-units/item-collection/unsizing.rs

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
// ignore-tidy-linelength
1212
// compile-flags:-Zprint-trans-items=eager
13+
// compile-flags:-Zinline-in-all-cgus
1314

1415
#![deny(dead_code)]
1516
#![feature(coerce_unsized)]

src/test/codegen-units/partitioning/extern-drop-glue.rs

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
// We specify -Z incremental here because we want to test the partitioning for
1414
// incremental compilation
1515
// compile-flags:-Zprint-trans-items=lazy -Zincremental=tmp/partitioning-tests/extern-drop-glue
16+
// compile-flags:-Zinline-in-all-cgus
1617

1718
#![allow(dead_code)]
1819
#![crate_type="lib"]

src/test/codegen-units/partitioning/inlining-from-extern-crate.rs

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
// We specify -Z incremental here because we want to test the partitioning for
1313
// incremental compilation
1414
// compile-flags:-Zprint-trans-items=lazy -Zincremental=tmp/partitioning-tests/inlining-from-extern-crate
15+
// compile-flags:-Zinline-in-all-cgus
1516

1617
#![crate_type="lib"]
1718

src/test/codegen-units/partitioning/local-drop-glue.rs

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
// We specify -Z incremental here because we want to test the partitioning for
1313
// incremental compilation
1414
// compile-flags:-Zprint-trans-items=lazy -Zincremental=tmp/partitioning-tests/local-drop-glue
15+
// compile-flags:-Zinline-in-all-cgus
1516

1617
#![allow(dead_code)]
1718
#![crate_type="lib"]

0 commit comments

Comments
 (0)