Skip to content

Commit 5cc6c6b

Browse files
authored
Auto merge of #36524 - michaelwoerister:trans-inline-only-on-demand, r=nikomatsakis
trans: Only instantiate #[inline] functions in codegen units referencing them

This PR changes how `#[inline]` functions are translated. Before, there was one "master instance" of the function with `external` linkage and a number of on-demand instances with `available_externally` linkage in each codegen unit that referenced the function. This had two downsides:

* Public functions marked with `#[inline]` would be present in machine code of libraries unnecessarily (see #36280 for an example)
* LLVM would crash on `i686-pc-windows-msvc` due to what I suspect to be a bug in LLVM's Win32 exception handling code, because it doesn't like `available_externally` there (#36309).

This PR changes the behavior, so that there is no master instance and only on-demand instances with `internal` linkage. The downside of this is potential code-bloat if LLVM does not completely inline away the `internal` instances because then there'd be N instances of the function instead of 1. However, this can only become a problem when using more than one codegen unit per crate.

cc @rust-lang/compiler
2 parents c772948 + cf976fe commit 5cc6c6b

File tree

7 files changed

+49
-120
lines changed

7 files changed

+49
-120
lines changed

src/librustc_trans/base.rs

+7-18
Original file line numberDiff line numberDiff line change
@@ -1421,21 +1421,7 @@ fn internalize_symbols<'a, 'tcx>(sess: &Session,
14211421
.iter()
14221422
.cloned()
14231423
.filter(|trans_item|{
1424-
let def_id = match *trans_item {
1425-
TransItem::DropGlue(..) => {
1426-
return false
1427-
},
1428-
TransItem::Fn(ref instance) => {
1429-
instance.def
1430-
}
1431-
TransItem::Static(node_id) => {
1432-
tcx.map.local_def_id(node_id)
1433-
}
1434-
};
1435-
1436-
trans_item.explicit_linkage(tcx).is_some() ||
1437-
attr::contains_extern_indicator(tcx.sess.diagnostic(),
1438-
&tcx.get_attrs(def_id))
1424+
trans_item.explicit_linkage(tcx).is_some()
14391425
})
14401426
.map(|trans_item| symbol_map.get_or_compute(scx, trans_item))
14411427
.collect();
@@ -1591,7 +1577,11 @@ pub fn filter_reachable_ids(tcx: TyCtxt, reachable: NodeSet) -> NodeSet {
15911577
node: hir::ImplItemKind::Method(..), .. }) => {
15921578
let def_id = tcx.map.local_def_id(id);
15931579
let generics = tcx.lookup_generics(def_id);
1594-
generics.parent_types == 0 && generics.types.is_empty()
1580+
let attributes = tcx.get_attrs(def_id);
1581+
(generics.parent_types == 0 && generics.types.is_empty()) &&
1582+
// Functions marked with #[inline] are only ever translated
1583+
// with "internal" linkage and are never exported.
1584+
!attr::requests_inline(&attributes[..])
15951585
}
15961586

15971587
_ => false
@@ -1896,8 +1886,7 @@ fn collect_and_partition_translation_items<'a, 'tcx>(scx: &SharedCrateContext<'a
18961886
partitioning::partition(scx,
18971887
items.iter().cloned(),
18981888
strategy,
1899-
&inlining_map,
1900-
scx.reachable())
1889+
&inlining_map)
19011890
});
19021891

19031892
assert!(scx.tcx().sess.opts.cg.codegen_units == codegen_units.len() ||

src/librustc_trans/collector.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ fn record_inlining_canditates<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
401401
callees: &[TransItem<'tcx>],
402402
inlining_map: &mut InliningMap<'tcx>) {
403403
let is_inlining_candidate = |trans_item: &TransItem<'tcx>| {
404-
trans_item.is_from_extern_crate() || trans_item.requests_inline(tcx)
404+
trans_item.needs_local_copy(tcx)
405405
};
406406

407407
let inlining_candidates = callees.into_iter()

src/librustc_trans/partitioning.rs

+9-77
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ use symbol_map::SymbolMap;
133133
use syntax::ast::NodeId;
134134
use syntax::parse::token::{self, InternedString};
135135
use trans_item::TransItem;
136-
use util::nodemap::{FnvHashMap, FnvHashSet, NodeSet};
136+
use util::nodemap::{FnvHashMap, FnvHashSet};
137137

138138
pub enum PartitioningStrategy {
139139
/// Generate one codegen unit per source-level module.
@@ -254,25 +254,17 @@ const FALLBACK_CODEGEN_UNIT: &'static str = "__rustc_fallback_codegen_unit";
254254
pub fn partition<'a, 'tcx, I>(scx: &SharedCrateContext<'a, 'tcx>,
255255
trans_items: I,
256256
strategy: PartitioningStrategy,
257-
inlining_map: &InliningMap<'tcx>,
258-
reachable: &NodeSet)
257+
inlining_map: &InliningMap<'tcx>)
259258
-> Vec<CodegenUnit<'tcx>>
260259
where I: Iterator<Item = TransItem<'tcx>>
261260
{
262261
let tcx = scx.tcx();
263262

264-
if let PartitioningStrategy::FixedUnitCount(1) = strategy {
265-
// If there is only a single codegen-unit, we can use a very simple
266-
// scheme and don't have to bother with doing much analysis.
267-
return vec![single_codegen_unit(tcx, trans_items, reachable)];
268-
}
269-
270263
// In the first step, we place all regular translation items into their
271264
// respective 'home' codegen unit. Regular translation items are all
272265
// functions and statics defined in the local crate.
273266
let mut initial_partitioning = place_root_translation_items(scx,
274-
trans_items,
275-
reachable);
267+
trans_items);
276268

277269
debug_dump(tcx, "INITIAL PARTITONING:", initial_partitioning.codegen_units.iter());
278270

@@ -310,8 +302,7 @@ struct PreInliningPartitioning<'tcx> {
310302
struct PostInliningPartitioning<'tcx>(Vec<CodegenUnit<'tcx>>);
311303

312304
fn place_root_translation_items<'a, 'tcx, I>(scx: &SharedCrateContext<'a, 'tcx>,
313-
trans_items: I,
314-
_reachable: &NodeSet)
305+
trans_items: I)
315306
-> PreInliningPartitioning<'tcx>
316307
where I: Iterator<Item = TransItem<'tcx>>
317308
{
@@ -320,7 +311,7 @@ fn place_root_translation_items<'a, 'tcx, I>(scx: &SharedCrateContext<'a, 'tcx>,
320311
let mut codegen_units = FnvHashMap();
321312

322313
for trans_item in trans_items {
323-
let is_root = !trans_item.is_instantiated_only_on_demand();
314+
let is_root = !trans_item.is_instantiated_only_on_demand(tcx);
324315

325316
if is_root {
326317
let characteristic_def_id = characteristic_def_id_of_trans_item(scx, trans_item);
@@ -350,6 +341,10 @@ fn place_root_translation_items<'a, 'tcx, I>(scx: &SharedCrateContext<'a, 'tcx>,
350341
// This is a non-generic function, we always
351342
// make it visible externally on the chance that
352343
// it might be used in another codegen unit.
344+
// Later on base::internalize_symbols() will
345+
// assign "internal" linkage to those symbols
346+
// that are not referenced from other codegen
347+
// units (and are not publicly visible).
353348
llvm::ExternalLinkage
354349
} else {
355350
// In the current setup, generic functions cannot
@@ -454,7 +449,6 @@ fn place_inlined_translation_items<'tcx>(initial_partitioning: PreInliningPartit
454449
// reliably in that case.
455450
new_codegen_unit.items.insert(trans_item, llvm::InternalLinkage);
456451
} else {
457-
assert!(trans_item.is_instantiated_only_on_demand());
458452
// We can't be sure if this will also be instantiated
459453
// somewhere else, so we add an instance here with
460454
// InternalLinkage so we don't get any conflicts.
@@ -550,68 +544,6 @@ fn compute_codegen_unit_name<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
550544
return token::intern_and_get_ident(&mod_path[..]);
551545
}
552546

553-
fn single_codegen_unit<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
554-
trans_items: I,
555-
reachable: &NodeSet)
556-
-> CodegenUnit<'tcx>
557-
where I: Iterator<Item = TransItem<'tcx>>
558-
{
559-
let mut items = FnvHashMap();
560-
561-
for trans_item in trans_items {
562-
let linkage = trans_item.explicit_linkage(tcx).unwrap_or_else(|| {
563-
match trans_item {
564-
TransItem::Static(node_id) => {
565-
if reachable.contains(&node_id) {
566-
llvm::ExternalLinkage
567-
} else {
568-
llvm::PrivateLinkage
569-
}
570-
}
571-
TransItem::DropGlue(_) => {
572-
llvm::InternalLinkage
573-
}
574-
TransItem::Fn(instance) => {
575-
if trans_item.is_generic_fn() {
576-
// FIXME(mw): Assigning internal linkage to all
577-
// monomorphizations is potentially a waste of space
578-
// since monomorphizations could be shared between
579-
// crates. The main reason for making them internal is
580-
// a limitation in MingW's binutils that cannot deal
581-
// with COFF object that have more than 2^15 sections,
582-
// which is something that can happen for large programs
583-
// when every function gets put into its own COMDAT
584-
// section.
585-
llvm::InternalLinkage
586-
} else if trans_item.is_from_extern_crate() {
587-
// FIXME(mw): It would be nice if we could mark these as
588-
// `AvailableExternallyLinkage`, since they should have
589-
// been instantiated in the extern crate. But this
590-
// sometimes leads to crashes on Windows because LLVM
591-
// does not handle exception handling table instantiation
592-
// reliably in that case.
593-
llvm::InternalLinkage
594-
} else if reachable.contains(&tcx.map
595-
.as_local_node_id(instance.def)
596-
.unwrap()) {
597-
llvm::ExternalLinkage
598-
} else {
599-
// Functions that are not visible outside this crate can
600-
// be marked as internal.
601-
llvm::InternalLinkage
602-
}
603-
}
604-
}
605-
});
606-
607-
items.insert(trans_item, linkage);
608-
}
609-
610-
CodegenUnit::new(
611-
numbered_codegen_unit_name(&tcx.crate_name[..], 0),
612-
items)
613-
}
614-
615547
fn numbered_codegen_unit_name(crate_name: &str, index: usize) -> InternedString {
616548
token::intern_and_get_ident(&format!("{}{}{}",
617549
crate_name,

src/librustc_trans/trans_item.rs

+22-15
Original file line numberDiff line numberDiff line change
@@ -241,19 +241,6 @@ impl<'a, 'tcx> TransItem<'tcx> {
241241
}
242242
}
243243

244-
pub fn requests_inline(&self, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> bool {
245-
match *self {
246-
TransItem::Fn(ref instance) => {
247-
instance.substs.types().next().is_some() || {
248-
let attributes = tcx.get_attrs(instance.def);
249-
attr::requests_inline(&attributes[..])
250-
}
251-
}
252-
TransItem::DropGlue(..) => true,
253-
TransItem::Static(..) => false,
254-
}
255-
}
256-
257244
pub fn is_from_extern_crate(&self) -> bool {
258245
match *self {
259246
TransItem::Fn(ref instance) => !instance.def.is_local(),
@@ -262,10 +249,18 @@ impl<'a, 'tcx> TransItem<'tcx> {
262249
}
263250
}
264251

265-
pub fn is_instantiated_only_on_demand(&self) -> bool {
252+
/// True if the translation item should only be translated to LLVM IR if
253+
/// it is referenced somewhere (like inline functions, for example).
254+
pub fn is_instantiated_only_on_demand(&self, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> bool {
255+
if self.explicit_linkage(tcx).is_some() {
256+
return false;
257+
}
258+
266259
match *self {
267260
TransItem::Fn(ref instance) => {
268-
!instance.def.is_local() || instance.substs.types().next().is_some()
261+
!instance.def.is_local() ||
262+
instance.substs.types().next().is_some() ||
263+
attr::requests_inline(&tcx.get_attrs(instance.def)[..])
269264
}
270265
TransItem::DropGlue(..) => true,
271266
TransItem::Static(..) => false,
@@ -282,6 +277,18 @@ impl<'a, 'tcx> TransItem<'tcx> {
282277
}
283278
}
284279

280+
/// Returns true if there has to be a local copy of this TransItem in every
281+
/// codegen unit that references it (as with inlined functions, for example).
282+
pub fn needs_local_copy(&self, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> bool {
283+
// Currently everything that is instantiated only on demand is done so
284+
// with "internal" linkage, so we need a copy to be present in every
285+
// codegen unit.
286+
// This is coincidental: We could also instantiate something only if it
287+
// is referenced (e.g. a regular, private function) but place it in its
288+
// own codegen unit with "external" linkage.
289+
self.is_instantiated_only_on_demand(tcx)
290+
}
291+
285292
pub fn explicit_linkage(&self, tcx: TyCtxt<'a, 'tcx, 'tcx>) -> Option<llvm::Linkage> {
286293
let def_id = match *self {
287294
TransItem::Fn(ref instance) => instance.def,

src/test/codegen-units/partitioning/local-inlining.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
mod inline {
2020

2121
// Important: This function should show up in all codegen units where it is inlined
22-
//~ TRANS_ITEM fn local_inlining::inline[0]::inlined_function[0] @@ local_inlining-inline[External] local_inlining-user1[Available] local_inlining-user2[Available]
22+
//~ TRANS_ITEM fn local_inlining::inline[0]::inlined_function[0] @@ local_inlining-user1[Internal] local_inlining-user2[Internal]
2323
#[inline(always)]
2424
pub fn inlined_function()
2525
{

src/test/codegen-units/partitioning/local-transitive-inlining.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
mod inline {
2020

21-
//~ TRANS_ITEM fn local_transitive_inlining::inline[0]::inlined_function[0] @@ local_transitive_inlining-inline[External] local_transitive_inlining-direct_user[Available] local_transitive_inlining-indirect_user[Available]
21+
//~ TRANS_ITEM fn local_transitive_inlining::inline[0]::inlined_function[0] @@ local_transitive_inlining-indirect_user[Internal]
2222
#[inline(always)]
2323
pub fn inlined_function()
2424
{
@@ -29,7 +29,7 @@ mod inline {
2929
mod direct_user {
3030
use super::inline;
3131

32-
//~ TRANS_ITEM fn local_transitive_inlining::direct_user[0]::foo[0] @@ local_transitive_inlining-direct_user[External] local_transitive_inlining-indirect_user[Available]
32+
//~ TRANS_ITEM fn local_transitive_inlining::direct_user[0]::foo[0] @@ local_transitive_inlining-indirect_user[Internal]
3333
#[inline(always)]
3434
pub fn foo() {
3535
inline::inlined_function();
+7-6
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
-include ../tools.mk
22

3-
# Test that #[inline(always)] functions still get inlined across compilation
4-
# unit boundaries. Compilation should produce three IR files, with each one
5-
# containing a definition of the inlined function. Also, the non-#[inline]
6-
# function should be defined in only one compilation unit.
3+
# Test that #[inline] functions still get inlined across compilation unit
4+
# boundaries. Compilation should produce three IR files, but only the two
5+
# compilation units that have a usage of the #[inline] function should
6+
# contain a definition. Also, the non-#[inline] function should be defined
7+
# in only one compilation unit.
78

89
all:
910
$(RUSTC) foo.rs --emit=llvm-ir -C codegen-units=3
10-
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ i32\ .*inlined)" -eq "1" ]
11-
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ available_externally\ i32\ .*inlined)" -eq "2" ]
11+
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ i32\ .*inlined)" -eq "0" ]
12+
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ internal\ i32\ .*inlined)" -eq "2" ]
1213
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c define\ i32\ .*normal)" -eq "1" ]
1314
[ "$$(cat "$(TMPDIR)"/foo.?.ll | grep -c declare\ i32\ .*normal)" -eq "2" ]

0 commit comments

Comments
 (0)