Skip to content

Commit 04521fd

Browse files
committed
Auto merge of #118636 - h1467792822:dev, r=michaelwoerister
Add the unstable option to reduce the binary size of dynamic library… # Motivation The average length of symbol names in the Rust standard library is about 100 bytes, while the average length of symbol names in the C++ standard library is about 65 bytes. In some embedded environments where dynamic libraries are widely used, the space occupied by Rust dynamic library symbol names has become one of the key bottlenecks for applications, especially when an existing C/C++ module is rewritten as a Rust module. The unstable option value `-C symbol-mangling-version=hashed` (which requires `-Z unstable-options`) is added to solve the bottleneck caused by overly long dynamic library symbol names. ## Test data The following is a set of test data collected in an Ubuntu 18.04 LTS environment. With this option, the space saving rate of dynamic libraries can reach about 20%. The test objects are the Rust standard library (built with Xargo), the tokio crate, and the hyper crate. The contents of the Cargo.toml file in the build project of the three dynamic libraries are as follows: ```txt # Cargo.toml [profile.release] panic = "abort" opt-leve="z" codegen-units=1 strip=true debug=true ``` The `.rustc` segments, which are not needed at run time, are also removed from the built dynamic libraries before the sizes are compared. The detailed data is as follows: 1. libstd.so > | symbol_mangling_version | size | saving rate | > | --- | --- | --- | > | legacy | 804896 || > | hashed | 608288 | 0.244 | > | v0 | 858144 || > | hashed | 608288 | 0.291 | 2. libhyper.so > | symbol_mangling_version(libhyper.so) | symbol_mangling_version(libstd.so) | size | saving rate | > | --- | --- | --- | --- | > | legacy | legacy | 866312 || > | hashed | legacy | 645128 |0.255| > | legacy | hashed | 854024 || > | hashed | hashed | 632840 |0.259|
2 parents b362939 + 6e53e66 commit 04521fd

File tree

15 files changed

+211
-49
lines changed

15 files changed

+211
-49
lines changed

compiler/rustc_session/src/config.rs

+15
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ impl SwitchWithOptPath {
347347
pub enum SymbolManglingVersion {
348348
Legacy,
349349
V0,
350+
Hashed,
350351
}
351352

352353
#[derive(Clone, Copy, Debug, PartialEq, Hash)]
@@ -2692,6 +2693,7 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
26922693
match cg.symbol_mangling_version {
26932694
// Stable values:
26942695
None | Some(SymbolManglingVersion::V0) => {}
2696+
26952697
// Unstable values:
26962698
Some(SymbolManglingVersion::Legacy) => {
26972699
if !unstable_opts.unstable_options {
@@ -2700,6 +2702,13 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
27002702
);
27012703
}
27022704
}
2705+
Some(SymbolManglingVersion::Hashed) => {
2706+
if !unstable_opts.unstable_options {
2707+
early_dcx.early_fatal(
2708+
"`-C symbol-mangling-version=hashed` requires `-Z unstable-options`",
2709+
);
2710+
}
2711+
}
27032712
}
27042713

27052714
// Check for unstable values of `-C instrument-coverage`.
@@ -2741,6 +2750,12 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
27412750
);
27422751
}
27432752
Some(SymbolManglingVersion::V0) => {}
2753+
Some(SymbolManglingVersion::Hashed) => {
2754+
early_dcx.early_warn(
2755+
"-C instrument-coverage requires symbol mangling version `v0`, \
2756+
but `-C symbol-mangling-version=hashed` was specified",
2757+
);
2758+
}
27442759
}
27452760
}
27462761

compiler/rustc_session/src/options.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,8 @@ mod desc {
407407
pub const parse_switch_with_opt_path: &str =
408408
"an optional path to the profiling data output directory";
409409
pub const parse_merge_functions: &str = "one of: `disabled`, `trampolines`, or `aliases`";
410-
pub const parse_symbol_mangling_version: &str = "either `legacy` or `v0` (RFC 2603)";
410+
pub const parse_symbol_mangling_version: &str =
411+
"one of: `legacy`, `v0` (RFC 2603), or `hashed`";
411412
pub const parse_src_file_hash: &str = "either `md5` or `sha1`";
412413
pub const parse_relocation_model: &str =
413414
"one of supported relocation models (`rustc --print relocation-models`)";
@@ -1180,6 +1181,7 @@ mod parse {
11801181
*slot = match v {
11811182
Some("legacy") => Some(SymbolManglingVersion::Legacy),
11821183
Some("v0") => Some(SymbolManglingVersion::V0),
1184+
Some("hashed") => Some(SymbolManglingVersion::Hashed),
11831185
_ => return false,
11841186
};
11851187
true
@@ -1504,7 +1506,7 @@ options! {
15041506
"tell the linker which information to strip (`none` (default), `debuginfo` or `symbols`)"),
15051507
symbol_mangling_version: Option<SymbolManglingVersion> = (None,
15061508
parse_symbol_mangling_version, [TRACKED],
1507-
"which mangling version to use for symbol names ('legacy' (default) or 'v0')"),
1509+
"which mangling version to use for symbol names ('legacy' (default), 'v0', or 'hashed')"),
15081510
target_cpu: Option<String> = (None, parse_opt_string, [TRACKED],
15091511
"select target processor (`rustc --print target-cpus` for details)"),
15101512
target_feature: String = (String::new(), parse_target_feature, [TRACKED],
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
use crate::v0;
2+
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
3+
use rustc_hir::def_id::CrateNum;
4+
use rustc_middle::ty::{Instance, TyCtxt};
5+
6+
use std::fmt::Write;
7+
8+
pub(super) fn mangle<'tcx>(
9+
tcx: TyCtxt<'tcx>,
10+
instance: Instance<'tcx>,
11+
instantiating_crate: Option<CrateNum>,
12+
full_mangling_name: impl FnOnce() -> String,
13+
) -> String {
14+
// The symbol of a generic function may be scattered across multiple downstream dylibs.
15+
// If such a symbol still contained the defining `crate name`, a hash conflict between the
16+
// generic function and another symbol of the same `crate` could not be detected in time
17+
// during construction; the conflict would stay hidden until it occurs at run time.
18+
// Using the `instantiating-crate name` in place of the `crate name` in this case
19+
// eliminates the risk of the potential hash conflict described above.
20+
let crate_num =
21+
if let Some(krate) = instantiating_crate { krate } else { instance.def_id().krate };
22+
23+
let mut symbol = "_RNxC".to_string();
24+
v0::push_ident(tcx.crate_name(crate_num).as_str(), &mut symbol);
25+
26+
let hash = tcx.with_stable_hashing_context(|mut hcx| {
27+
let mut hasher = StableHasher::new();
28+
full_mangling_name().hash_stable(&mut hcx, &mut hasher);
29+
hasher.finish::<Hash64>().as_u64()
30+
});
31+
32+
push_hash64(hash, &mut symbol);
33+
34+
symbol
35+
}
36+
37+
// The hash is base-62 encoded and its trailing `_` terminator is dropped, since the terminator
38+
// does not help prevent hash collisions. The length prefix counts the `H` tag plus the digits.
39+
fn push_hash64(hash: u64, output: &mut String) {
40+
let hash = v0::encode_integer_62(hash);
41+
let hash_len = hash.len();
42+
let _ = write!(output, "{hash_len}H{}", &hash[..hash_len - 1]);
43+
}

compiler/rustc_symbol_mangling/src/lib.rs

+4
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ use rustc_middle::query::Providers;
109109
use rustc_middle::ty::{self, Instance, TyCtxt};
110110
use rustc_session::config::SymbolManglingVersion;
111111

112+
mod hashed;
112113
mod legacy;
113114
mod v0;
114115

@@ -263,6 +264,9 @@ fn compute_symbol_name<'tcx>(
263264
let symbol = match mangling_version {
264265
SymbolManglingVersion::Legacy => legacy::mangle(tcx, instance, instantiating_crate),
265266
SymbolManglingVersion::V0 => v0::mangle(tcx, instance, instantiating_crate),
267+
SymbolManglingVersion::Hashed => hashed::mangle(tcx, instance, instantiating_crate, || {
268+
v0::mangle(tcx, instance, instantiating_crate)
269+
}),
266270
};
267271

268272
debug_assert!(

compiler/rustc_symbol_mangling/src/v0.rs

+61-43
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,7 @@ impl<'tcx> SymbolMangler<'tcx> {
116116
/// * `x > 0` is encoded as `x - 1` in base 62, followed by `"_"`,
117117
/// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
118118
fn push_integer_62(&mut self, x: u64) {
119-
if let Some(x) = x.checked_sub(1) {
120-
base_n::push_str(x as u128, 62, &mut self.out);
121-
}
122-
self.push("_");
119+
push_integer_62(x, &mut self.out)
123120
}
124121

125122
/// Push a `tag`-prefixed base 62 integer, when larger than `0`, that is:
@@ -138,45 +135,7 @@ impl<'tcx> SymbolMangler<'tcx> {
138135
}
139136

140137
fn push_ident(&mut self, ident: &str) {
141-
let mut use_punycode = false;
142-
for b in ident.bytes() {
143-
match b {
144-
b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => {}
145-
0x80..=0xff => use_punycode = true,
146-
_ => bug!("symbol_names: bad byte {} in ident {:?}", b, ident),
147-
}
148-
}
149-
150-
let punycode_string;
151-
let ident = if use_punycode {
152-
self.push("u");
153-
154-
// FIXME(eddyb) we should probably roll our own punycode implementation.
155-
let mut punycode_bytes = match punycode::encode(ident) {
156-
Ok(s) => s.into_bytes(),
157-
Err(()) => bug!("symbol_names: punycode encoding failed for ident {:?}", ident),
158-
};
159-
160-
// Replace `-` with `_`.
161-
if let Some(c) = punycode_bytes.iter_mut().rfind(|&&mut c| c == b'-') {
162-
*c = b'_';
163-
}
164-
165-
// FIXME(eddyb) avoid rechecking UTF-8 validity.
166-
punycode_string = String::from_utf8(punycode_bytes).unwrap();
167-
&punycode_string
168-
} else {
169-
ident
170-
};
171-
172-
let _ = write!(self.out, "{}", ident.len());
173-
174-
// Write a separating `_` if necessary (leading digit or `_`).
175-
if let Some('_' | '0'..='9') = ident.chars().next() {
176-
self.push("_");
177-
}
178-
179-
self.push(ident);
138+
push_ident(ident, &mut self.out)
180139
}
181140

182141
fn path_append_ns(
@@ -836,3 +795,62 @@ impl<'tcx> Printer<'tcx> for SymbolMangler<'tcx> {
836795
Ok(())
837796
}
838797
}
798+
/// Push a `_`-terminated base 62 integer, using the format
799+
/// specified in the RFC as `<base-62-number>`, that is:
800+
/// * `x = 0` is encoded as just the `"_"` terminator
801+
/// * `x > 0` is encoded as `x - 1` in base 62, followed by `"_"`,
802+
/// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
803+
pub(crate) fn push_integer_62(x: u64, output: &mut String) {
804+
if let Some(x) = x.checked_sub(1) {
805+
base_n::push_str(x as u128, 62, output);
806+
}
807+
output.push('_');
808+
}
809+
810+
pub(crate) fn encode_integer_62(x: u64) -> String {
811+
let mut output = String::new();
812+
push_integer_62(x, &mut output);
813+
output
814+
}
815+
816+
pub(crate) fn push_ident(ident: &str, output: &mut String) {
817+
let mut use_punycode = false;
818+
for b in ident.bytes() {
819+
match b {
820+
b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => {}
821+
0x80..=0xff => use_punycode = true,
822+
_ => bug!("symbol_names: bad byte {} in ident {:?}", b, ident),
823+
}
824+
}
825+
826+
let punycode_string;
827+
let ident = if use_punycode {
828+
output.push('u');
829+
830+
// FIXME(eddyb) we should probably roll our own punycode implementation.
831+
let mut punycode_bytes = match punycode::encode(ident) {
832+
Ok(s) => s.into_bytes(),
833+
Err(()) => bug!("symbol_names: punycode encoding failed for ident {:?}", ident),
834+
};
835+
836+
// Replace `-` with `_`.
837+
if let Some(c) = punycode_bytes.iter_mut().rfind(|&&mut c| c == b'-') {
838+
*c = b'_';
839+
}
840+
841+
// FIXME(eddyb) avoid rechecking UTF-8 validity.
842+
punycode_string = String::from_utf8(punycode_bytes).unwrap();
843+
&punycode_string
844+
} else {
845+
ident
846+
};
847+
848+
let _ = write!(output, "{}", ident.len());
849+
850+
// Write a separating `_` if necessary (leading digit or `_`).
851+
if let Some('_' | '0'..='9') = ident.chars().next() {
852+
output.push('_');
853+
}
854+
855+
output.push_str(ident);
856+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
include ../tools.mk
2+
3+
# ignore-cross-compile
4+
# only-linux
5+
# only-x86_64
6+
7+
NM=nm -D
8+
RLIB_NAME=liba_rlib.rlib
9+
DYLIB_NAME=liba_dylib.so
10+
SO_NAME=libb_dylib.so
11+
BIN_NAME=b_bin
12+
13+
ifeq ($(UNAME),Darwin)
14+
NM=nm -gU
15+
RLIB_NAME=liba_rlib.rlib
16+
DYLIB_NAME=liba_dylib.dylib
17+
SO_NAME=libb_dylib.dylib
18+
BIN_NAME=b_bin
19+
endif
20+
21+
ifdef IS_WINDOWS
22+
NM=nm -g
23+
RLIB_NAME=liba_rlib.dll.a
24+
DYLIB_NAME=liba_dylib.dll
25+
SO_NAME=libb_dylib.dll
26+
BIN_NAME=b_bin.exe
27+
endif
28+
29+
all:
30+
$(RUSTC) -C prefer-dynamic -Z unstable-options -C symbol-mangling-version=hashed -C metadata=foo a_dylib.rs
31+
$(RUSTC) -C prefer-dynamic -Z unstable-options -C symbol-mangling-version=hashed -C metadata=bar a_rlib.rs
32+
$(RUSTC) -C prefer-dynamic -L $(TMPDIR) b_dylib.rs
33+
$(RUSTC) -C prefer-dynamic -L $(TMPDIR) b_bin.rs
34+
35+
# Check hashed symbol name
36+
37+
[ "$$($(NM) $(TMPDIR)/$(DYLIB_NAME) | grep -c hello)" -eq "0" ]
38+
[ "$$($(NM) $(TMPDIR)/$(DYLIB_NAME) | grep _RNxC7a_dylib | grep -c ' T ')" -eq "1" ]
39+
40+
[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep b_dylib | grep -c hello)" -eq "1" ]
41+
[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep _RNxC6a_rlib | grep -c ' T ')" -eq "1" ]
42+
[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep _RNxC7a_dylib | grep -c ' U ')" -eq "1" ]
43+
44+
[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep _RNxC6a_rlib | grep -c ' U ')" -eq "1" ]
45+
[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep _RNxC7a_dylib | grep -c ' U ')" -eq "1" ]
46+
[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep b_dylib | grep hello | grep -c ' U ')" -eq "1" ]
47+
48+
$(call RUN,$(BIN_NAME))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#![crate_type="dylib"]
2+
pub fn hello() {
3+
println!("hello dylib");
4+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#![crate_type="rlib"]
2+
3+
pub fn hello() {
4+
println!("hello rlib");
5+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
extern crate a_rlib;
2+
extern crate a_dylib;
3+
extern crate b_dylib;
4+
5+
fn main() {
6+
a_rlib::hello();
7+
a_dylib::hello();
8+
b_dylib::hello();
9+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#![crate_type="dylib"]
2+
3+
extern crate a_rlib;
4+
extern crate a_dylib;
5+
6+
pub fn hello() {
7+
a_rlib::hello();
8+
a_dylib::hello();
9+
}
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
error: incorrect value `bad-value` for codegen option `symbol-mangling-version` - either `legacy` or `v0` (RFC 2603) was expected
1+
error: incorrect value `bad-value` for codegen option `symbol-mangling-version` - one of: `legacy`, `v0` (RFC 2603), or `hashed` was expected
22

Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
error: incorrect value `` for codegen option `symbol-mangling-version` - either `legacy` or `v0` (RFC 2603) was expected
1+
error: incorrect value `` for codegen option `symbol-mangling-version` - one of: `legacy`, `v0` (RFC 2603), or `hashed` was expected
22

Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
error: codegen option `symbol-mangling-version` requires either `legacy` or `v0` (RFC 2603) (C symbol-mangling-version=<value>)
1+
error: codegen option `symbol-mangling-version` requires one of: `legacy`, `v0` (RFC 2603), or `hashed` (C symbol-mangling-version=<value>)
22

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
error: `-C symbol-mangling-version=hashed` requires `-Z unstable-options`
2+
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1-
// revisions: legacy legacy-ok
1+
// revisions: legacy legacy-ok hashed hashed-ok
22
// [legacy] compile-flags: -Csymbol-mangling-version=legacy
33
// [legacy-ok] check-pass
44
// [legacy-ok] compile-flags: -Zunstable-options -Csymbol-mangling-version=legacy
5+
// [hashed] compile-flags: -Csymbol-mangling-version=hashed
6+
// [hashed-ok] check-pass
7+
// [hashed-ok] compile-flags: -Zunstable-options -Csymbol-mangling-version=hashed
58

69
fn main() {}

0 commit comments

Comments
 (0)