Skip to content

Commit 6e53e66

Browse files
committed
MCP #705: Provide the option -Csymbol-mangling-version=hashed -Z unstable-options to shorten symbol names by replacing them with a digest.
Enrich test cases
1 parent 7ffc697 commit 6e53e66

File tree

15 files changed

+211
-49
lines changed

15 files changed

+211
-49
lines changed

compiler/rustc_session/src/config.rs

+15
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,7 @@ impl SwitchWithOptPath {
347347
pub enum SymbolManglingVersion {
348348
Legacy,
349349
V0,
350+
Hashed,
350351
}
351352

352353
#[derive(Clone, Copy, Debug, PartialEq, Hash)]
@@ -2692,6 +2693,7 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
26922693
match cg.symbol_mangling_version {
26932694
// Stable values:
26942695
None | Some(SymbolManglingVersion::V0) => {}
2696+
26952697
// Unstable values:
26962698
Some(SymbolManglingVersion::Legacy) => {
26972699
if !unstable_opts.unstable_options {
@@ -2700,6 +2702,13 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
27002702
);
27012703
}
27022704
}
2705+
Some(SymbolManglingVersion::Hashed) => {
2706+
if !unstable_opts.unstable_options {
2707+
early_dcx.early_fatal(
2708+
"`-C symbol-mangling-version=hashed` requires `-Z unstable-options`",
2709+
);
2710+
}
2711+
}
27032712
}
27042713

27052714
// Check for unstable values of `-C instrument-coverage`.
@@ -2741,6 +2750,12 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
27412750
);
27422751
}
27432752
Some(SymbolManglingVersion::V0) => {}
2753+
Some(SymbolManglingVersion::Hashed) => {
2754+
early_dcx.early_warn(
2755+
"-C instrument-coverage requires symbol mangling version `v0`, \
2756+
but `-C symbol-mangling-version=hashed` was specified",
2757+
);
2758+
}
27442759
}
27452760
}
27462761

compiler/rustc_session/src/options.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,8 @@ mod desc {
407407
pub const parse_switch_with_opt_path: &str =
408408
"an optional path to the profiling data output directory";
409409
pub const parse_merge_functions: &str = "one of: `disabled`, `trampolines`, or `aliases`";
410-
pub const parse_symbol_mangling_version: &str = "either `legacy` or `v0` (RFC 2603)";
410+
pub const parse_symbol_mangling_version: &str =
411+
"one of: `legacy`, `v0` (RFC 2603), or `hashed`";
411412
pub const parse_src_file_hash: &str = "either `md5` or `sha1`";
412413
pub const parse_relocation_model: &str =
413414
"one of supported relocation models (`rustc --print relocation-models`)";
@@ -1180,6 +1181,7 @@ mod parse {
11801181
*slot = match v {
11811182
Some("legacy") => Some(SymbolManglingVersion::Legacy),
11821183
Some("v0") => Some(SymbolManglingVersion::V0),
1184+
Some("hashed") => Some(SymbolManglingVersion::Hashed),
11831185
_ => return false,
11841186
};
11851187
true
@@ -1504,7 +1506,7 @@ options! {
15041506
"tell the linker which information to strip (`none` (default), `debuginfo` or `symbols`)"),
15051507
symbol_mangling_version: Option<SymbolManglingVersion> = (None,
15061508
parse_symbol_mangling_version, [TRACKED],
1507-
"which mangling version to use for symbol names ('legacy' (default) or 'v0')"),
1509+
"which mangling version to use for symbol names ('legacy' (default), 'v0', or 'hashed')"),
15081510
target_cpu: Option<String> = (None, parse_opt_string, [TRACKED],
15091511
"select target processor (`rustc --print target-cpus` for details)"),
15101512
target_feature: String = (String::new(), parse_target_feature, [TRACKED],
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
use crate::v0;
2+
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
3+
use rustc_hir::def_id::CrateNum;
4+
use rustc_middle::ty::{Instance, TyCtxt};
5+
6+
use std::fmt::Write;
7+
8+
pub(super) fn mangle<'tcx>(
9+
tcx: TyCtxt<'tcx>,
10+
instance: Instance<'tcx>,
11+
instantiating_crate: Option<CrateNum>,
12+
full_mangling_name: impl FnOnce() -> String,
13+
) -> String {
14+
// The symbol of a generic function may be scattered in multiple downstream dylibs.
15+
// If the symbol of a generic function still contains `crate name`, hash conflicts between the
16+
// generic funcion and other symbols of the same `crate` cannot be detected in time during
17+
// construction. This symbol conflict is left over until it occurs during run time.
18+
// In this case, `instantiating-crate name` is used to replace `crate name` can completely
19+
// eliminate the risk of the preceding potential hash conflict.
20+
let crate_num =
21+
if let Some(krate) = instantiating_crate { krate } else { instance.def_id().krate };
22+
23+
let mut symbol = "_RNxC".to_string();
24+
v0::push_ident(tcx.crate_name(crate_num).as_str(), &mut symbol);
25+
26+
let hash = tcx.with_stable_hashing_context(|mut hcx| {
27+
let mut hasher = StableHasher::new();
28+
full_mangling_name().hash_stable(&mut hcx, &mut hasher);
29+
hasher.finish::<Hash64>().as_u64()
30+
});
31+
32+
push_hash64(hash, &mut symbol);
33+
34+
symbol
35+
}
36+
37+
// The hash is encoded based on `base-62` and the final terminator `_` is removed because it does
38+
// not help prevent hash collisions
39+
fn push_hash64(hash: u64, output: &mut String) {
40+
let hash = v0::encode_integer_62(hash);
41+
let hash_len = hash.len();
42+
let _ = write!(output, "{hash_len}H{}", &hash[..hash_len - 1]);
43+
}

compiler/rustc_symbol_mangling/src/lib.rs

+4
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ use rustc_middle::query::Providers;
111111
use rustc_middle::ty::{self, Instance, TyCtxt};
112112
use rustc_session::config::SymbolManglingVersion;
113113

114+
mod hashed;
114115
mod legacy;
115116
mod v0;
116117

@@ -265,6 +266,9 @@ fn compute_symbol_name<'tcx>(
265266
let symbol = match mangling_version {
266267
SymbolManglingVersion::Legacy => legacy::mangle(tcx, instance, instantiating_crate),
267268
SymbolManglingVersion::V0 => v0::mangle(tcx, instance, instantiating_crate),
269+
SymbolManglingVersion::Hashed => hashed::mangle(tcx, instance, instantiating_crate, || {
270+
v0::mangle(tcx, instance, instantiating_crate)
271+
}),
268272
};
269273

270274
debug_assert!(

compiler/rustc_symbol_mangling/src/v0.rs

+61-43
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,7 @@ impl<'tcx> SymbolMangler<'tcx> {
116116
/// * `x > 0` is encoded as `x - 1` in base 62, followed by `"_"`,
117117
/// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
118118
fn push_integer_62(&mut self, x: u64) {
119-
if let Some(x) = x.checked_sub(1) {
120-
base_n::push_str(x as u128, 62, &mut self.out);
121-
}
122-
self.push("_");
119+
push_integer_62(x, &mut self.out)
123120
}
124121

125122
/// Push a `tag`-prefixed base 62 integer, when larger than `0`, that is:
@@ -138,45 +135,7 @@ impl<'tcx> SymbolMangler<'tcx> {
138135
}
139136

140137
fn push_ident(&mut self, ident: &str) {
141-
let mut use_punycode = false;
142-
for b in ident.bytes() {
143-
match b {
144-
b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => {}
145-
0x80..=0xff => use_punycode = true,
146-
_ => bug!("symbol_names: bad byte {} in ident {:?}", b, ident),
147-
}
148-
}
149-
150-
let punycode_string;
151-
let ident = if use_punycode {
152-
self.push("u");
153-
154-
// FIXME(eddyb) we should probably roll our own punycode implementation.
155-
let mut punycode_bytes = match punycode::encode(ident) {
156-
Ok(s) => s.into_bytes(),
157-
Err(()) => bug!("symbol_names: punycode encoding failed for ident {:?}", ident),
158-
};
159-
160-
// Replace `-` with `_`.
161-
if let Some(c) = punycode_bytes.iter_mut().rfind(|&&mut c| c == b'-') {
162-
*c = b'_';
163-
}
164-
165-
// FIXME(eddyb) avoid rechecking UTF-8 validity.
166-
punycode_string = String::from_utf8(punycode_bytes).unwrap();
167-
&punycode_string
168-
} else {
169-
ident
170-
};
171-
172-
let _ = write!(self.out, "{}", ident.len());
173-
174-
// Write a separating `_` if necessary (leading digit or `_`).
175-
if let Some('_' | '0'..='9') = ident.chars().next() {
176-
self.push("_");
177-
}
178-
179-
self.push(ident);
138+
push_ident(ident, &mut self.out)
180139
}
181140

182141
fn path_append_ns(
@@ -836,3 +795,62 @@ impl<'tcx> Printer<'tcx> for SymbolMangler<'tcx> {
836795
Ok(())
837796
}
838797
}
798+
/// Push a `_`-terminated base 62 integer, using the format
799+
/// specified in the RFC as `<base-62-number>`, that is:
800+
/// * `x = 0` is encoded as just the `"_"` terminator
801+
/// * `x > 0` is encoded as `x - 1` in base 62, followed by `"_"`,
802+
/// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
803+
pub(crate) fn push_integer_62(x: u64, output: &mut String) {
804+
if let Some(x) = x.checked_sub(1) {
805+
base_n::push_str(x as u128, 62, output);
806+
}
807+
output.push('_');
808+
}
809+
810+
pub(crate) fn encode_integer_62(x: u64) -> String {
811+
let mut output = String::new();
812+
push_integer_62(x, &mut output);
813+
output
814+
}
815+
816+
pub(crate) fn push_ident(ident: &str, output: &mut String) {
817+
let mut use_punycode = false;
818+
for b in ident.bytes() {
819+
match b {
820+
b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => {}
821+
0x80..=0xff => use_punycode = true,
822+
_ => bug!("symbol_names: bad byte {} in ident {:?}", b, ident),
823+
}
824+
}
825+
826+
let punycode_string;
827+
let ident = if use_punycode {
828+
output.push('u');
829+
830+
// FIXME(eddyb) we should probably roll our own punycode implementation.
831+
let mut punycode_bytes = match punycode::encode(ident) {
832+
Ok(s) => s.into_bytes(),
833+
Err(()) => bug!("symbol_names: punycode encoding failed for ident {:?}", ident),
834+
};
835+
836+
// Replace `-` with `_`.
837+
if let Some(c) = punycode_bytes.iter_mut().rfind(|&&mut c| c == b'-') {
838+
*c = b'_';
839+
}
840+
841+
// FIXME(eddyb) avoid rechecking UTF-8 validity.
842+
punycode_string = String::from_utf8(punycode_bytes).unwrap();
843+
&punycode_string
844+
} else {
845+
ident
846+
};
847+
848+
let _ = write!(output, "{}", ident.len());
849+
850+
// Write a separating `_` if necessary (leading digit or `_`).
851+
if let Some('_' | '0'..='9') = ident.chars().next() {
852+
output.push('_');
853+
}
854+
855+
output.push_str(ident);
856+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
include ../tools.mk

# Run-make test for `-C symbol-mangling-version=hashed`.
#
# `a_dylib.rs` and `a_rlib.rs` are built with the hashed mangling; `b_dylib.rs` and
# `b_bin.rs` are built without the flag and link against them. The symbol tables are
# then inspected with `nm` and the final binary is executed.

# ignore-cross-compile
# only-linux
# only-x86_64

# Defaults: list dynamic symbols of ELF shared objects.
NM=nm -D
RLIB_NAME=liba_rlib.rlib
DYLIB_NAME=liba_dylib.so
SO_NAME=libb_dylib.so
BIN_NAME=b_bin

# NOTE(review): with `only-linux` above, the Darwin and Windows overrides below look
# unreachable -- confirm whether they are kept for a future widening of the test.
ifeq ($(UNAME),Darwin)
NM=nm -gU
RLIB_NAME=liba_rlib.rlib
DYLIB_NAME=liba_dylib.dylib
SO_NAME=libb_dylib.dylib
BIN_NAME=b_bin
endif

ifdef IS_WINDOWS
NM=nm -g
RLIB_NAME=liba_rlib.dll.a
DYLIB_NAME=liba_dylib.dll
SO_NAME=libb_dylib.dll
BIN_NAME=b_bin.exe
endif

# NOTE(review): RLIB_NAME is assigned above but not referenced by the recipe below.

all:
	$(RUSTC) -C prefer-dynamic -Z unstable-options -C symbol-mangling-version=hashed -C metadata=foo a_dylib.rs
	$(RUSTC) -C prefer-dynamic -Z unstable-options -C symbol-mangling-version=hashed -C metadata=bar a_rlib.rs
	$(RUSTC) -C prefer-dynamic -L $(TMPDIR) b_dylib.rs
	$(RUSTC) -C prefer-dynamic -L $(TMPDIR) b_bin.rs

# Check hashed symbol name

# a_dylib: no symbol mentions `hello`; exactly one ` T ` symbol carries the
# `_RNxC7a_dylib` prefix.
	[ "$$($(NM) $(TMPDIR)/$(DYLIB_NAME) | grep -c hello)" -eq "0" ]
	[ "$$($(NM) $(TMPDIR)/$(DYLIB_NAME) | grep _RNxC7a_dylib | grep -c ' T ')" -eq "1" ]

# b_dylib: its own `hello` is still visible by name; the a_rlib symbol appears as ` T `
# and the a_dylib symbol as ` U `.
	[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep b_dylib | grep -c hello)" -eq "1" ]
	[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep _RNxC6a_rlib | grep -c ' T ')" -eq "1" ]
	[ "$$($(NM) $(TMPDIR)/$(SO_NAME) | grep _RNxC7a_dylib | grep -c ' U ')" -eq "1" ]

# b_bin: all three `hello` symbols appear as ` U ` entries.
	[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep _RNxC6a_rlib | grep -c ' U ')" -eq "1" ]
	[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep _RNxC7a_dylib | grep -c ' U ')" -eq "1" ]
	[ "$$($(NM) $(TMPDIR)/$(BIN_NAME) | grep b_dylib | grep hello | grep -c ' U ')" -eq "1" ]

# Finally make sure the linked binary actually runs.
	$(call RUN,$(BIN_NAME))
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#![crate_type="dylib"]
2+
/// Public entry point of the `dylib` test fixture; prints a fixed line to stdout.
pub fn hello() {
    println!("hello dylib");
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#![crate_type="rlib"]
2+
3+
/// Public entry point of the `rlib` test fixture; prints a fixed line to stdout.
pub fn hello() {
    println!("hello rlib");
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
extern crate a_rlib;
2+
extern crate a_dylib;
3+
extern crate b_dylib;
4+
5+
/// Calls `hello` from each of the three linked crates in turn: `a_rlib`, `a_dylib`,
/// then `b_dylib`.
fn main() {
    a_rlib::hello();
    a_dylib::hello();
    b_dylib::hello();
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#![crate_type="dylib"]
2+
3+
extern crate a_rlib;
4+
extern crate a_dylib;
5+
6+
/// Public entry point of the `b_dylib` fixture; forwards to `a_rlib::hello` and then
/// `a_dylib::hello`, so this crate references symbols from both.
pub fn hello() {
    a_rlib::hello();
    a_dylib::hello();
}
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
error: incorrect value `bad-value` for codegen option `symbol-mangling-version` - either `legacy` or `v0` (RFC 2603) was expected
1+
error: incorrect value `bad-value` for codegen option `symbol-mangling-version` - one of: `legacy`, `v0` (RFC 2603), or `hashed` was expected
22

Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
error: incorrect value `` for codegen option `symbol-mangling-version` - either `legacy` or `v0` (RFC 2603) was expected
1+
error: incorrect value `` for codegen option `symbol-mangling-version` - one of: `legacy`, `v0` (RFC 2603), or `hashed` was expected
22

Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
error: codegen option `symbol-mangling-version` requires either `legacy` or `v0` (RFC 2603) (C symbol-mangling-version=<value>)
1+
error: codegen option `symbol-mangling-version` requires one of: `legacy`, `v0` (RFC 2603), or `hashed` (C symbol-mangling-version=<value>)
22

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
error: `-C symbol-mangling-version=hashed` requires `-Z unstable-options`
2+
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1-
// revisions: legacy legacy-ok
1+
// revisions: legacy legacy-ok hashed hashed-ok
22
// [legacy] compile-flags: -Csymbol-mangling-version=legacy
33
// [legacy-ok] check-pass
44
// [legacy-ok] compile-flags: -Zunstable-options -Csymbol-mangling-version=legacy
5+
// [hashed] compile-flags: -Csymbol-mangling-version=hashed
6+
// [hashed-ok] check-pass
7+
// [hashed-ok] compile-flags: -Zunstable-options -Csymbol-mangling-version=hashed
58

69
fn main() {}

0 commit comments

Comments
 (0)