Skip to content

Commit cc705b8

Browse files
committed
Auto merge of #116046 - Zalathar:fn-cov-info, r=cjgillot
coverage: Move most per-function coverage info into `mir::Body` Currently, all of the coverage information collected by the `InstrumentCoverage` pass is smuggled through MIR in the form of individual `StatementKind::Coverage` statements, which must then be reassembled by coverage codegen. That's awkward for a number of reasons: - While some of the coverage statements do care about their specific position in the MIR control-flow graph, many of them don't, and are just tacked onto the function's first BB as metadata carriers. - MIR inlining can result in coverage statements being duplicated, so coverage codegen has to jump through hoops to avoid emitting duplicate mappings. - MIR optimizations that would delete coverage statements need to carefully copy them into the function's first BB so as not to omit them from coverage reports. - The order in which coverage codegen sees coverage statements is dependent on MIR optimizations/inlining, which can cause unnecessary churn in the emitted coverage mappings. - We don't have a good way to annotate MIR-level functions with extra coverage info that doesn't belong in a statement. --- This PR therefore takes most of the per-function coverage info and stores it in a field in `mir::Body` as `Option<Box<FunctionCoverageInfo>>`. (This adds one pointer to the size of `mir::Body`, even when coverage is not enabled.) Coverage statements still need to be injected into MIR in some cases, but only when they actually affect codegen (counters) or are needed to detect code that has been optimized away as unreachable (counters/expressions). --- By the end of this PR, the information stored in `FunctionCoverageInfo` is: - A hash of the function's source code (needed by LLVM's coverage map format) - The number of coverage counters added by coverage instrumentation - A table of coverage expressions, associating each expression ID with its operator (add or subtract) and its two operands - The list of mappings, associating each covered code region with a counter/expression/zero value --- ~~This is built on top of #115301, so I'll rebase and roll a reviewer once that lands.~~ r? `@ghost` `@rustbot` label +A-code-coverage
2 parents e1aa9ed + 33da097 commit cc705b8

26 files changed

+458
-667
lines changed

compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs

+6-17
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use rustc_middle::mir::coverage::{CounterId, ExpressionId, Operand};
1+
use rustc_middle::mir::coverage::{CounterId, CovTerm, ExpressionId};
22

33
/// Must match the layout of `LLVMRustCounterKind`.
44
#[derive(Copy, Clone, Debug)]
@@ -43,11 +43,11 @@ impl Counter {
4343
Self { kind: CounterKind::Expression, id: expression_id.as_u32() }
4444
}
4545

46-
pub(crate) fn from_operand(operand: Operand) -> Self {
47-
match operand {
48-
Operand::Zero => Self::ZERO,
49-
Operand::Counter(id) => Self::counter_value_reference(id),
50-
Operand::Expression(id) => Self::expression(id),
46+
pub(crate) fn from_term(term: CovTerm) -> Self {
47+
match term {
48+
CovTerm::Zero => Self::ZERO,
49+
CovTerm::Counter(id) => Self::counter_value_reference(id),
50+
CovTerm::Expression(id) => Self::expression(id),
5151
}
5252
}
5353
}
@@ -73,17 +73,6 @@ pub struct CounterExpression {
7373
pub rhs: Counter,
7474
}
7575

76-
impl CounterExpression {
77-
/// The dummy expression `(0 - 0)` has a representation of all zeroes,
78-
/// making it marginally more efficient to initialize than `(0 + 0)`.
79-
pub(crate) const DUMMY: Self =
80-
Self { lhs: Counter::ZERO, kind: ExprKind::Subtract, rhs: Counter::ZERO };
81-
82-
pub fn new(lhs: Counter, kind: ExprKind, rhs: Counter) -> Self {
83-
Self { kind, lhs, rhs }
84-
}
85-
}
86-
8776
/// Corresponds to enum `llvm::coverage::CounterMappingRegion::RegionKind`.
8877
///
8978
/// Must match the layout of `LLVMRustCounterMappingRegionKind`.

compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs

+153-191
Large diffs are not rendered by default.

compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs

+13-17
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,8 @@ use rustc_hir::def::DefKind;
1010
use rustc_hir::def_id::DefId;
1111
use rustc_index::IndexVec;
1212
use rustc_middle::bug;
13-
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
1413
use rustc_middle::mir::coverage::CodeRegion;
15-
use rustc_middle::ty::TyCtxt;
14+
use rustc_middle::ty::{self, TyCtxt};
1615
use rustc_span::Symbol;
1716

1817
/// Generates and exports the Coverage Map.
@@ -60,10 +59,8 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
6059

6160
// Encode coverage mappings and generate function records
6261
let mut function_data = Vec::new();
63-
for (instance, mut function_coverage) in function_coverage_map {
62+
for (instance, function_coverage) in function_coverage_map {
6463
debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance);
65-
function_coverage.simplify_expressions();
66-
let function_coverage = function_coverage;
6764

6865
let mangled_function_name = tcx.symbol_name(instance).name;
6966
let source_hash = function_coverage.source_hash();
@@ -170,10 +167,11 @@ fn encode_mappings_for_function(
170167
let mut virtual_file_mapping = IndexVec::<u32, u32>::new();
171168
let mut mapping_regions = Vec::with_capacity(counter_regions.len());
172169

173-
// Sort the list of (counter, region) mapping pairs by region, so that they
174-
// can be grouped by filename. Prepare file IDs for each filename, and
175-
// prepare the mapping data so that we can pass it through FFI to LLVM.
176-
counter_regions.sort_by_key(|(_counter, region)| *region);
170+
// Sort and group the list of (counter, region) mapping pairs by filename.
171+
// (Preserve any further ordering imposed by `FunctionCoverage`.)
172+
// Prepare file IDs for each filename, and prepare the mapping data so that
173+
// we can pass it through FFI to LLVM.
174+
counter_regions.sort_by_key(|(_counter, region)| region.file_name);
177175
for counter_regions_for_file in
178176
counter_regions.group_by(|(_, a), (_, b)| a.file_name == b.file_name)
179177
{
@@ -331,16 +329,14 @@ fn add_unused_functions(cx: &CodegenCx<'_, '_>) {
331329
for non_codegenned_def_id in
332330
eligible_def_ids.into_iter().filter(|id| !codegenned_def_ids.contains(id))
333331
{
334-
let codegen_fn_attrs = tcx.codegen_fn_attrs(non_codegenned_def_id);
335-
336-
// If a function is marked `#[coverage(off)]`, then skip generating a
337-
// dead code stub for it.
338-
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NO_COVERAGE) {
339-
debug!("skipping unused fn marked #[coverage(off)]: {:?}", non_codegenned_def_id);
332+
// Skip any function that didn't have coverage data added to it by the
333+
// coverage instrumentor.
334+
let body = tcx.instance_mir(ty::InstanceDef::Item(non_codegenned_def_id));
335+
let Some(function_coverage_info) = body.function_coverage_info.as_deref() else {
340336
continue;
341-
}
337+
};
342338

343339
debug!("generating unused fn: {:?}", non_codegenned_def_id);
344-
cx.define_unused_fn(non_codegenned_def_id);
340+
cx.define_unused_fn(non_codegenned_def_id, function_coverage_info);
345341
}
346342
}

compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs

+43-31
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use rustc_hir as hir;
1616
use rustc_hir::def_id::DefId;
1717
use rustc_llvm::RustString;
1818
use rustc_middle::bug;
19-
use rustc_middle::mir::coverage::{CounterId, CoverageKind};
19+
use rustc_middle::mir::coverage::{CounterId, CoverageKind, FunctionCoverageInfo};
2020
use rustc_middle::mir::Coverage;
2121
use rustc_middle::ty;
2222
use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt};
@@ -88,56 +88,72 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
8888
/// For used/called functions, the coverageinfo was already added to the
8989
/// `function_coverage_map` (keyed by function `Instance`) during codegen.
9090
/// But in this case, since the unused function was _not_ previously
91-
/// codegenned, collect the coverage `CodeRegion`s from the MIR and add
92-
/// them. Since the function is never called, all of its `CodeRegion`s can be
93-
/// added as `unreachable_region`s.
94-
fn define_unused_fn(&self, def_id: DefId) {
91+
/// codegenned, collect the function coverage info from MIR and add an
92+
/// "unused" entry to the function coverage map.
93+
fn define_unused_fn(&self, def_id: DefId, function_coverage_info: &'tcx FunctionCoverageInfo) {
9594
let instance = declare_unused_fn(self, def_id);
9695
codegen_unused_fn_and_counter(self, instance);
97-
add_unused_function_coverage(self, instance, def_id);
96+
add_unused_function_coverage(self, instance, function_coverage_info);
9897
}
9998
}
10099

101100
impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
101+
#[instrument(level = "debug", skip(self))]
102102
fn add_coverage(&mut self, instance: Instance<'tcx>, coverage: &Coverage) {
103+
// Our caller should have already taken care of inlining subtleties,
104+
// so we can assume that counter/expression IDs in this coverage
105+
// statement are meaningful for the given instance.
106+
//
107+
// (Either the statement was not inlined and directly belongs to this
108+
// instance, or it was inlined *from* this instance.)
109+
103110
let bx = self;
104111

112+
let Some(function_coverage_info) =
113+
bx.tcx.instance_mir(instance.def).function_coverage_info.as_deref()
114+
else {
115+
debug!("function has a coverage statement but no coverage info");
116+
return;
117+
};
118+
105119
let Some(coverage_context) = bx.coverage_context() else { return };
106120
let mut coverage_map = coverage_context.function_coverage_map.borrow_mut();
107121
let func_coverage = coverage_map
108122
.entry(instance)
109-
.or_insert_with(|| FunctionCoverage::new(bx.tcx(), instance));
123+
.or_insert_with(|| FunctionCoverage::new(instance, function_coverage_info));
110124

111-
let Coverage { kind, code_regions } = coverage;
125+
let Coverage { kind } = coverage;
112126
match *kind {
113-
CoverageKind::Counter { function_source_hash, id } => {
114-
debug!(
115-
"ensuring function source hash is set for instance={:?}; function_source_hash={}",
116-
instance, function_source_hash,
117-
);
118-
func_coverage.set_function_source_hash(function_source_hash);
119-
func_coverage.add_counter(id, code_regions);
127+
CoverageKind::CounterIncrement { id } => {
128+
func_coverage.mark_counter_id_seen(id);
120129
// We need to explicitly drop the `RefMut` before calling into `instrprof_increment`,
121130
// as that needs an exclusive borrow.
122131
drop(coverage_map);
123132

124-
let coverageinfo = bx.tcx().coverageinfo(instance.def);
133+
// The number of counters passed to `llvm.instrprof.increment` might
134+
// be smaller than the number originally inserted by the instrumentor,
135+
// if some high-numbered counters were removed by MIR optimizations.
136+
// If so, LLVM's profiler runtime will use fewer physical counters.
137+
let num_counters =
138+
bx.tcx().coverage_ids_info(instance.def).max_counter_id.as_u32() + 1;
139+
assert!(
140+
num_counters as usize <= function_coverage_info.num_counters,
141+
"num_counters disagreement: query says {num_counters} but function info only has {}",
142+
function_coverage_info.num_counters
143+
);
125144

126145
let fn_name = bx.get_pgo_func_name_var(instance);
127-
let hash = bx.const_u64(function_source_hash);
128-
let num_counters = bx.const_u32(coverageinfo.num_counters);
146+
let hash = bx.const_u64(function_coverage_info.function_source_hash);
147+
let num_counters = bx.const_u32(num_counters);
129148
let index = bx.const_u32(id.as_u32());
130149
debug!(
131150
"codegen intrinsic instrprof.increment(fn_name={:?}, hash={:?}, num_counters={:?}, index={:?})",
132151
fn_name, hash, num_counters, index,
133152
);
134153
bx.instrprof_increment(fn_name, hash, num_counters, index);
135154
}
136-
CoverageKind::Expression { id, lhs, op, rhs } => {
137-
func_coverage.add_counter_expression(id, lhs, op, rhs, code_regions);
138-
}
139-
CoverageKind::Unreachable => {
140-
func_coverage.add_unreachable_regions(code_regions);
155+
CoverageKind::ExpressionUsed { id } => {
156+
func_coverage.mark_expression_id_seen(id);
141157
}
142158
}
143159
}
@@ -200,15 +216,11 @@ fn codegen_unused_fn_and_counter<'tcx>(cx: &CodegenCx<'_, 'tcx>, instance: Insta
200216
fn add_unused_function_coverage<'tcx>(
201217
cx: &CodegenCx<'_, 'tcx>,
202218
instance: Instance<'tcx>,
203-
def_id: DefId,
219+
function_coverage_info: &'tcx FunctionCoverageInfo,
204220
) {
205-
let tcx = cx.tcx;
206-
207-
let mut function_coverage = FunctionCoverage::unused(tcx, instance);
208-
for &code_region in tcx.covered_code_regions(def_id) {
209-
let code_region = std::slice::from_ref(code_region);
210-
function_coverage.add_unreachable_regions(code_region);
211-
}
221+
// An unused function's mappings will automatically be rewritten to map to
222+
// zero, because none of its counters/expressions are marked as seen.
223+
let function_coverage = FunctionCoverage::unused(instance, function_coverage_info);
212224

213225
if let Some(coverage_context) = cx.coverage_context() {
214226
coverage_context.function_coverage_map.borrow_mut().insert(instance, function_coverage);

compiler/rustc_middle/src/mir/coverage.rs

+72-33
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
//! Metadata from source code coverage analysis and instrumentation.
22
3+
use rustc_index::IndexVec;
34
use rustc_macros::HashStable;
45
use rustc_span::Symbol;
56

@@ -8,6 +9,11 @@ use std::fmt::{self, Debug, Formatter};
89
rustc_index::newtype_index! {
910
/// ID of a coverage counter. Values ascend from 0.
1011
///
12+
/// Before MIR inlining, counter IDs are local to their enclosing function.
13+
/// After MIR inlining, coverage statements may have been inlined into
14+
/// another function, so use the statement's source-scope to find which
15+
/// function/instance its IDs are meaningful for.
16+
///
1117
/// Note that LLVM handles counter IDs as `uint32_t`, so there is no need
1218
/// to use a larger representation on the Rust side.
1319
#[derive(HashStable)]
@@ -23,6 +29,11 @@ impl CounterId {
2329
rustc_index::newtype_index! {
2430
/// ID of a coverage-counter expression. Values ascend from 0.
2531
///
32+
/// Before MIR inlining, expression IDs are local to their enclosing function.
33+
/// After MIR inlining, coverage statements may have been inlined into
34+
/// another function, so use the statement's source-scope to find which
35+
/// function/instance its IDs are meaningful for.
36+
///
2637
/// Note that LLVM handles expression IDs as `uint32_t`, so there is no need
2738
/// to use a larger representation on the Rust side.
2839
#[derive(HashStable)]
@@ -35,19 +46,21 @@ impl ExpressionId {
3546
pub const START: Self = Self::from_u32(0);
3647
}
3748

38-
/// Operand of a coverage-counter expression.
49+
/// Enum that can hold a constant zero value, the ID of an physical coverage
50+
/// counter, or the ID of a coverage-counter expression.
3951
///
40-
/// Operands can be a constant zero value, an actual coverage counter, or another
41-
/// expression. Counter/expression operands are referred to by ID.
52+
/// This was originally only used for expression operands (and named `Operand`),
53+
/// but the zero/counter/expression distinction is also useful for representing
54+
/// the value of code/gap mappings, and the true/false arms of branch mappings.
4255
#[derive(Copy, Clone, PartialEq, Eq)]
4356
#[derive(TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)]
44-
pub enum Operand {
57+
pub enum CovTerm {
4558
Zero,
4659
Counter(CounterId),
4760
Expression(ExpressionId),
4861
}
4962

50-
impl Debug for Operand {
63+
impl Debug for CovTerm {
5164
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
5265
match self {
5366
Self::Zero => write!(f, "Zero"),
@@ -59,40 +72,31 @@ impl Debug for Operand {
5972

6073
#[derive(Clone, PartialEq, TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)]
6174
pub enum CoverageKind {
62-
Counter {
63-
function_source_hash: u64,
64-
/// ID of this counter within its enclosing function.
65-
/// Expressions in the same function can refer to it as an operand.
66-
id: CounterId,
67-
},
68-
Expression {
69-
/// ID of this coverage-counter expression within its enclosing function.
70-
/// Other expressions in the same function can refer to it as an operand.
71-
id: ExpressionId,
72-
lhs: Operand,
73-
op: Op,
74-
rhs: Operand,
75-
},
76-
Unreachable,
75+
/// Marks the point in MIR control flow represented by a coverage counter.
76+
///
77+
/// This is eventually lowered to `llvm.instrprof.increment` in LLVM IR.
78+
///
79+
/// If this statement does not survive MIR optimizations, any mappings that
80+
/// refer to this counter can have those references simplified to zero.
81+
CounterIncrement { id: CounterId },
82+
83+
/// Marks the point in MIR control-flow represented by a coverage expression.
84+
///
85+
/// If this statement does not survive MIR optimizations, any mappings that
86+
/// refer to this expression can have those references simplified to zero.
87+
///
88+
/// (This is only inserted for expression IDs that are directly used by
89+
/// mappings. Intermediate expressions with no direct mappings are
90+
/// retained/zeroed based on whether they are transitively used.)
91+
ExpressionUsed { id: ExpressionId },
7792
}
7893

7994
impl Debug for CoverageKind {
8095
fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result {
8196
use CoverageKind::*;
8297
match self {
83-
Counter { id, .. } => write!(fmt, "Counter({:?})", id.index()),
84-
Expression { id, lhs, op, rhs } => write!(
85-
fmt,
86-
"Expression({:?}) = {:?} {} {:?}",
87-
id.index(),
88-
lhs,
89-
match op {
90-
Op::Add => "+",
91-
Op::Subtract => "-",
92-
},
93-
rhs,
94-
),
95-
Unreachable => write!(fmt, "Unreachable"),
98+
CounterIncrement { id } => write!(fmt, "CounterIncrement({:?})", id.index()),
99+
ExpressionUsed { id } => write!(fmt, "ExpressionUsed({:?})", id.index()),
96100
}
97101
}
98102
}
@@ -133,3 +137,38 @@ impl Op {
133137
matches!(self, Self::Subtract)
134138
}
135139
}
140+
141+
#[derive(Clone, Debug)]
142+
#[derive(TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)]
143+
pub struct Expression {
144+
pub lhs: CovTerm,
145+
pub op: Op,
146+
pub rhs: CovTerm,
147+
}
148+
149+
#[derive(Clone, Debug)]
150+
#[derive(TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)]
151+
pub struct Mapping {
152+
pub code_region: CodeRegion,
153+
154+
/// Indicates whether this mapping uses a counter value, expression value,
155+
/// or zero value.
156+
///
157+
/// FIXME: When we add support for mapping kinds other than `Code`
158+
/// (e.g. branch regions, expansion regions), replace this with a dedicated
159+
/// mapping-kind enum.
160+
pub term: CovTerm,
161+
}
162+
163+
/// Stores per-function coverage information attached to a `mir::Body`,
164+
/// to be used in conjunction with the individual coverage statements injected
165+
/// into the function's basic blocks.
166+
#[derive(Clone, Debug)]
167+
#[derive(TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)]
168+
pub struct FunctionCoverageInfo {
169+
pub function_source_hash: u64,
170+
pub num_counters: usize,
171+
172+
pub expressions: IndexVec<ExpressionId, Expression>,
173+
pub mappings: Vec<Mapping>,
174+
}

0 commit comments

Comments
 (0)