
Commit ab25a33

Auto merge of #3475 - RalfJung:reduce-reuse-recycle, r=RalfJung

Address reuse improvements and fixes:
- when an address gets reused, establish a happens-before link in the data race model
- do not reuse stack addresses, and make the reuse rate configurable

Fixes #3450

2 parents 5dfccc2 + f3b31c3 commit ab25a33

13 files changed: +174 -51 lines

README.md (+5)

@@ -295,6 +295,11 @@ up the sysroot. If you are using `miri` (the Miri driver) directly, see the
 Miri adds its own set of `-Z` flags, which are usually set via the `MIRIFLAGS`
 environment variable. We first document the most relevant and most commonly used flags:
 
+* `-Zmiri-address-reuse-rate=<rate>` changes the probability that a freed *non-stack* allocation
+  will be added to the pool for address reuse, and the probability that a new *non-stack* allocation
+  will be taken from the pool. Stack allocations never get added to or taken from the pool. The
+  default is `0.5`. Note that a very high reuse rate can mask concurrency bugs as address
+  reuse induces synchronization between threads.
 * `-Zmiri-compare-exchange-weak-failure-rate=<rate>` changes the failure rate of
   `compare_exchange_weak` operations. The default is `0.8` (so 4 out of 5 weak ops will fail).
   You can change it to any value between `0.0` and `1.0`, where `1.0` means it
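
To make the caveat at the end of the new flag description concrete, here is a hypothetical sketch (not part of this commit) of a program where reuse-induced synchronization can hide a data race: if the second `Box` happens to land on the address the spawned thread just freed, the release/acquire edge added by this commit also orders the two unsynchronized accesses to `DATA`, and Miri may not report them in that execution. Running such tests with `-Zmiri-address-reuse-rate=0` removes this source of accidental synchronization, which is exactly what the two weak-memory tests below do.

```rust
use std::thread;

static mut DATA: usize = 0;

fn main() {
    let t = thread::spawn(|| {
        unsafe { DATA = 42 }; // unsynchronized write
        drop(Box::new(0u64)); // the freed address may enter the reuse pool
    });
    // If this allocation reuses the address freed above, the freeing thread's
    // clock is acquired here, which also orders the accesses to `DATA` and can
    // hide the race in that particular execution.
    let _reuser = Box::new(0u64);
    let _val = unsafe { DATA }; // unsynchronized read: races with the write in `t`
    t.join().unwrap();
}
```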

src/alloc_addresses/mod.rs (+34 -18)

@@ -14,7 +14,8 @@ use rustc_span::Span;
 use rustc_target::abi::{Align, HasDataLayout, Size};
 
 use crate::*;
-use reuse_pool::ReusePool;
+
+use self::reuse_pool::ReusePool;
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 pub enum ProvenanceMode {
@@ -77,7 +78,7 @@ impl GlobalStateInner {
         GlobalStateInner {
             int_to_ptr_map: Vec::default(),
             base_addr: FxHashMap::default(),
-            reuse: ReusePool::new(),
+            reuse: ReusePool::new(config.address_reuse_rate),
             exposed: FxHashSet::default(),
             next_base_addr: stack_addr,
             provenance_mode: config.provenance_mode,
@@ -141,7 +142,11 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
         }
     }
 
-    fn addr_from_alloc_id(&self, alloc_id: AllocId, _kind: MemoryKind) -> InterpResult<'tcx, u64> {
+    fn addr_from_alloc_id(
+        &self,
+        alloc_id: AllocId,
+        memory_kind: MemoryKind,
+    ) -> InterpResult<'tcx, u64> {
         let ecx = self.eval_context_ref();
         let mut global_state = ecx.machine.alloc_addresses.borrow_mut();
         let global_state = &mut *global_state;
@@ -159,9 +164,12 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
         assert!(!matches!(kind, AllocKind::Dead));
 
         // This allocation does not have a base address yet, pick or reuse one.
-        let base_addr = if let Some(reuse_addr) =
-            global_state.reuse.take_addr(&mut *rng, size, align)
+        let base_addr = if let Some((reuse_addr, clock)) =
+            global_state.reuse.take_addr(&mut *rng, size, align, memory_kind)
         {
+            if let Some(data_race) = &ecx.machine.data_race {
+                data_race.validate_lock_acquire(&clock, ecx.get_active_thread());
+            }
             reuse_addr
         } else {
             // We have to pick a fresh address.
@@ -329,14 +337,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
     }
 }
 
-impl GlobalStateInner {
-    pub fn free_alloc_id(
-        &mut self,
-        rng: &mut impl Rng,
-        dead_id: AllocId,
-        size: Size,
-        align: Align,
-    ) {
+impl<'mir, 'tcx> MiriMachine<'mir, 'tcx> {
+    pub fn free_alloc_id(&mut self, dead_id: AllocId, size: Size, align: Align, kind: MemoryKind) {
+        let global_state = self.alloc_addresses.get_mut();
+        let rng = self.rng.get_mut();
+
         // We can *not* remove this from `base_addr`, since the interpreter design requires that we
         // be able to retrieve an AllocId + offset for any memory access *before* we check if the
         // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
@@ -349,15 +354,26 @@ impl GlobalStateInner {
         // returns a dead allocation.
         // To avoid a linear scan we first look up the address in `base_addr`, and then find it in
         // `int_to_ptr_map`.
-        let addr = *self.base_addr.get(&dead_id).unwrap();
-        let pos = self.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr).unwrap();
-        let removed = self.int_to_ptr_map.remove(pos);
+        let addr = *global_state.base_addr.get(&dead_id).unwrap();
+        let pos =
+            global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr).unwrap();
+        let removed = global_state.int_to_ptr_map.remove(pos);
         assert_eq!(removed, (addr, dead_id)); // double-check that we removed the right thing
         // We can also remove it from `exposed`, since this allocation can anyway not be returned by
         // `alloc_id_from_addr` any more.
-        self.exposed.remove(&dead_id);
+        global_state.exposed.remove(&dead_id);
         // Also remember this address for future reuse.
-        self.reuse.add_addr(rng, addr, size, align)
+        global_state.reuse.add_addr(rng, addr, size, align, kind, || {
+            let mut clock = concurrency::VClock::default();
+            if let Some(data_race) = &self.data_race {
+                data_race.validate_lock_release(
+                    &mut clock,
+                    self.threads.get_active_thread_id(),
+                    self.threads.active_thread_ref().current_span(),
+                );
+            }
+            clock
+        })
     }
 }
 
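The two halves of the diff above pair up: `free_alloc_id` releases the freeing thread's clock into the pool entry (the `clock` closure passed to `add_addr`), and `addr_from_alloc_id` acquires it when the address is handed out again (`validate_lock_acquire` on the clock returned by `take_addr`). As a rough mental model only, and deliberately not Miri's real `VClock`/data-race API, the effect is the usual vector-clock join:

```rust
// Simplified stand-in types for illustration; the names and operations here are
// assumptions, not Miri's actual data race machinery.
#[derive(Clone, Debug, Default)]
struct VClock(Vec<u64>); // one logical timestamp per thread

impl VClock {
    fn join(&mut self, other: &VClock) {
        if self.0.len() < other.0.len() {
            self.0.resize(other.0.len(), 0);
        }
        for (mine, theirs) in self.0.iter_mut().zip(other.0.iter()) {
            *mine = (*mine).max(*theirs);
        }
    }
}

fn main() {
    let freeing_thread = VClock(vec![3, 0]); // thread 0 has performed 3 events
    let mut reusing_thread = VClock(vec![0, 5]); // thread 1 has performed 5 events

    // "Release" at deallocation: remember the freeing thread's clock alongside
    // the pooled address (what the `clock()` closure produces in `add_addr`).
    let pool_entry_clock = freeing_thread.clone();

    // "Acquire" at reuse: the thread taking the address joins that clock, so
    // everything the freeing thread did before the free happens-before the reuse.
    reusing_thread.join(&pool_entry_clock);
    assert_eq!(reusing_thread.0, vec![3, 5]);
}
```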

src/alloc_addresses/reuse_pool.rs (+39 -20)

@@ -4,70 +4,89 @@ use rand::Rng;
 
 use rustc_target::abi::{Align, Size};
 
-const MAX_POOL_SIZE: usize = 64;
+use crate::{concurrency::VClock, MemoryKind};
 
-// Just use fair coins, until we have evidence that other numbers are better.
-const ADDR_REMEMBER_CHANCE: f64 = 0.5;
-const ADDR_TAKE_CHANCE: f64 = 0.5;
+const MAX_POOL_SIZE: usize = 64;
 
 /// The pool strikes a balance between exploring more possible executions and making it more likely
 /// to find bugs. The hypothesis is that bugs are more likely to occur when reuse happens for
 /// allocations with the same layout, since that can trigger e.g. ABA issues in a concurrent data
 /// structure. Therefore we only reuse allocations when size and alignment match exactly.
 #[derive(Debug)]
 pub struct ReusePool {
+    address_reuse_rate: f64,
     /// The i-th element in `pool` stores allocations of alignment `2^i`. We store these reusable
     /// allocations as address-size pairs, the list must be sorted by the size.
     ///
     /// Each of these maps has at most MAX_POOL_SIZE elements, and since alignment is limited to
     /// less than 64 different possible value, that bounds the overall size of the pool.
-    pool: Vec<Vec<(u64, Size)>>,
+    ///
+    /// We also store the clock from the thread that donated this pool element,
+    /// to ensure synchronization with the thread that picks up this address.
+    pool: Vec<Vec<(u64, Size, VClock)>>,
 }
 
 impl ReusePool {
-    pub fn new() -> Self {
-        ReusePool { pool: vec![] }
+    pub fn new(address_reuse_rate: f64) -> Self {
+        ReusePool { address_reuse_rate, pool: vec![] }
     }
 
-    fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size)> {
+    fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size, VClock)> {
         let pool_idx: usize = align.bytes().trailing_zeros().try_into().unwrap();
         if self.pool.len() <= pool_idx {
            self.pool.resize(pool_idx + 1, Vec::new());
         }
         &mut self.pool[pool_idx]
     }
 
-    pub fn add_addr(&mut self, rng: &mut impl Rng, addr: u64, size: Size, align: Align) {
+    pub fn add_addr(
+        &mut self,
+        rng: &mut impl Rng,
+        addr: u64,
+        size: Size,
+        align: Align,
+        kind: MemoryKind,
+        clock: impl FnOnce() -> VClock,
+    ) {
         // Let's see if we even want to remember this address.
-        if !rng.gen_bool(ADDR_REMEMBER_CHANCE) {
+        // We don't remember stack addresses: there's a lot of them (so the perf impact is big),
+        // and we only want to reuse stack slots within the same thread or else we'll add a lot of
+        // undesired synchronization.
+        if kind == MemoryKind::Stack || !rng.gen_bool(self.address_reuse_rate) {
             return;
         }
         // Determine the pool to add this to, and where in the pool to put it.
         let subpool = self.subpool(align);
-        let pos = subpool.partition_point(|(_addr, other_size)| *other_size < size);
+        let pos = subpool.partition_point(|(_addr, other_size, _)| *other_size < size);
         // Make sure the pool does not grow too big.
         if subpool.len() >= MAX_POOL_SIZE {
             // Pool full. Replace existing element, or last one if this would be even bigger.
             let clamped_pos = pos.min(subpool.len() - 1);
-            subpool[clamped_pos] = (addr, size);
+            subpool[clamped_pos] = (addr, size, clock());
             return;
         }
         // Add address to pool, at the right position.
-        subpool.insert(pos, (addr, size));
+        subpool.insert(pos, (addr, size, clock()));
     }
 
-    pub fn take_addr(&mut self, rng: &mut impl Rng, size: Size, align: Align) -> Option<u64> {
-        // Determine whether we'll even attempt a reuse.
-        if !rng.gen_bool(ADDR_TAKE_CHANCE) {
+    pub fn take_addr(
+        &mut self,
+        rng: &mut impl Rng,
+        size: Size,
+        align: Align,
+        kind: MemoryKind,
+    ) -> Option<(u64, VClock)> {
+        // Determine whether we'll even attempt a reuse. As above, we don't do reuse for stack addresses.
+        if kind == MemoryKind::Stack || !rng.gen_bool(self.address_reuse_rate) {
             return None;
         }
         // Determine the pool to take this from.
         let subpool = self.subpool(align);
         // Let's see if we can find something of the right size. We want to find the full range of
         // such items, beginning with the first, so we can't use `binary_search_by_key`.
-        let begin = subpool.partition_point(|(_addr, other_size)| *other_size < size);
+        let begin = subpool.partition_point(|(_addr, other_size, _)| *other_size < size);
         let mut end = begin;
-        while let Some((_addr, other_size)) = subpool.get(end) {
+        while let Some((_addr, other_size, _)) = subpool.get(end) {
             if *other_size != size {
                 break;
             }
@@ -80,8 +99,8 @@ impl ReusePool {
         // Pick a random element with the desired size.
         let idx = rng.gen_range(begin..end);
         // Remove it from the pool and return.
-        let (chosen_addr, chosen_size) = subpool.remove(idx);
+        let (chosen_addr, chosen_size, clock) = subpool.remove(idx);
         debug_assert!(chosen_size >= size && chosen_addr % align.bytes() == 0);
-        Some(chosen_addr)
+        Some((chosen_addr, clock))
     }
 }
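
For readers unfamiliar with `partition_point`, the standalone sketch below (plain integers instead of Miri's `Size`, `Align` and `VClock`, so only an approximation of the code above) shows how the subpool index is derived from the alignment and how the size-sorted list yields the range of reuse candidates that `take_addr` picks from:

```rust
fn main() {
    // Subpool index is the base-2 log of the alignment, as in `ReusePool::subpool`.
    let align_bytes: u64 = 16;
    let pool_idx = align_bytes.trailing_zeros(); // 16 = 2^4 -> subpool 4

    // Entries are (addr, size) pairs kept sorted by size.
    let subpool: Vec<(u64, u64)> = vec![(0x1000, 8), (0x2000, 16), (0x3000, 16), (0x4000, 32)];
    let wanted_size = 16;

    // `partition_point` finds the first entry whose size is not smaller than the request...
    let begin = subpool.partition_point(|&(_addr, size)| size < wanted_size);
    // ...and the candidate range extends over the run of exactly-matching sizes.
    let end = begin + subpool[begin..].iter().take_while(|&&(_, size)| size == wanted_size).count();

    assert_eq!((begin, end), (1, 3)); // the entries at 0x2000 and 0x3000 qualify
    println!("alignment 2^{pool_idx}: candidate indices {begin}..{end}");
}
```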

src/bin/miri.rs (+14)

@@ -542,6 +542,20 @@ fn main() {
             miri_config.tracked_alloc_ids.extend(ids);
         } else if arg == "-Zmiri-track-alloc-accesses" {
             miri_config.track_alloc_accesses = true;
+        } else if let Some(param) = arg.strip_prefix("-Zmiri-address-reuse-rate=") {
+            let rate = match param.parse::<f64>() {
+                Ok(rate) if rate >= 0.0 && rate <= 1.0 => rate,
+                Ok(_) =>
+                    show_error!(
+                        "-Zmiri-address-reuse-rate must be between `0.0` and `1.0`"
+                    ),
+                Err(err) =>
+                    show_error!(
+                        "-Zmiri-address-reuse-rate requires a `f64` between `0.0` and `1.0`: {}",
+                        err
+                    ),
+            };
+            miri_config.address_reuse_rate = rate;
         } else if let Some(param) = arg.strip_prefix("-Zmiri-compare-exchange-weak-failure-rate=") {
             let rate = match param.parse::<f64>() {
                 Ok(rate) if rate >= 0.0 && rate <= 1.0 => rate,

src/concurrency/mod.rs (+2)

@@ -6,3 +6,5 @@ pub mod init_once;
 pub mod thread;
 mod vector_clock;
 pub mod weak_memory;
+
+pub use vector_clock::VClock;

src/concurrency/thread.rs (+6)

@@ -223,6 +223,12 @@ impl<'mir, 'tcx> Thread<'mir, 'tcx> {
         // empty stacks.
         self.top_user_relevant_frame.or_else(|| self.stack.len().checked_sub(1))
     }
+
+    pub fn current_span(&self) -> Span {
+        self.top_user_relevant_frame()
+            .map(|frame_idx| self.stack[frame_idx].current_span())
+            .unwrap_or(rustc_span::DUMMY_SP)
+    }
 }
 
 impl<'mir, 'tcx> std::fmt::Debug for Thread<'mir, 'tcx> {

src/eval.rs (+3)

@@ -150,6 +150,8 @@ pub struct MiriConfig {
     pub page_size: Option<u64>,
     /// Whether to collect a backtrace when each allocation is created, just in case it leaks.
     pub collect_leak_backtraces: bool,
+    /// Probability for address reuse.
+    pub address_reuse_rate: f64,
 }
 
 impl Default for MiriConfig {
@@ -186,6 +188,7 @@ impl Default for MiriConfig {
             num_cpus: 1,
             page_size: None,
             collect_leak_backtraces: true,
+            address_reuse_rate: 0.5,
         }
     }
 }

src/helpers.rs (+2 -4)

@@ -1280,9 +1280,7 @@ impl<'mir, 'tcx> MiriMachine<'mir, 'tcx> {
     /// This function is backed by a cache, and can be assumed to be very fast.
     /// It will work even when the stack is empty.
     pub fn current_span(&self) -> Span {
-        self.top_user_relevant_frame()
-            .map(|frame_idx| self.stack()[frame_idx].current_span())
-            .unwrap_or(rustc_span::DUMMY_SP)
+        self.threads.active_thread_ref().current_span()
     }
 
     /// Returns the span of the *caller* of the current operation, again
@@ -1294,7 +1292,7 @@ impl<'mir, 'tcx> MiriMachine<'mir, 'tcx> {
         // We need to go down at least to the caller (len - 2), or however
         // far we have to go to find a frame in a local crate which is also not #[track_caller].
         let frame_idx = self.top_user_relevant_frame().unwrap();
-        let frame_idx = cmp::min(frame_idx, self.stack().len().checked_sub(2).unwrap());
+        let frame_idx = cmp::min(frame_idx, self.stack().len().saturating_sub(2));
         self.stack()[frame_idx].current_span()
     }
 
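The second hunk is a small robustness fix: with fewer than two frames on the stack, the old `checked_sub(2).unwrap()` would panic, while `saturating_sub(2)` clamps to the bottom frame. A minimal illustration (not from the diff):

```rust
fn main() {
    let stack_len: usize = 1; // e.g. only one frame on the stack
    assert_eq!(stack_len.checked_sub(2), None); // the old `.unwrap()` would panic here
    assert_eq!(stack_len.saturating_sub(2), 0); // the new code falls back to frame 0
}
```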

src/machine.rs (+2 -7)

@@ -1282,7 +1282,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         (alloc_id, prove_extra): (AllocId, Self::ProvenanceExtra),
         size: Size,
         align: Align,
-        _kind: MemoryKind,
+        kind: MemoryKind,
     ) -> InterpResult<'tcx> {
         if machine.tracked_alloc_ids.contains(&alloc_id) {
             machine.emit_diagnostic(NonHaltingDiagnostic::FreedAlloc(alloc_id));
@@ -1303,12 +1303,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
         {
             *deallocated_at = Some(machine.current_span());
         }
-        machine.alloc_addresses.get_mut().free_alloc_id(
-            machine.rng.get_mut(),
-            alloc_id,
-            size,
-            align,
-        );
+        machine.free_alloc_id(alloc_id, size, align, kind);
         Ok(())
     }
 

tests/fail/weak_memory/racing_mixed_size.rs (+2 -1)

@@ -1,5 +1,6 @@
 // We want to control preemption here.
-//@compile-flags: -Zmiri-preemption-rate=0
+// Avoid accidental synchronization via address reuse.
+//@compile-flags: -Zmiri-preemption-rate=0 -Zmiri-address-reuse-rate=0
 
 #![feature(core_intrinsics)]
 
tests/fail/weak_memory/racing_mixed_size_read.rs (+2 -1)

@@ -1,5 +1,6 @@
 // We want to control preemption here.
-//@compile-flags: -Zmiri-preemption-rate=0
+// Avoid accidental synchronization via address reuse.
+//@compile-flags: -Zmiri-preemption-rate=0 -Zmiri-address-reuse-rate=0
 
 use std::sync::atomic::Ordering::*;
 use std::sync::atomic::{AtomicU16, AtomicU32};
