Skip to content

Commit a003591

Browse files
committed
Auto merge of #83093 - the8472:smaller-instant-hammer, r=Amanieu
where available use AtomicU{64,128} instead of mutex for Instant backsliding protection This decreases the overhead of backsliding protection on x86 systems with unreliable TSC, e.g. windows. And on aarch64 systems where 128bit atomics are available. The following benchmarks were taken on x86_64 linux though by overriding `actually_monotonic()`, the numbers may look different on other platforms ``` # actually_monotonic() == true test time::tests::instant_contention_01_threads ... bench: 44 ns/iter (+/- 0) test time::tests::instant_contention_02_threads ... bench: 44 ns/iter (+/- 0) test time::tests::instant_contention_04_threads ... bench: 44 ns/iter (+/- 0) test time::tests::instant_contention_08_threads ... bench: 44 ns/iter (+/- 0) test time::tests::instant_contention_16_threads ... bench: 44 ns/iter (+/- 0) # 1x AtomicU64 test time::tests::instant_contention_01_threads ... bench: 65 ns/iter (+/- 0) test time::tests::instant_contention_02_threads ... bench: 157 ns/iter (+/- 20) test time::tests::instant_contention_04_threads ... bench: 281 ns/iter (+/- 53) test time::tests::instant_contention_08_threads ... bench: 555 ns/iter (+/- 77) test time::tests::instant_contention_16_threads ... bench: 883 ns/iter (+/- 107) # mutex test time::tests::instant_contention_01_threads ... bench: 60 ns/iter (+/- 2) test time::tests::instant_contention_02_threads ... bench: 770 ns/iter (+/- 231) test time::tests::instant_contention_04_threads ... bench: 1,347 ns/iter (+/- 45) test time::tests::instant_contention_08_threads ... bench: 2,693 ns/iter (+/- 114) test time::tests::instant_contention_16_threads ... bench: 5,244 ns/iter (+/- 487) ``` Since I don't have an arm machine with 128bit atomics I wasn't able to benchmark the AtomicU128 implementation.
2 parents 914a1e2 + cd82b42 commit a003591

File tree

3 files changed

+215
-12
lines changed

3 files changed

+215
-12
lines changed

library/std/src/time.rs

+2-10
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,14 @@
1212
1313
#![stable(feature = "time", since = "1.3.0")]
1414

15+
mod monotonic;
1516
#[cfg(test)]
1617
mod tests;
1718

18-
use crate::cmp;
1919
use crate::error::Error;
2020
use crate::fmt;
2121
use crate::ops::{Add, AddAssign, Sub, SubAssign};
2222
use crate::sys::time;
23-
use crate::sys_common::mutex::StaticMutex;
2423
use crate::sys_common::FromInner;
2524

2625
#[stable(feature = "time", since = "1.3.0")]
@@ -249,14 +248,7 @@ impl Instant {
249248
return Instant(os_now);
250249
}
251250

252-
static LOCK: StaticMutex = StaticMutex::new();
253-
static mut LAST_NOW: time::Instant = time::Instant::zero();
254-
unsafe {
255-
let _lock = LOCK.lock();
256-
let now = cmp::max(LAST_NOW, os_now);
257-
LAST_NOW = now;
258-
Instant(now)
259-
}
251+
Instant(monotonic::monotonize(os_now))
260252
}
261253

262254
/// Returns the amount of time elapsed from another instant to this one.

library/std/src/time/monotonic.rs

+115
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
use crate::sys::time;
2+
3+
#[inline]
4+
pub(super) fn monotonize(raw: time::Instant) -> time::Instant {
5+
inner::monotonize(raw)
6+
}
7+
8+
#[cfg(all(target_has_atomic = "64", not(target_has_atomic = "128")))]
9+
pub mod inner {
10+
use crate::sync::atomic::AtomicU64;
11+
use crate::sync::atomic::Ordering::*;
12+
use crate::sys::time;
13+
use crate::time::Duration;
14+
15+
pub(in crate::time) const ZERO: time::Instant = time::Instant::zero();
16+
17+
// bits 30 and 31 are never used since the nanoseconds part never exceeds 10^9
18+
const UNINITIALIZED: u64 = 0b11 << 30;
19+
static MONO: AtomicU64 = AtomicU64::new(UNINITIALIZED);
20+
21+
#[inline]
22+
pub(super) fn monotonize(raw: time::Instant) -> time::Instant {
23+
monotonize_impl(&MONO, raw)
24+
}
25+
26+
#[inline]
27+
pub(in crate::time) fn monotonize_impl(mono: &AtomicU64, raw: time::Instant) -> time::Instant {
28+
let delta = raw.checked_sub_instant(&ZERO).unwrap();
29+
let secs = delta.as_secs();
30+
// occupies no more than 30 bits (10^9 seconds)
31+
let nanos = delta.subsec_nanos() as u64;
32+
33+
// This wraps around every 136 years (2^32 seconds).
34+
// To detect backsliding we use wrapping arithmetic and declare forward steps smaller
35+
// than 2^31 seconds as expected and everything else as a backslide which will be
36+
// monotonized.
37+
// This could be a problem for programs that call instants at intervals greater
38+
// than 68 years. Interstellar probes may want to ensure that actually_monotonic() is true.
39+
let packed = (secs << 32) | nanos;
40+
let old = mono.load(Relaxed);
41+
42+
if old == UNINITIALIZED || packed.wrapping_sub(old) < u64::MAX / 2 {
43+
mono.store(packed, Relaxed);
44+
raw
45+
} else {
46+
// Backslide occurred. We reconstruct monotonized time from the upper 32 bit of the
47+
// passed in value and the 64bits loaded from the atomic
48+
let seconds_lower = old >> 32;
49+
let mut seconds_upper = secs & 0xffff_ffff_0000_0000;
50+
if secs & 0xffff_ffff > seconds_lower {
51+
// Backslide caused the lower 32bit of the seconds part to wrap.
52+
// This must be the case because the seconds part is larger even though
53+
// we are in the backslide branch, i.e. the seconds count should be smaller or equal.
54+
//
55+
// We assume that backslides are smaller than 2^32 seconds
56+
// which means we need to add 1 to the upper half to restore it.
57+
//
58+
// Example:
59+
// most recent observed time: 0xA1_0000_0000_0000_0000u128
60+
// bits stored in AtomicU64: 0x0000_0000_0000_0000u64
61+
// backslide by 1s
62+
// caller time is 0xA0_ffff_ffff_0000_0000u128
63+
// -> we can fix up the upper half time by adding 1 << 32
64+
seconds_upper = seconds_upper.wrapping_add(0x1_0000_0000);
65+
}
66+
let secs = seconds_upper | seconds_lower;
67+
let nanos = old as u32;
68+
ZERO.checked_add_duration(&Duration::new(secs, nanos)).unwrap()
69+
}
70+
}
71+
}
72+
73+
#[cfg(target_has_atomic = "128")]
74+
pub mod inner {
75+
use crate::sync::atomic::AtomicU128;
76+
use crate::sync::atomic::Ordering::*;
77+
use crate::sys::time;
78+
use crate::time::Duration;
79+
80+
const ZERO: time::Instant = time::Instant::zero();
81+
static MONO: AtomicU128 = AtomicU128::new(0);
82+
83+
#[inline]
84+
pub(super) fn monotonize(raw: time::Instant) -> time::Instant {
85+
let delta = raw.checked_sub_instant(&ZERO).unwrap();
86+
// Split into seconds and nanos since Duration doesn't have a
87+
// constructor that takes an u128
88+
let secs = delta.as_secs() as u128;
89+
let nanos = delta.subsec_nanos() as u128;
90+
let timestamp: u128 = secs << 64 | nanos;
91+
let timestamp = MONO.fetch_max(timestamp, Relaxed).max(timestamp);
92+
let secs = (timestamp >> 64) as u64;
93+
let nanos = timestamp as u32;
94+
ZERO.checked_add_duration(&Duration::new(secs, nanos)).unwrap()
95+
}
96+
}
97+
98+
#[cfg(not(any(target_has_atomic = "64", target_has_atomic = "128")))]
99+
pub mod inner {
100+
use crate::cmp;
101+
use crate::sys::time;
102+
use crate::sys_common::mutex::StaticMutex;
103+
104+
#[inline]
105+
pub(super) fn monotonize(os_now: time::Instant) -> time::Instant {
106+
static LOCK: StaticMutex = StaticMutex::new();
107+
static mut LAST_NOW: time::Instant = time::Instant::zero();
108+
unsafe {
109+
let _lock = LOCK.lock();
110+
let now = cmp::max(LAST_NOW, os_now);
111+
LAST_NOW = now;
112+
now
113+
}
114+
}
115+
}

library/std/src/time/tests.rs

+98-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
use super::{Duration, Instant, SystemTime, UNIX_EPOCH};
2+
#[cfg(not(target_arch = "wasm32"))]
3+
use test::{black_box, Bencher};
24

35
macro_rules! assert_almost_eq {
46
($a:expr, $b:expr) => {{
@@ -13,8 +15,34 @@ macro_rules! assert_almost_eq {
1315
#[test]
1416
fn instant_monotonic() {
1517
let a = Instant::now();
16-
let b = Instant::now();
17-
assert!(b >= a);
18+
loop {
19+
let b = Instant::now();
20+
assert!(b >= a);
21+
if b > a {
22+
break;
23+
}
24+
}
25+
}
26+
27+
#[test]
28+
#[cfg(not(target_arch = "wasm32"))]
29+
fn instant_monotonic_concurrent() -> crate::thread::Result<()> {
30+
let threads: Vec<_> = (0..8)
31+
.map(|_| {
32+
crate::thread::spawn(|| {
33+
let mut old = Instant::now();
34+
for _ in 0..5_000_000 {
35+
let new = Instant::now();
36+
assert!(new >= old);
37+
old = new;
38+
}
39+
})
40+
})
41+
.collect();
42+
for t in threads {
43+
t.join()?;
44+
}
45+
Ok(())
1846
}
1947

2048
#[test]
@@ -163,3 +191,71 @@ fn since_epoch() {
163191
let hundred_twenty_years = thirty_years * 4;
164192
assert!(a < hundred_twenty_years);
165193
}
194+
195+
#[cfg(all(target_has_atomic = "64", not(target_has_atomic = "128")))]
196+
#[test]
197+
fn monotonizer_wrapping_backslide() {
198+
use super::monotonic::inner::{monotonize_impl, ZERO};
199+
use core::sync::atomic::AtomicU64;
200+
201+
let reference = AtomicU64::new(0);
202+
203+
let time = match ZERO.checked_add_duration(&Duration::from_secs(0xffff_ffff)) {
204+
Some(time) => time,
205+
None => {
206+
// platform cannot represent u32::MAX seconds so it won't have to deal with this kind
207+
// of overflow either
208+
return;
209+
}
210+
};
211+
212+
let monotonized = monotonize_impl(&reference, time);
213+
let expected = ZERO.checked_add_duration(&Duration::from_secs(1 << 32)).unwrap();
214+
assert_eq!(
215+
monotonized, expected,
216+
"64bit monotonizer should handle overflows in the seconds part"
217+
);
218+
}
219+
220+
macro_rules! bench_instant_threaded {
221+
($bench_name:ident, $thread_count:expr) => {
222+
#[bench]
223+
#[cfg(not(target_arch = "wasm32"))]
224+
fn $bench_name(b: &mut Bencher) -> crate::thread::Result<()> {
225+
use crate::sync::atomic::{AtomicBool, Ordering};
226+
use crate::sync::Arc;
227+
228+
let running = Arc::new(AtomicBool::new(true));
229+
230+
let threads: Vec<_> = (0..$thread_count)
231+
.map(|_| {
232+
let flag = Arc::clone(&running);
233+
crate::thread::spawn(move || {
234+
while flag.load(Ordering::Relaxed) {
235+
black_box(Instant::now());
236+
}
237+
})
238+
})
239+
.collect();
240+
241+
b.iter(|| {
242+
let a = Instant::now();
243+
let b = Instant::now();
244+
assert!(b >= a);
245+
});
246+
247+
running.store(false, Ordering::Relaxed);
248+
249+
for t in threads {
250+
t.join()?;
251+
}
252+
Ok(())
253+
}
254+
};
255+
}
256+
257+
bench_instant_threaded!(instant_contention_01_threads, 0);
258+
bench_instant_threaded!(instant_contention_02_threads, 1);
259+
bench_instant_threaded!(instant_contention_04_threads, 3);
260+
bench_instant_threaded!(instant_contention_08_threads, 7);
261+
bench_instant_threaded!(instant_contention_16_threads, 15);

0 commit comments

Comments
 (0)