|
| 1 | +// Thread parker implementation for Windows. |
| 2 | +// |
| 3 | +// This uses WaitOnAddress and WakeByAddressSingle if available (Windows 8+). |
| 4 | +// This modern API is exactly the same as the futex syscalls the Linux thread |
| 5 | +// parker uses. When These APIs are available, the implementation of this |
| 6 | +// thread parker matches the Linux thread parker exactly. |
| 7 | +// |
| 8 | +// However, when the modern API is not available, this implementation falls |
| 9 | +// back to NT Keyed Events, which are similar, but have some important |
| 10 | +// differences. These are available since Windows XP. |
| 11 | +// |
| 12 | +// WaitOnAddress first checks the state of the thread parker to make sure it no |
| 13 | +// WakeByAddressSingle calls can be missed between updating the parker state |
| 14 | +// and calling the function. |
| 15 | +// |
| 16 | +// NtWaitForKeyedEvent does not have this option, and unconditionally blocks |
| 17 | +// without checking the parker state first. Instead, NtReleaseKeyedEvent |
| 18 | +// (unlike WakeByAddressSingle) *blocks* until it woke up a thread waiting for |
| 19 | +// it by NtWaitForKeyedEvent. This way, we can be sure no events are missed, |
| 20 | +// but we need to be careful not to block unpark() if park_timeout() was woken |
| 21 | +// up by a timeout instead of unpark(). |
| 22 | +// |
| 23 | +// Unlike WaitOnAddress, NtWaitForKeyedEvent/NtReleaseKeyedEvent operate on a |
| 24 | +// HANDLE (created with NtCreateKeyedEvent). This means that we can be sure |
| 25 | +// a succesfully awoken park() was awoken by unpark() and not a |
| 26 | +// NtReleaseKeyedEvent call from some other code, as these events are not only |
| 27 | +// matched by the key (address of the parker (state)), but also by this HANDLE. |
| 28 | +// We lazily allocate this handle the first time it is needed. |
| 29 | +// |
| 30 | +// The fast path (calling park() after unpark() was already called) and the |
| 31 | +// possible states are the same for both implementations. This is used here to |
| 32 | +// make sure the fast path does not even check which API to use, but can return |
| 33 | +// right away, independent of the used API. Only the slow paths (which will |
| 34 | +// actually block/wake a thread) check which API is available and have |
| 35 | +// different implementations. |
| 36 | +// |
| 37 | +// Unfortunately, NT Keyed Events are an undocumented Windows API. However: |
| 38 | +// - This API is relatively simple with obvious behaviour, and there are |
| 39 | +// several (unofficial) articles documenting the details. [1] |
| 40 | +// - `parking_lot` has been using this API for years (on Windows versions |
| 41 | +// before Windows 8). [2] Many big projects extensively use parking_lot, |
| 42 | +// such as servo and the Rust compiler itself. |
| 43 | +// - It is the underlying API used by Windows SRW locks and Windows critical |
| 44 | +// sections. [3] [4] |
| 45 | +// - The source code of the implementations of Wine, ReactOs, and Windows XP |
| 46 | +// are available and match the expected behaviour. |
| 47 | +// - The main risk with an undocumented API is that it might change in the |
| 48 | +// future. But since we only use it for older versions of Windows, that's not |
| 49 | +// a problem. |
| 50 | +// - Even if these functions do not block or wake as we expect (which is |
| 51 | +// unlikely, see all previous points), this implementation would still be |
| 52 | +// memory safe. The NT Keyed Events API is only used to sleep/block in the |
| 53 | +// right place. |
| 54 | +// |
| 55 | +// [1]: http://www.locklessinc.com/articles/keyed_events/ |
| 56 | +// [2]: https://github.com/Amanieu/parking_lot/commit/43abbc964e |
| 57 | +// [3]: https://docs.microsoft.com/en-us/archive/msdn-magazine/2012/november/windows-with-c-the-evolution-of-synchronization-in-windows-and-c |
| 58 | +// [4]: Windows Internals, Part 1, ISBN 9780735671300 |
| 59 | + |
| 60 | +use crate::convert::TryFrom; |
| 61 | +use crate::ptr; |
| 62 | +use crate::sync::atomic::{ |
| 63 | + AtomicI8, AtomicUsize, |
| 64 | + Ordering::{Acquire, Relaxed, Release}, |
| 65 | +}; |
| 66 | +use crate::sys::{c, dur2timeout}; |
| 67 | +use crate::time::Duration; |
| 68 | + |
| 69 | +pub struct Parker { |
| 70 | + state: AtomicI8, |
| 71 | +} |
| 72 | + |
| 73 | +const PARKED: i8 = -1; |
| 74 | +const EMPTY: i8 = 0; |
| 75 | +const NOTIFIED: i8 = 1; |
| 76 | + |
| 77 | +// Notes about memory ordering: |
| 78 | +// |
| 79 | +// Memory ordering is only relevant for the relative ordering of operations |
| 80 | +// between different variables. Even Ordering::Relaxed guarantees a |
| 81 | +// monotonic/consistent order when looking at just a single atomic variable. |
| 82 | +// |
| 83 | +// So, since this parker is just a single atomic variable, we only need to look |
| 84 | +// at the ordering guarantees we need to provide to the 'outside world'. |
| 85 | +// |
| 86 | +// The only memory ordering guarantee that parking and unparking provide, is |
| 87 | +// that things which happened before unpark() are visible on the thread |
| 88 | +// returning from park() afterwards. Otherwise, it was effectively unparked |
| 89 | +// before unpark() was called while still consuming the 'token'. |
| 90 | +// |
| 91 | +// In other words, unpark() needs to synchronize with the part of park() that |
| 92 | +// consumes the token and returns. |
| 93 | +// |
| 94 | +// This is done with a release-acquire synchronization, by using |
| 95 | +// Ordering::Release when writing NOTIFIED (the 'token') in unpark(), and using |
| 96 | +// Ordering::Acquire when reading this state in park() after waking up. |
| 97 | +impl Parker { |
| 98 | + pub fn new() -> Self { |
| 99 | + Self { state: AtomicI8::new(EMPTY) } |
| 100 | + } |
| 101 | + |
| 102 | + // Assumes this is only called by the thread that owns the Parker, |
| 103 | + // which means that `self.state != PARKED`. |
| 104 | + pub unsafe fn park(&self) { |
| 105 | + // Change NOTIFIED=>EMPTY or EMPTY=>PARKED, and directly return in the |
| 106 | + // first case. |
| 107 | + if self.state.fetch_sub(1, Acquire) == NOTIFIED { |
| 108 | + return; |
| 109 | + } |
| 110 | + |
| 111 | + if c::WaitOnAddress::is_available() { |
| 112 | + loop { |
| 113 | + // Wait for something to happen, assuming it's still set to PARKED. |
| 114 | + c::WaitOnAddress(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, c::INFINITE); |
| 115 | + // Change NOTIFIED=>EMPTY but leave PARKED alone. |
| 116 | + if self.state.compare_and_swap(NOTIFIED, EMPTY, Acquire) == NOTIFIED { |
| 117 | + // Actually woken up by unpark(). |
| 118 | + return; |
| 119 | + } else { |
| 120 | + // Spurious wake up. We loop to try again. |
| 121 | + } |
| 122 | + } |
| 123 | + } else { |
| 124 | + // Wait for unpark() to produce this event. |
| 125 | + c::NtWaitForKeyedEvent(keyed_event_handle(), self.ptr(), 0, ptr::null_mut()); |
| 126 | + // Set the state back to EMPTY (from either PARKED or NOTIFIED). |
| 127 | + // Note that we don't just write EMPTY, but use swap() to also |
| 128 | + // include an acquire-ordered read to synchronize with unpark()'s |
| 129 | + // release-ordered write. |
| 130 | + self.state.swap(EMPTY, Acquire); |
| 131 | + } |
| 132 | + } |
| 133 | + |
| 134 | + // Assumes this is only called by the thread that owns the Parker, |
| 135 | + // which means that `self.state != PARKED`. |
| 136 | + pub unsafe fn park_timeout(&self, timeout: Duration) { |
| 137 | + // Change NOTIFIED=>EMPTY or EMPTY=>PARKED, and directly return in the |
| 138 | + // first case. |
| 139 | + if self.state.fetch_sub(1, Acquire) == NOTIFIED { |
| 140 | + return; |
| 141 | + } |
| 142 | + |
| 143 | + if c::WaitOnAddress::is_available() { |
| 144 | + // Wait for something to happen, assuming it's still set to PARKED. |
| 145 | + c::WaitOnAddress(self.ptr(), &PARKED as *const _ as c::LPVOID, 1, dur2timeout(timeout)); |
| 146 | + // Set the state back to EMPTY (from either PARKED or NOTIFIED). |
| 147 | + // Note that we don't just write EMPTY, but use swap() to also |
| 148 | + // include an acquire-ordered read to synchronize with unpark()'s |
| 149 | + // release-ordered write. |
| 150 | + if self.state.swap(EMPTY, Acquire) == NOTIFIED { |
| 151 | + // Actually woken up by unpark(). |
| 152 | + } else { |
| 153 | + // Timeout or spurious wake up. |
| 154 | + // We return either way, because we can't easily tell if it was the |
| 155 | + // timeout or not. |
| 156 | + } |
| 157 | + } else { |
| 158 | + // Need to wait for unpark() using NtWaitForKeyedEvent. |
| 159 | + let handle = keyed_event_handle(); |
| 160 | + |
| 161 | + // NtWaitForKeyedEvent uses a unit of 100ns, and uses negative |
| 162 | + // values to indicate a relative time on the monotonic clock. |
| 163 | + // This is documented here for the underlying KeWaitForSingleObject function: |
| 164 | + // https://docs.microsoft.com/en-us/windows-hardware/drivers/ddi/wdm/nf-wdm-kewaitforsingleobject |
| 165 | + let mut timeout = match i64::try_from((timeout.as_nanos() + 99) / 100) { |
| 166 | + Ok(t) => -t, |
| 167 | + Err(_) => i64::MIN, |
| 168 | + }; |
| 169 | + |
| 170 | + // Wait for unpark() to produce this event. |
| 171 | + let unparked = |
| 172 | + c::NtWaitForKeyedEvent(handle, self.ptr(), 0, &mut timeout) == c::STATUS_SUCCESS; |
| 173 | + |
| 174 | + // Set the state back to EMPTY (from either PARKED or NOTIFIED). |
| 175 | + let prev_state = self.state.swap(EMPTY, Acquire); |
| 176 | + |
| 177 | + if !unparked && prev_state == NOTIFIED { |
| 178 | + // We were awoken by a timeout, not by unpark(), but the state |
| 179 | + // was set to NOTIFIED, which means we *just* missed an |
| 180 | + // unpark(), which is now blocked on us to wait for it. |
| 181 | + // Wait for it to consume the event and unblock that thread. |
| 182 | + c::NtWaitForKeyedEvent(handle, self.ptr(), 0, ptr::null_mut()); |
| 183 | + } |
| 184 | + } |
| 185 | + } |
| 186 | + |
| 187 | + pub fn unpark(&self) { |
| 188 | + // Change PARKED=>NOTIFIED, EMPTY=>NOTIFIED, or NOTIFIED=>NOTIFIED, and |
| 189 | + // wake the thread in the first case. |
| 190 | + // |
| 191 | + // Note that even NOTIFIED=>NOTIFIED results in a write. This is on |
| 192 | + // purpose, to make sure every unpark() has a release-acquire ordering |
| 193 | + // with park(). |
| 194 | + if self.state.swap(NOTIFIED, Release) == PARKED { |
| 195 | + if c::WakeByAddressSingle::is_available() { |
| 196 | + unsafe { |
| 197 | + c::WakeByAddressSingle(self.ptr()); |
| 198 | + } |
| 199 | + } else { |
| 200 | + // If we run NtReleaseKeyedEvent before the waiting thread runs |
| 201 | + // NtWaitForKeyedEvent, this (shortly) blocks until we can wake it up. |
| 202 | + // If the waiting thread wakes up before we run NtReleaseKeyedEvent |
| 203 | + // (e.g. due to a timeout), this blocks until we do wake up a thread. |
| 204 | + // To prevent this thread from blocking indefinitely in that case, |
| 205 | + // park_impl() will, after seeing the state set to NOTIFIED after |
| 206 | + // waking up, call NtWaitForKeyedEvent again to unblock us. |
| 207 | + unsafe { |
| 208 | + c::NtReleaseKeyedEvent(keyed_event_handle(), self.ptr(), 0, ptr::null_mut()); |
| 209 | + } |
| 210 | + } |
| 211 | + } |
| 212 | + } |
| 213 | + |
| 214 | + fn ptr(&self) -> c::LPVOID { |
| 215 | + &self.state as *const _ as c::LPVOID |
| 216 | + } |
| 217 | +} |
| 218 | + |
| 219 | +fn keyed_event_handle() -> c::HANDLE { |
| 220 | + const INVALID: usize = !0; |
| 221 | + static HANDLE: AtomicUsize = AtomicUsize::new(INVALID); |
| 222 | + match HANDLE.load(Relaxed) { |
| 223 | + INVALID => { |
| 224 | + let mut handle = c::INVALID_HANDLE_VALUE; |
| 225 | + unsafe { |
| 226 | + match c::NtCreateKeyedEvent( |
| 227 | + &mut handle, |
| 228 | + c::GENERIC_READ | c::GENERIC_WRITE, |
| 229 | + ptr::null_mut(), |
| 230 | + 0, |
| 231 | + ) { |
| 232 | + c::STATUS_SUCCESS => {} |
| 233 | + r => panic!("Unable to create keyed event handle: error {}", r), |
| 234 | + } |
| 235 | + } |
| 236 | + match HANDLE.compare_exchange(INVALID, handle as usize, Relaxed, Relaxed) { |
| 237 | + Ok(_) => handle, |
| 238 | + Err(h) => { |
| 239 | + // Lost the race to another thread initializing HANDLE before we did. |
| 240 | + // Closing our handle and using theirs instead. |
| 241 | + unsafe { |
| 242 | + c::CloseHandle(handle); |
| 243 | + } |
| 244 | + h as c::HANDLE |
| 245 | + } |
| 246 | + } |
| 247 | + } |
| 248 | + handle => handle as c::HANDLE, |
| 249 | + } |
| 250 | +} |
0 commit comments