Skip to content

Commit 5c78b33

Browse files
refactor(s2n-quic-dc): Replace fixed maps with hashbrown::HashTable (#2477)
We still preserve the fixed-size nature, albeit at (potentially) greater memory cost: hashbrown may grow the table to up to 2x the actual maximum entry count because tombstones occupy slots. After that final growth we will incur somewhat expensive table-wide rehashing periodically, but this is sufficiently fast to be acceptable in practice.
1 parent 00e3371 commit 5c78b33

File tree

8 files changed

+319
-371
lines changed

8 files changed

+319
-371
lines changed

dc/s2n-quic-dc/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ s2n-codec = { version = "=0.53.0", path = "../../common/s2n-codec", default-feat
3737
s2n-quic-core = { version = "=0.53.0", path = "../../quic/s2n-quic-core", default-features = false }
3838
s2n-quic-platform = { version = "=0.53.0", path = "../../quic/s2n-quic-platform" }
3939
slotmap = "1"
40+
hashbrown = "0.15"
4041
thiserror = "2"
4142
tokio = { version = "1", default-features = false, features = ["sync"] }
4243
tracing = "0.1"

dc/s2n-quic-dc/src/credentials.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,18 @@ pub mod testing;
2525
#[repr(C)]
2626
pub struct Id([u8; 16]);
2727

28-
impl std::hash::Hash for Id {
29-
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
28+
impl Id {
29+
pub(crate) fn to_hash(self) -> u64 {
3030
// The ID has very high quality entropy already, so write just one half of it to keep hash
3131
// costs as low as possible. For the main use of the Hash impl in the fixed-size ID map
3232
// this translates to just directly using these bytes for the indexing.
33-
state.write_u64(u64::from_ne_bytes(self.0[..8].try_into().unwrap()));
33+
u64::from_ne_bytes(self.0[..8].try_into().unwrap())
34+
}
35+
}
36+
37+
impl std::hash::Hash for Id {
38+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
39+
state.write_u64(self.to_hash());
3440
}
3541
}
3642

dc/s2n-quic-dc/src/fixed_map.rs

Lines changed: 0 additions & 228 deletions
This file was deleted.

dc/s2n-quic-dc/src/lib.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ pub mod credentials;
99
pub mod crypto;
1010
pub mod datagram;
1111
pub mod event;
12-
mod fixed_map;
1312
pub mod msg;
1413
pub mod packet;
1514
pub mod path;

dc/s2n-quic-dc/src/path/secret/map/cleaner.rs

Lines changed: 60 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -97,66 +97,89 @@ impl Cleaner {
9797

9898
let utilization = |count: usize| (count as f32 / state.secrets_capacity() as f32) * 100.0;
9999

100-
let mut id_entries_initial = 0usize;
100+
let id_entries_initial = state.ids.len();
101101
let mut id_entries_retired = 0usize;
102102
let mut id_entries_active = 0usize;
103-
let mut address_entries_initial = 0usize;
103+
let address_entries_initial = state.peers.len();
104104
let mut address_entries_retired = 0usize;
105105
let mut address_entries_active = 0usize;
106106
let mut handshake_requests = 0usize;
107107

108-
// For non-retired entries, if it's time for them to handshake again, request a
109-
// handshake to happen. This handshake will currently happen on the next request for this
110-
// particular peer.
111-
state.ids.retain(|_, entry| {
112-
id_entries_initial += 1;
108+
// We want to avoid taking long lived locks which affect gets on the maps (where we want
109+
// p100 latency to be in microseconds at most).
110+
//
111+
// Impeding *handshake* latency is much more acceptable though since this happens at most
112+
// once a minute and handshakes are of similar magnitude (~milliseconds/handshake, this is
113+
// also expected to run for single digit milliseconds).
114+
//
115+
// Note that we expect the queue to be an exhaustive list of entries - an entry should not
116+
// be in the maps without also being in the queue for more than a few microseconds, which
117+
// happens only during a handshake (when we pop from the queue to remove from the maps).
118+
let mut queue = state
119+
.eviction_queue
120+
.lock()
121+
.unwrap_or_else(|e| e.into_inner());
122+
123+
// This map is only accessed with queue lock held and in cleaner, so it is in practice
124+
// single threaded. No concurrent access is permitted.
125+
state.cleaner_peer_seen.clear();
126+
127+
// FIXME: add metrics for queue depth?
128+
// These are sort of equivalent to the ID map -- so maybe not worth it for now unless we
129+
// can get something more interesting out of it.
130+
queue.retain(|entry| {
131+
let Some(entry) = entry.upgrade() else {
132+
return false;
133+
};
134+
135+
if entry.take_accessed_id() {
136+
id_entries_active += 1;
137+
}
138+
139+
// Avoid double counting by making sure we have unique peer IPs.
140+
// We clear/take the accessed bit regardless of whether we're going to count it, to
142+
// preserve the property that every cleaner run snapshots the last ~minute of activity.
142+
if entry.take_accessed_addr() && state.cleaner_peer_seen.insert(entry.clone()).is_none()
143+
{
144+
address_entries_active += 1;
145+
}
113146

114147
let retained = if let Some(retired_at) = entry.retired_at() {
115148
// retain if we aren't yet ready to evict.
116149
current_epoch.saturating_sub(retired_at) < eviction_cycles
117150
} else {
118-
if entry.rehandshake_time() <= now {
119-
handshake_requests += 1;
120-
state.request_handshake(*entry.peer());
121-
entry.rehandshake_time_reschedule(state.rehandshake_period());
122-
}
123-
124-
// always retain
151+
// always retain non-retired entries.
125152
true
126153
};
127154

128155
if !retained {
129-
id_entries_retired += 1;
156+
let (id_removed, peer_removed) = state.evict(&entry);
157+
if id_removed {
158+
id_entries_retired += 1;
159+
}
160+
if peer_removed {
161+
address_entries_retired += 1;
162+
}
163+
return false;
130164
}
131165

132-
if entry.take_accessed_id() {
133-
id_entries_active += 1;
166+
// Schedule re-handshaking
167+
if entry.rehandshake_time() <= now {
168+
handshake_requests += 1;
169+
state.request_handshake(*entry.peer());
170+
entry.rehandshake_time_reschedule(state.rehandshake_period());
134171
}
135172

136-
retained
173+
true
137174
});
138175

139-
// Drop IP entries if we no longer have the path secret ID entry.
140-
// FIXME: Don't require a loop to do this. This is likely somewhat slow since it takes a
141-
// write lock + read lock essentially per-entry, but should be near-constant-time.
142-
state.peers.retain(|_, entry| {
143-
address_entries_initial += 1;
176+
// Avoid retaining entries for longer than expected.
177+
state.cleaner_peer_seen.clear();
144178

145-
let retained = state.ids.contains_key(entry.id());
146-
147-
if !retained {
148-
address_entries_retired += 1;
149-
}
150-
151-
if entry.take_accessed_addr() {
152-
address_entries_active += 1;
153-
}
154-
155-
retained
156-
});
179+
drop(queue);
157180

158-
let id_entries = id_entries_initial - id_entries_retired;
159-
let address_entries = address_entries_initial - address_entries_retired;
181+
let id_entries = state.ids.len();
182+
let address_entries = state.peers.len();
160183

161184
state.subscriber().on_path_secret_map_cleaner_cycled(
162185
event::builder::PathSecretMapCleanerCycled {

0 commit comments

Comments
 (0)