@@ -97,66 +97,89 @@ impl Cleaner {
9797
9898 let utilization = |count : usize | ( count as f32 / state. secrets_capacity ( ) as f32 ) * 100.0 ;
9999
100- let mut id_entries_initial = 0usize ;
100+ let id_entries_initial = state . ids . len ( ) ;
101101 let mut id_entries_retired = 0usize ;
102102 let mut id_entries_active = 0usize ;
103- let mut address_entries_initial = 0usize ;
103+ let address_entries_initial = state . peers . len ( ) ;
104104 let mut address_entries_retired = 0usize ;
105105 let mut address_entries_active = 0usize ;
106106 let mut handshake_requests = 0usize ;
107107
108- // For non-retired entries, if it's time for them to handshake again, request a
109- // handshake to happen. This handshake will currently happen on the next request for this
110- // particular peer.
111- state. ids . retain ( |_, entry| {
112- id_entries_initial += 1 ;
108+ // We want to avoid taking long lived locks which affect gets on the maps (where we want
109+ // p100 latency to be in microseconds at most).
110+ //
111+ // Impeding *handshake* latency is much more acceptable though since this happens at most
112+ // once a minute and handshakes are of similar magnitude (~milliseconds/handshake, this is
113+ // also expected to run for single digit milliseconds).
114+ //
115+ // Note that we expect the queue to be an exhaustive list of entries - no entry should not
116+ // be in the queue but be in the maps for more than a few microseconds during a handshake
117+ // (when we pop from the queue to remove from the maps).
118+ let mut queue = state
119+ . eviction_queue
120+ . lock ( )
121+ . unwrap_or_else ( |e| e. into_inner ( ) ) ;
122+
123+ // This map is only accessed with queue lock held and in cleaner, so it is in practice
124+ // single threaded. No concurrent access is permitted.
125+ state. cleaner_peer_seen . clear ( ) ;
126+
127+ // FIXME: add metrics for queue depth?
128+ // These are sort of equivalent to the ID map -- so maybe not worth it for now unless we
129+ // can get something more interesting out of it.
130+ queue. retain ( |entry| {
131+ let Some ( entry) = entry. upgrade ( ) else {
132+ return false ;
133+ } ;
134+
135+ if entry. take_accessed_id ( ) {
136+ id_entries_active += 1 ;
137+ }
138+
139+ // Avoid double counting by making sure we have unique peer IPs.
140+ // We clear/take the accessed bit regardless of whether we're going to count it to
141+ // preserve the property that every cleaner run snapshots last ~minute.
142+ if entry. take_accessed_addr ( ) && state. cleaner_peer_seen . insert ( entry. clone ( ) ) . is_none ( )
143+ {
144+ address_entries_active += 1 ;
145+ }
113146
114147 let retained = if let Some ( retired_at) = entry. retired_at ( ) {
115148 // retain if we aren't yet ready to evict.
116149 current_epoch. saturating_sub ( retired_at) < eviction_cycles
117150 } else {
118- if entry. rehandshake_time ( ) <= now {
119- handshake_requests += 1 ;
120- state. request_handshake ( * entry. peer ( ) ) ;
121- entry. rehandshake_time_reschedule ( state. rehandshake_period ( ) ) ;
122- }
123-
124- // always retain
151+ // always retain non-retired entries.
125152 true
126153 } ;
127154
128155 if !retained {
129- id_entries_retired += 1 ;
156+ let ( id_removed, peer_removed) = state. evict ( & entry) ;
157+ if id_removed {
158+ id_entries_retired += 1 ;
159+ }
160+ if peer_removed {
161+ address_entries_retired += 1 ;
162+ }
163+ return false ;
130164 }
131165
132- if entry. take_accessed_id ( ) {
133- id_entries_active += 1 ;
166+ // Schedule re-handshaking
167+ if entry. rehandshake_time ( ) <= now {
168+ handshake_requests += 1 ;
169+ state. request_handshake ( * entry. peer ( ) ) ;
170+ entry. rehandshake_time_reschedule ( state. rehandshake_period ( ) ) ;
134171 }
135172
136- retained
173+ true
137174 } ) ;
138175
139- // Drop IP entries if we no longer have the path secret ID entry.
140- // FIXME: Don't require a loop to do this. This is likely somewhat slow since it takes a
141- // write lock + read lock essentially per-entry, but should be near-constant-time.
142- state. peers . retain ( |_, entry| {
143- address_entries_initial += 1 ;
176+ // Avoid retaining entries for longer than expected.
177+ state. cleaner_peer_seen . clear ( ) ;
144178
145- let retained = state. ids . contains_key ( entry. id ( ) ) ;
146-
147- if !retained {
148- address_entries_retired += 1 ;
149- }
150-
151- if entry. take_accessed_addr ( ) {
152- address_entries_active += 1 ;
153- }
154-
155- retained
156- } ) ;
179+ drop ( queue) ;
157180
158- let id_entries = id_entries_initial - id_entries_retired ;
159- let address_entries = address_entries_initial - address_entries_retired ;
181+ let id_entries = state . ids . len ( ) ;
182+ let address_entries = state . peers . len ( ) ;
160183
161184 state. subscriber ( ) . on_path_secret_map_cleaner_cycled (
162185 event:: builder:: PathSecretMapCleanerCycled {
0 commit comments