Skip to content

Commit e7dff7e

Browse files
committed
Add memory::madvise::will_need_multiple_pages()
1 parent 5235d86 commit e7dff7e

3 files changed

Lines changed: 64 additions & 1 deletion

File tree

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ integer-encoding = "4.0.2"
226226
itertools = "0.14.0"
227227
log = "0.4.27"
228228
memmap2 = "0.9.7"
229-
nix = { version = "0.29", features = ["fs"] }
229+
nix = { version = "0.29", features = ["fs", "feature"] }
230230
num-traits = "0.2.19"
231231
ordered-float = { version = "5.0.0", features = ["serde", "schemars"] }
232232
rayon = "1.11.0"

lib/common/memory/src/madvise.rs

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,3 +169,64 @@ fn populate_simple(slice: &[u8]) {
169169
.sum::<Wrapping<u8>>(),
170170
);
171171
}
172+
173+
/// Trigger readahead for a memory-mapped region by calling
174+
/// `madvise(MADV_WILLNEED)` on it.
175+
///
176+
/// Use-case: the `region` is inside `MADV_RANDOM` memory map, but it spans
177+
/// across more than one 4KiB page. If you read it in sequence, it will cause
178+
/// multiple page faults, thus multiple 4KiB I/O operations. Avoid this by
179+
/// calling this function before reading the region. It will prefetch the whole
180+
/// region in a single I/O operation. (if possible)
181+
///
182+
/// Note: if the region fits within a single page, this function is a no-op.
183+
#[cfg(unix)]
184+
pub fn will_need_multiple_pages(region: &[u8]) {
185+
let Some(page_mask) = *PAGE_SIZE_MASK else {
186+
return;
187+
};
188+
189+
// `madvise()` requires the address to be page-aligned.
190+
let addr = region.as_ptr().map_addr(|addr| addr & !page_mask);
191+
let length = region.len() + (region.as_ptr().addr() & page_mask);
192+
193+
if length <= page_mask {
194+
// Data fits within a single page, do nothing.
195+
return;
196+
}
197+
198+
// Safety: madvise(MADV_WILLNEED) is harmless. If the address is not valid
199+
// (not file-baked mmap or even if it is an arbitrary invalid address), it
200+
// will return an error, but it won't crash or cause an undefined behavior.
201+
let res = unsafe { nix::libc::madvise(addr as *mut _, length, nix::libc::MADV_WILLNEED) };
202+
if res != 0 {
203+
#[cfg(debug_assertions)]
204+
{
205+
let err = io::Error::last_os_error();
206+
panic!("Failed to call madvise(MADV_WILLNEED): {err}");
207+
}
208+
}
209+
}
210+
211+
/// Fallback for non-Unix targets: does nothing and ignores the region.
#[cfg(not(unix))]
pub fn will_need_multiple_pages(_region: &[u8]) {
    // Intentionally empty.
}
213+
214+
/// Page size mask. Typically 0xfff for 4KiB pages.
215+
#[cfg(unix)]
216+
static PAGE_SIZE_MASK: std::sync::LazyLock<Option<usize>> =
217+
std::sync::LazyLock::new(|| get_page_mask().inspect_err(|err| log::warn!("{err}")).ok());
218+
219+
#[cfg(unix)]
220+
fn get_page_mask() -> Result<usize, String> {
221+
let page_size = nix::unistd::sysconf(nix::unistd::SysconfVar::PAGE_SIZE)
222+
.map_err(|err| format!("Failed to get page size: {err}"))?
223+
.ok_or_else(|| "sysconf(PAGE_SIZE) returned None".to_string())?;
224+
let page_size = usize::try_from(page_size)
225+
.map_err(|_| format!("Failed to convert page size {page_size} to usize"))?;
226+
if !page_size.is_power_of_two() {
227+
// Assuming that page size is a power of two (which is true for all
228+
// known platforms) simplifies computations.
229+
return Err(format!("Page size {page_size} is not a power of two"));
230+
}
231+
Ok(page_size - 1)
232+
}

lib/segment/src/index/hnsw_index/graph_links/view.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,8 @@ impl GraphLinksView<'_> {
271271
let start = offsets.get(idx).unwrap() as usize;
272272
let end = offsets.get(idx + 1).unwrap() as usize;
273273

274+
memory::madvise::will_need_multiple_pages(&neighbors[start..end]);
275+
274276
// 1. The varint-encoded length (`N` in the doc).
275277
let (neighbors_count, neighbors_count_size) =
276278
u64::decode_var(&neighbors[start..end]).unwrap();

0 commit comments

Comments
 (0)