@@ -169,3 +169,64 @@ fn populate_simple(slice: &[u8]) {
169169 . sum :: < Wrapping < u8 > > ( ) ,
170170 ) ;
171171}
172+
173+ /// Trigger readahead for a memory-mapped region by calling
174+ /// `madvise(MADV_WILLNEED)` on it.
175+ ///
176+ /// Use-case: the `region` is inside `MADV_RANDOM` memory map, but it spans
177+ /// across more than one 4KiB page. If you read it in sequence, it will cause
178+ /// multiple page faults, thus multiple 4KiB I/O operations. Avoid this by
179+ /// calling this function before reading the region. It will prefetch the whole
180+ /// region in a single I/O operation. (if possible)
181+ ///
182+ /// Note: if the region fits within a single page, this function is a no-op.
183+ #[ cfg( unix) ]
184+ pub fn will_need_multiple_pages ( region : & [ u8 ] ) {
185+ let Some ( page_mask) = * PAGE_SIZE_MASK else {
186+ return ;
187+ } ;
188+
189+ // `madvise()` requires the address to be page-aligned.
190+ let addr = region. as_ptr ( ) . map_addr ( |addr| addr & !page_mask) ;
191+ let length = region. len ( ) + ( region. as_ptr ( ) . addr ( ) & page_mask) ;
192+
193+ if length <= page_mask {
194+ // Data fits within a single page, do nothing.
195+ return ;
196+ }
197+
198+ // Safety: madvise(MADV_WILLNEED) is harmless. If the address is not valid
199+ // (not file-baked mmap or even if it is an arbitrary invalid address), it
200+ // will return an error, but it won't crash or cause an undefined behavior.
201+ let res = unsafe { nix:: libc:: madvise ( addr as * mut _ , length, nix:: libc:: MADV_WILLNEED ) } ;
202+ if res != 0 {
203+ #[ cfg( debug_assertions) ]
204+ {
205+ let err = io:: Error :: last_os_error ( ) ;
206+ panic ! ( "Failed to call madvise(MADV_WILLNEED): {err}" ) ;
207+ }
208+ }
209+ }
210+
/// Fallback used on non-Unix targets: accepts the region and does nothing.
#[cfg(not(unix))]
pub fn will_need_multiple_pages(_region: &[u8]) {
    // Intentionally empty.
}
213+
214+ /// Page size mask. Typically 0xfff for 4KiB pages.
215+ #[ cfg( unix) ]
216+ static PAGE_SIZE_MASK : std:: sync:: LazyLock < Option < usize > > =
217+ std:: sync:: LazyLock :: new ( || get_page_mask ( ) . inspect_err ( |err| log:: warn!( "{err}" ) ) . ok ( ) ) ;
218+
219+ #[ cfg( unix) ]
220+ fn get_page_mask ( ) -> Result < usize , String > {
221+ let page_size = nix:: unistd:: sysconf ( nix:: unistd:: SysconfVar :: PAGE_SIZE )
222+ . map_err ( |err| format ! ( "Failed to get page size: {err}" ) ) ?
223+ . ok_or_else ( || "sysconf(PAGE_SIZE) returned None" . to_string ( ) ) ?;
224+ let page_size = usize:: try_from ( page_size)
225+ . map_err ( |_| format ! ( "Failed to convert page size {page_size} to usize" ) ) ?;
226+ if !page_size. is_power_of_two ( ) {
227+ // Assuming that page size is a power of two (which is true for all
228+ // known platforms) simplifies computations.
229+ return Err ( format ! ( "Page size {page_size} is not a power of two" ) ) ;
230+ }
231+ Ok ( page_size - 1 )
232+ }
0 commit comments