Skip to content

Commit 5a3d6c9

Browse files
authored
Rollup merge of #122326 - Zoxc:win-alloc-tweak, r=ChrisDenton
Optimize `process_heap_alloc` This optimizes `process_heap_alloc` introduced in #120205. From: ``` .text:0000000180027ED0 ; std::sys::pal::windows::alloc::process_heap_alloc::h703a613b3e25ff93 .text:0000000180027ED0 public _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h703a613b3e25ff93E .text:0000000180027ED0 _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h703a613b3e25ff93E proc near .text:0000000180027ED0 ; CODE XREF: std::sys::pal::common::alloc::realloc_fallback::hc4c96b4c24d03e77+23↑p .text:0000000180027ED0 ; std::sys::pal::common::alloc::realloc_fallback::hc4c96b4c24d03e77+55↑p ... .text:0000000180027ED0 push rsi .text:0000000180027ED1 push rdi .text:0000000180027ED2 sub rsp, 28h .text:0000000180027ED6 mov rsi, rdx .text:0000000180027ED9 mov edi, ecx .text:0000000180027EDB mov rcx, cs:_ZN3std3sys3pal7windows5alloc4HEAP17hb53ca4010cc29b62E ; std::sys::pal::windows::alloc::HEAP::hb53ca4010cc29b62 .text:0000000180027EE2 test rcx, rcx .text:0000000180027EE5 jnz short loc_180027EFC .text:0000000180027EE7 call cs:__imp_GetProcessHeap .text:0000000180027EED test rax, rax .text:0000000180027EF0 jz short loc_180027F0E .text:0000000180027EF2 mov rcx, rax .text:0000000180027EF5 mov cs:_ZN3std3sys3pal7windows5alloc4HEAP17hb53ca4010cc29b62E, rax ; std::sys::pal::windows::alloc::HEAP::hb53ca4010cc29b62 .text:0000000180027EFC .text:0000000180027EFC loc_180027EFC: ; CODE XREF: std::sys::pal::windows::alloc::process_heap_alloc::h703a613b3e25ff93+15↑j .text:0000000180027EFC mov edx, edi .text:0000000180027EFE mov r8, rsi .text:0000000180027F01 add rsp, 28h .text:0000000180027F05 pop rdi .text:0000000180027F06 pop rsi .text:0000000180027F07 jmp cs:__imp_HeapAlloc .text:0000000180027F0E ; --------------------------------------------------------------------------- .text:0000000180027F0E .text:0000000180027F0E loc_180027F0E: ; CODE XREF: std::sys::pal::windows::alloc::process_heap_alloc::h703a613b3e25ff93+20↑j .text:0000000180027F0E xor eax, eax .text:0000000180027F10 add 
rsp, 28h .text:0000000180027F14 pop rdi .text:0000000180027F15 pop rsi .text:0000000180027F16 retn .text:0000000180027F16 _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h703a613b3e25ff93E endp ``` to ``` .text:0000000180027EE0 ; std::sys::pal::windows::alloc::process_heap_alloc::h70f9d61a631e5c16 .text:0000000180027EE0 public _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h70f9d61a631e5c16E .text:0000000180027EE0 _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h70f9d61a631e5c16E proc near .text:0000000180027EE0 ; CODE XREF: std::sys::pal::common::alloc::realloc_fallback::hc4c96b4c24d03e77+23↑p .text:0000000180027EE0 ; std::sys::pal::common::alloc::realloc_fallback::hc4c96b4c24d03e77+54↑p ... .text:0000000180027EE0 mov rcx, cs:_ZN3std3sys3pal7windows5alloc4HEAP17hb53ca4010cc29b62E ; std::sys::pal::windows::alloc::HEAP::hb53ca4010cc29b62 .text:0000000180027EE7 test rcx, rcx .text:0000000180027EEA jz short loc_180027EF3 .text:0000000180027EEC jmp cs:__imp_HeapAlloc .text:0000000180027EF3 ; --------------------------------------------------------------------------- .text:0000000180027EF3 .text:0000000180027EF3 loc_180027EF3: ; CODE XREF: std::sys::pal::windows::alloc::process_heap_alloc::h70f9d61a631e5c16+A↑j .text:0000000180027EF3 mov ecx, edx .text:0000000180027EF5 mov rdx, r8 .text:0000000180027EF8 jmp std__sys__pal__windows__alloc__process_heap_init_and_alloc .text:0000000180027EF8 _ZN3std3sys3pal7windows5alloc18process_heap_alloc17h70f9d61a631e5c16E endp ``` r? `@ChrisDenton`
2 parents 6f15816 + 50760aa commit 5a3d6c9

File tree

1 file changed

+36
-19
lines changed

1 file changed

+36
-19
lines changed

library/std/src/sys/pal/windows/alloc.rs

+36-19
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use crate::ptr;
66
use crate::sync::atomic::{AtomicPtr, Ordering};
77
use crate::sys::c;
88
use crate::sys::common::alloc::{realloc_fallback, MIN_ALIGN};
9+
use core::mem::MaybeUninit;
910

1011
#[cfg(test)]
1112
mod tests;
@@ -94,29 +95,30 @@ static HEAP: AtomicPtr<c_void> = AtomicPtr::new(ptr::null_mut());
9495
// a non-null handle returned by `GetProcessHeap`.
9596
#[inline]
9697
fn init_or_get_process_heap() -> c::HANDLE {
97-
let heap = HEAP.load(Ordering::Relaxed);
98-
if core::intrinsics::unlikely(heap.is_null()) {
99-
// `HEAP` has not yet been successfully initialized
100-
let heap = unsafe { GetProcessHeap() };
101-
if !heap.is_null() {
102-
// SAFETY: No locking is needed because within the same process,
103-
// successful calls to `GetProcessHeap` will always return the same value, even on different threads.
104-
HEAP.store(heap, Ordering::Release);
105-
106-
// SAFETY: `HEAP` contains a non-null handle returned by `GetProcessHeap`
107-
heap
108-
} else {
109-
// Could not get the current process heap.
110-
ptr::null_mut()
111-
}
112-
} else {
98+
// `HEAP` has not yet been successfully initialized
99+
let heap = unsafe { GetProcessHeap() };
100+
if !heap.is_null() {
101+
// SAFETY: No locking is needed because within the same process,
102+
// successful calls to `GetProcessHeap` will always return the same value, even on different threads.
103+
HEAP.store(heap, Ordering::Release);
104+
113105
// SAFETY: `HEAP` contains a non-null handle returned by `GetProcessHeap`
114106
heap
107+
} else {
108+
// Could not get the current process heap.
109+
ptr::null_mut()
115110
}
116111
}
117112

113+
/// This is outlined from `process_heap_alloc` so that `process_heap_alloc`
114+
/// does not need any stack allocations.
118115
#[inline(never)]
119-
fn process_heap_alloc(flags: c::DWORD, dwBytes: c::SIZE_T) -> c::LPVOID {
116+
#[cold]
117+
extern "C" fn process_heap_init_and_alloc(
118+
_heap: MaybeUninit<c::HANDLE>, // We pass this argument to match the ABI of `HeapAlloc`
119+
flags: c::DWORD,
120+
dwBytes: c::SIZE_T,
121+
) -> c::LPVOID {
120122
let heap = init_or_get_process_heap();
121123
if core::intrinsics::unlikely(heap.is_null()) {
122124
return ptr::null_mut();
@@ -125,6 +127,21 @@ fn process_heap_alloc(flags: c::DWORD, dwBytes: c::SIZE_T) -> c::LPVOID {
125127
unsafe { HeapAlloc(heap, flags, dwBytes) }
126128
}
127129

130+
#[inline(never)]
131+
fn process_heap_alloc(
132+
_heap: MaybeUninit<c::HANDLE>, // We pass this argument to match the ABI of `HeapAlloc`
133+
flags: c::DWORD,
134+
dwBytes: c::SIZE_T,
135+
) -> c::LPVOID {
136+
let heap = HEAP.load(Ordering::Relaxed);
137+
if core::intrinsics::likely(!heap.is_null()) {
138+
// SAFETY: `heap` is a non-null handle returned by `GetProcessHeap`.
139+
unsafe { HeapAlloc(heap, flags, dwBytes) }
140+
} else {
141+
process_heap_init_and_alloc(MaybeUninit::uninit(), flags, dwBytes)
142+
}
143+
}
144+
128145
// Get a non-null handle to the default heap of the current process.
129146
// SAFETY: `HEAP` must have been successfully initialized.
130147
#[inline]
@@ -148,12 +165,12 @@ unsafe fn allocate(layout: Layout, zeroed: bool) -> *mut u8 {
148165

149166
if layout.align() <= MIN_ALIGN {
150167
// The returned pointer points to the start of an allocated block.
151-
process_heap_alloc(flags, layout.size()) as *mut u8
168+
process_heap_alloc(MaybeUninit::uninit(), flags, layout.size()) as *mut u8
152169
} else {
153170
// Allocate extra padding in order to be able to satisfy the alignment.
154171
let total = layout.align() + layout.size();
155172

156-
let ptr = process_heap_alloc(flags, total) as *mut u8;
173+
let ptr = process_heap_alloc(MaybeUninit::uninit(), flags, total) as *mut u8;
157174
if ptr.is_null() {
158175
// Allocation has failed.
159176
return ptr::null_mut();

0 commit comments

Comments
 (0)