-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Description
I believe the codegen for "safe constant stackallocs" could be improved, to match what can already be manually achieved by just declaring a local with [SkipLocalsInit], of a custom struct type of an explicit size. What I mean by "safe constant stackalloc":
- The stackalloc size must be known at compile time (just like for [StructLayout(Size = ...)]).
- The stackalloc must be assigned directly into a Span<T> (or perhaps only a Span<byte>?).
Here's the current codegen when using such a declaration in a method:
C# code (click to expand):
[SkipLocalsInit]
static void M1()
{
Span<byte> span = stackalloc byte[64];
Foo(span);
}
asm x86-64 code (click to expand):
; .NET 5 (from sharplab)
Program.M1()
L0000: push rbp
L0001: sub rsp, 0x40
L0005: lea rbp, [rsp+0x20]
L000a: xor eax, eax
L000c: mov [rbp+0x10], rax
L0010: mov rax, 0xc45859a823ae
L001a: mov [rbp+8], rax
L001e: add rsp, 0x20
L0022: test [rsp], esp
L0025: sub rsp, 0x40
L0029: sub rsp, 0x20
L002d: lea rcx, [rsp+0x20]
L0032: mov eax, 0x40
L0037: lea rdx, [rbp+0x10]
L003b: mov [rdx], rcx
L003e: mov [rdx+8], eax
L0041: lea rcx, [rbp+0x10]
L0045: call Program.Foo(System.Span`1<Byte>)
L004a: mov rcx, 0xc45859a823ae
L0054: cmp [rbp+8], rcx
L0058: je short L005f
L005a: call 0x00007ffc9de2d430
L005f: nop
L0060: lea rsp, [rbp+0x20]
L0064: pop rbp
L0065: ret
; .NET 6 (from a recent build, with Disasmo)
; Method Program:M1()
G_M41622_IG01:
push rbp
sub rsp, 64
lea rbp, [rsp+20H]
xor eax, eax
mov qword ptr [rbp+10H], rax
mov rax, 0xD1FFAB1E
mov qword ptr [rbp+08H], rax
;; bbWeight=1 PerfScore 4.25
G_M41622_IG02:
add rsp, 32
test dword ptr [rsp], esp
sub rsp, 64
sub rsp, 32
lea rcx, [rsp+20H]
mov eax, 64
mov bword ptr [rbp+10H], rcx
mov dword ptr [rbp+18H], eax
lea rcx, bword ptr [rbp+10H]
call Program.Program:Foo(System.Span`1[Byte])
mov rcx, 0xD1FFAB1E
cmp qword ptr [rbp+08H], rcx
je SHORT G_M41622_IG03
call CORINFO_HELP_FAIL_FAST
;; bbWeight=1 PerfScore 10.25
G_M41622_IG03:
nop
;; bbWeight=1 PerfScore 0.25
G_M41622_IG04:
lea rsp, [rbp+20H]
pop rbp
ret
;; bbWeight=1 PerfScore 2.00
; Total bytes of code: 99
And here's the codegen when using the trick of a local struct type with an explicit layout:
C# code (click to expand):
[StructLayout(LayoutKind.Explicit, Size = 64)]
struct Space { }
[SkipLocalsInit]
static void M2()
{
Space space;
Span<byte> span = new(&space, 64);
Foo(span);
}
asm x86-64 code (click to expand):
; .NET 5 (from sharplab)
Program.M2()
L0000: sub rsp, 0x78
L0004: xor eax, eax
L0006: mov [rsp+0x28], rax
L000b: lea rcx, [rsp+0x38]
L0010: mov eax, 0x40
L0015: lea rdx, [rsp+0x28]
L001a: mov [rdx], rcx
L001d: mov [rdx+8], eax
L0020: lea rcx, [rsp+0x28]
L0025: call Program.Foo(System.Span`1<Byte>)
L002a: nop
L002b: add rsp, 0x78
L002f: ret
; .NET 6 (from a recent build, with Disasmo)
; Method Program:M2()
G_M36245_IG01:
sub rsp, 120
xor eax, eax
mov qword ptr [rsp+28H], rax
;; bbWeight=1 PerfScore 1.50
G_M36245_IG02:
lea rcx, bword ptr [rsp+38H]
mov eax, 64
mov bword ptr [rsp+28H], rcx
mov dword ptr [rsp+30H], eax
lea rcx, bword ptr [rsp+28H]
call EncryptTest.Program:Foo(System.Span`1[Byte])
nop
;; bbWeight=1 PerfScore 4.50
G_M36245_IG03:
add rsp, 120
ret
;; bbWeight=1 PerfScore 1.25
; Total bytes of code: 46
No stack cookie, no stack guard page check, no branches, no pushed registers - just all the good stuff 😄
It should be doable to generate this code when the two conditions above hold: Span<T> already protects against buffer overruns (so omitting the stack cookie is fine), and, most importantly, the same codegen can already be obtained today, just with extra steps.
Configuration
The .NET 6 listings were produced with Disasmo, using a local checked build from ~2 weeks ago.
Regression?
Nope.
category:cq
theme:stack-allocation