-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Closed
Labels
area-CodeGen-coreclr (CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI), tenet-performance (Performance related issue)
Milestone
Description
Description
Using Unsafe.As on a struct results in a lot of unnecessary asm move instructions, especially on method return, and even more of them appear after inlining.
Configuration
Sharplab Core CLR v5.0.721.25508
Regression?
No idea
Data
using System.Runtime.Intrinsics.X86;
using System.Runtime.Intrinsics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
public static unsafe class Test
{
/// <summary>
/// Entry point for the struct-based variant: forwards <paramref name="v"/> to
/// <see cref="ShiftA"/>, which is marked AggressiveInlining, and returns its result.
/// </summary>
/// <param name="v">The 16-byte value passed through to <see cref="ShiftA"/>.</param>
/// <returns>Whatever <see cref="ShiftA"/> returns for <paramref name="v"/>.</returns>
public static UInt128 A(UInt128 v)
{
return ShiftA(v);
}
/// <summary>
/// Entry point for the vector-based variant: forwards <paramref name="v"/> to
/// <see cref="ShiftB"/>, which is marked AggressiveInlining, and returns its result.
/// </summary>
/// <param name="v">The vector passed through to <see cref="ShiftB"/>.</param>
/// <returns>Whatever <see cref="ShiftB"/> returns for <paramref name="v"/>.</returns>
public static Vector128<byte> B(Vector128<byte> v)
{
return ShiftB(v);
}
/// <summary>
/// Applies <c>Sse2.ShiftLeftLogical128BitLane</c> with a count of 1 to a
/// <see cref="UInt128"/> by reinterpreting it as <c>Vector128&lt;byte&gt;</c> and back
/// through <c>Unsafe.As</c>.
/// </summary>
/// <param name="c">The value to shift; it is reinterpreted in place via <c>ref c</c>.</param>
/// <returns>
/// The shifted value reinterpreted as <see cref="UInt128"/>, or <c>default</c> when
/// <c>Sse2.IsSupported</c> is false.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static UInt128 ShiftA(UInt128 c)
{
if (Sse2.IsSupported)
{
// Reinterpret the struct as a vector (no conversion call), then shift by 1.
Vector128<byte> val1 = Sse2.ShiftLeftLogical128BitLane(
Unsafe.As<UInt128, Vector128<byte>>(ref c), 1);
// Reinterpret the vector local back as the struct for the return value.
return Unsafe.As<Vector128<byte>, UInt128>(ref val1);
}
// Fallback when SSE2 is unavailable; not important in this case
return default;
}
/// <summary>
/// Applies <c>Sse2.ShiftLeftLogical128BitLane</c> with a count of 1 directly to a
/// <c>Vector128&lt;byte&gt;</c>, with no <c>Unsafe.As</c> reinterpretation.
/// </summary>
/// <param name="c">The vector to shift.</param>
/// <returns>
/// The shifted vector, or <c>default</c> when <c>Sse2.IsSupported</c> is false.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<byte> ShiftB(Vector128<byte> c)
{
if (Sse2.IsSupported)
{
return Sse2.ShiftLeftLogical128BitLane(c, 1);
}
// Fallback when SSE2 is unavailable; not important in this case
return default;
}
/// <summary>
/// Read-only 16-byte struct with explicit layout: two <c>ulong</c> fields at byte
/// offsets 0 and 8. <see cref="ShiftA"/> reinterprets it as <c>Vector128&lt;byte&gt;</c>.
/// </summary>
[StructLayout(LayoutKind.Explicit, Size = 16)]
public readonly struct UInt128
{
// Occupies bytes 0-7 of the struct.
[FieldOffset(0)]
public readonly ulong long1;
// Occupies bytes 8-15 of the struct.
[FieldOffset(8)]
public readonly ulong long2;
/// <summary>Initializes both fields.</summary>
/// <param name="a">Value stored in <see cref="long1"/> (offset 0).</param>
/// <param name="b">Value stored in <see cref="long2"/> (offset 8).</param>
public UInt128(ulong a, ulong b)
{
long1 = a;
long2 = b;
}
}
}
Output:
; Core CLR 5.0.921.35908 on amd64
Test.A(UInt128)
L0000: sub rsp, 0x28
L0004: vzeroupper
L0007: mov rax, [rdx]
L000a: mov rax, [rdx+8]
L000e: vmovupd xmm0, [rsp]
L0013: vpslldq xmm0, xmm0, 1
L0018: vmovapd [rsp], xmm0
L001d: vmovdqu xmm0, [rsp]
L0022: vmovdqu [rsp+0x18], xmm0
L0028: vmovdqu xmm0, [rsp+0x18]
L002e: vmovdqu [rcx], xmm0
L0032: mov rax, rcx
L0035: add rsp, 0x28
L0039: ret
Test.B(System.Runtime.Intrinsics.Vector128`1<Byte>)
L0000: vzeroupper
L0003: vmovupd xmm0, [rdx]
L0007: vpslldq xmm0, xmm0, 1
L000c: vmovupd [rcx], xmm0
L0010: mov rax, rcx
L0013: ret
Test.ShiftA(UInt128)
L0000: sub rsp, 0x18
L0004: vzeroupper
L0007: vmovupd xmm0, [rdx]
L000b: vpslldq xmm0, xmm0, 1
L0010: vmovapd [rsp], xmm0
L0015: vmovdqu xmm0, [rsp]
L001a: vmovdqu [rcx], xmm0
L001e: mov rax, rcx
L0021: add rsp, 0x18
L0025: ret
Test.ShiftB(System.Runtime.Intrinsics.Vector128`1<Byte>)
L0000: vzeroupper
L0003: vmovupd xmm0, [rdx]
L0007: vpslldq xmm0, xmm0, 1
L000c: vmovupd [rcx], xmm0
L0010: mov rax, rcx
L0013: ret
Test+UInt128..ctor(UInt64, UInt64)
L0000: mov [rcx], rdx
L0003: mov [rcx+8], r8
L0007: ret
category:cq
theme:structs
skill-level:expert
cost:medium
impact:medium
Metadata
Metadata
Assignees
Labels
area-CodeGen-coreclr (CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI), tenet-performance (Performance related issue)