Skip to content

Unsafe.As resulting in lots of unnecessary moves. #55357

@MichalPetryka

Description

@MichalPetryka

Description

Using Unsafe.As on a struct results in many unnecessary asm move instructions, especially on method return, and causes even more of them after inlining.

Configuration

Sharplab Core CLR v5.0.721.25508

Regression?

No idea

Data

Sharplab

using System.Runtime.Intrinsics.X86;
using System.Runtime.Intrinsics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

/// <summary>
/// Minimal repro comparing two ways of applying the same SSE2 128-bit lane shift:
/// through a custom 16-byte struct reinterpreted with <c>Unsafe.As</c> (the "A" path)
/// and directly on <c>Vector128&lt;byte&gt;</c> (the "B" path).
/// </summary>
public static unsafe class Test
{
    /// <summary>Non-inlined entry point for the struct path; forwards to <see cref="ShiftA"/>.</summary>
    public static UInt128 A(UInt128 v)
    {
        return ShiftA(v);
    }
    
    /// <summary>Non-inlined entry point for the vector path; forwards to <see cref="ShiftB"/>.</summary>
    public static Vector128<byte> B(Vector128<byte> v)
    {
        return ShiftB(v);
    }
    
    /// <summary>
    /// Shifts <paramref name="c"/> with <c>Sse2.ShiftLeftLogical128BitLane(..., 1)</c> by
    /// reinterpreting the struct as a <c>Vector128&lt;byte&gt;</c> and the result back again.
    /// </summary>
    /// <param name="c">The 16-byte value to shift.</param>
    /// <returns>The shifted value, or <c>default</c> when SSE2 is not supported.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static UInt128 ShiftA(UInt128 c)
    {
        if (Sse2.IsSupported)
		{
            // Reinterpret the by-value parameter in place as a vector (no conversion call),
            // shift it, and keep the result in a local so it can be taken by ref below.
            Vector128<byte> val1 = Sse2.ShiftLeftLogical128BitLane(
                Unsafe.As<UInt128, Vector128<byte>>(ref c), 1);
            // Reinterpret the vector local back as the struct type for the return value.
            return Unsafe.As<Vector128<byte>, UInt128>(ref val1);
		}
        
        // not important in this case
        return default;
    }
    
    /// <summary>
    /// Baseline for comparison: the same <c>Sse2.ShiftLeftLogical128BitLane(..., 1)</c> call
    /// applied directly to a vector, with no <c>Unsafe.As</c> reinterpretation.
    /// </summary>
    /// <param name="c">The vector to shift.</param>
    /// <returns>The shifted vector, or <c>default</c> when SSE2 is not supported.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Vector128<byte> ShiftB(Vector128<byte> c)
    {
        if (Sse2.IsSupported)
		{
            return Sse2.ShiftLeftLogical128BitLane(c, 1);
		}
        
        // not important in this case
		return default;
    }
    
    /// <summary>
    /// 16-byte, explicitly laid out struct made of two <c>ulong</c> fields at offsets 0 and 8,
    /// giving it the same size as the <c>Vector128&lt;byte&gt;</c> it is reinterpreted as in <see cref="ShiftA"/>.
    /// </summary>
    [StructLayout(LayoutKind.Explicit, Size = 16)]
    public readonly struct UInt128
    {
        // Bytes 0..7 of the struct.
        [FieldOffset(0)]
        public readonly ulong long1;
        // Bytes 8..15 of the struct.
        [FieldOffset(8)]
        public readonly ulong long2;
        
        /// <summary>Initializes the struct from its two 64-bit fields.</summary>
        /// <param name="a">Value stored in <see cref="long1"/> (offset 0).</param>
        /// <param name="b">Value stored in <see cref="long2"/> (offset 8).</param>
        public UInt128(ulong a, ulong b)
        {
            long1 = a;
            long2 = b;
        }
    }
}

Output:

; Core CLR 5.0.921.35908 on amd64

Test.A(UInt128)
    L0000: sub rsp, 0x28
    L0004: vzeroupper
    L0007: mov rax, [rdx]
    L000a: mov rax, [rdx+8]
    L000e: vmovupd xmm0, [rsp]
    L0013: vpslldq xmm0, xmm0, 1
    L0018: vmovapd [rsp], xmm0
    L001d: vmovdqu xmm0, [rsp]
    L0022: vmovdqu [rsp+0x18], xmm0
    L0028: vmovdqu xmm0, [rsp+0x18]
    L002e: vmovdqu [rcx], xmm0
    L0032: mov rax, rcx
    L0035: add rsp, 0x28
    L0039: ret

Test.B(System.Runtime.Intrinsics.Vector128`1<Byte>)
    L0000: vzeroupper
    L0003: vmovupd xmm0, [rdx]
    L0007: vpslldq xmm0, xmm0, 1
    L000c: vmovupd [rcx], xmm0
    L0010: mov rax, rcx
    L0013: ret

Test.ShiftA(UInt128)
    L0000: sub rsp, 0x18
    L0004: vzeroupper
    L0007: vmovupd xmm0, [rdx]
    L000b: vpslldq xmm0, xmm0, 1
    L0010: vmovapd [rsp], xmm0
    L0015: vmovdqu xmm0, [rsp]
    L001a: vmovdqu [rcx], xmm0
    L001e: mov rax, rcx
    L0021: add rsp, 0x18
    L0025: ret

Test.ShiftB(System.Runtime.Intrinsics.Vector128`1<Byte>)
    L0000: vzeroupper
    L0003: vmovupd xmm0, [rdx]
    L0007: vpslldq xmm0, xmm0, 1
    L000c: vmovupd [rcx], xmm0
    L0010: mov rax, rcx
    L0013: ret

Test+UInt128..ctor(UInt64, UInt64)
    L0000: mov [rcx], rdx
    L0003: mov [rcx+8], r8
    L0007: ret

category:cq
theme:structs
skill-level:expert
cost:medium
impact:medium

Metadata

Metadata

Assignees

Labels

area-CodeGen-coreclr: CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI; tenet-performance: Performance related issue

Type

No type

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions