Skip to content

[X86] Add atomic vector store tests for unaligned >1 sizes. #197372

Merged
jofrn merged 1 commit into
main from
users/jofrn/atomic-store-unaligned-tests
May 18, 2026
Merged

[X86] Add atomic vector store tests for unaligned >1 sizes. #197372
jofrn merged 1 commit into
main from
users/jofrn/atomic-store-unaligned-tests

Conversation

@jofrn

@jofrn jofrn commented May 13, 2026

Copy link
Copy Markdown
Contributor

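Unaligned atomic vector stores with size >1 (i.e. stores wider than one byte whose alignment is below their size, such as the `align 4` stores of 8- to 64-byte vectors tested here) are lowered to `__atomic_store` library calls.
Their tests are added separately here.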

Store-side counterpart to #148896. Stacked below #197165.

Unaligned atomic vector stores with size >1 are lowered to calls.
Adding their tests separately here.
@llvmorg-github-actions

Copy link
Copy Markdown

@llvm/pr-subscribers-backend-x86

Author: jofrn

Changes

Unaligned atomic vector stores with size >1 are lowered to calls.
Adding their tests separately here.

Store-side counterpart to #148896.


Patch is 51.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/197372.diff

1 File Affected:

  • (modified) llvm/test/CodeGen/X86/atomic-load-store.ll (+1068)
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 867a4acb791bc..75b694ddc774d 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -1102,3 +1102,1071 @@ define <32 x half> @atomic_vec32_half(ptr %x) nounwind {
   %ret = load atomic <32 x half>, ptr %x acquire, align 4
   ret <32 x half> %ret
 }
+
+define void @store_atomic_vec1_ptr_unaligned(ptr %x, <1 x ptr> %v) nounwind {
+; CHECK-O3-LABEL: store_atomic_vec1_ptr_unaligned:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    pushq %rax
+; CHECK-O3-NEXT:    movq %rdi, %rax
+; CHECK-O3-NEXT:    movq %rsi, (%rsp)
+; CHECK-O3-NEXT:    movq %rsp, %rdx
+; CHECK-O3-NEXT:    movl $8, %edi
+; CHECK-O3-NEXT:    movq %rax, %rsi
+; CHECK-O3-NEXT:    movl $3, %ecx
+; CHECK-O3-NEXT:    callq __atomic_store@PLT
+; CHECK-O3-NEXT:    popq %rax
+; CHECK-O3-NEXT:    retq
+;
+; CHECK-O0-LABEL: store_atomic_vec1_ptr_unaligned:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    movq %rsi, %rax
+; CHECK-O0-NEXT:    movq %rdi, %rsi
+; CHECK-O0-NEXT:    movq %rax, (%rsp)
+; CHECK-O0-NEXT:    movl $8, %edi
+; CHECK-O0-NEXT:    movq %rsp, %rdx
+; CHECK-O0-NEXT:    movl $3, %ecx
+; CHECK-O0-NEXT:    callq __atomic_store@PLT
+; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    retq
+  store atomic <1 x ptr> %v, ptr %x release, align 4
+  ret void
+}
+
+define void @store_atomic_vec1_i64_unaligned(ptr %x, <1 x i64> %v) nounwind {
+; CHECK-O3-LABEL: store_atomic_vec1_i64_unaligned:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    pushq %rax
+; CHECK-O3-NEXT:    movq %rdi, %rax
+; CHECK-O3-NEXT:    movq %rsi, (%rsp)
+; CHECK-O3-NEXT:    movq %rsp, %rdx
+; CHECK-O3-NEXT:    movl $8, %edi
+; CHECK-O3-NEXT:    movq %rax, %rsi
+; CHECK-O3-NEXT:    movl $3, %ecx
+; CHECK-O3-NEXT:    callq __atomic_store@PLT
+; CHECK-O3-NEXT:    popq %rax
+; CHECK-O3-NEXT:    retq
+;
+; CHECK-O0-LABEL: store_atomic_vec1_i64_unaligned:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    movq %rsi, %rax
+; CHECK-O0-NEXT:    movq %rdi, %rsi
+; CHECK-O0-NEXT:    movq %rax, (%rsp)
+; CHECK-O0-NEXT:    movl $8, %edi
+; CHECK-O0-NEXT:    movq %rsp, %rdx
+; CHECK-O0-NEXT:    movl $3, %ecx
+; CHECK-O0-NEXT:    callq __atomic_store@PLT
+; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    retq
+  store atomic <1 x i64> %v, ptr %x release, align 4
+  ret void
+}
+
+define void @store_atomic_vec1_double_unaligned(ptr %x, <1 x double> %v) nounwind {
+; CHECK-SSE-O3-LABEL: store_atomic_vec1_double_unaligned:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    pushq %rax
+; CHECK-SSE-O3-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT:    movsd %xmm0, (%rsp)
+; CHECK-SSE-O3-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT:    movl $8, %edi
+; CHECK-SSE-O3-NEXT:    movl $3, %ecx
+; CHECK-SSE-O3-NEXT:    callq __atomic_store@PLT
+; CHECK-SSE-O3-NEXT:    popq %rax
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec1_double_unaligned:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    pushq %rax
+; CHECK-AVX-O3-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O3-NEXT:    vmovsd %xmm0, (%rsp)
+; CHECK-AVX-O3-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O3-NEXT:    movl $8, %edi
+; CHECK-AVX-O3-NEXT:    movl $3, %ecx
+; CHECK-AVX-O3-NEXT:    callq __atomic_store@PLT
+; CHECK-AVX-O3-NEXT:    popq %rax
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec1_double_unaligned:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    pushq %rax
+; CHECK-SSE-O0-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT:    movsd %xmm0, (%rsp)
+; CHECK-SSE-O0-NEXT:    movl $8, %edi
+; CHECK-SSE-O0-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT:    movl $3, %ecx
+; CHECK-SSE-O0-NEXT:    callq __atomic_store@PLT
+; CHECK-SSE-O0-NEXT:    popq %rax
+; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec1_double_unaligned:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    pushq %rax
+; CHECK-AVX-O0-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O0-NEXT:    vmovsd %xmm0, (%rsp)
+; CHECK-AVX-O0-NEXT:    movl $8, %edi
+; CHECK-AVX-O0-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O0-NEXT:    movl $3, %ecx
+; CHECK-AVX-O0-NEXT:    callq __atomic_store@PLT
+; CHECK-AVX-O0-NEXT:    popq %rax
+; CHECK-AVX-O0-NEXT:    retq
+  store atomic <1 x double> %v, ptr %x release, align 4
+  ret void
+}
+
+define void @store_atomic_vec2_i32_unaligned(ptr %x, <2 x i32> %v) nounwind {
+; CHECK-SSE-O3-LABEL: store_atomic_vec2_i32_unaligned:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    pushq %rax
+; CHECK-SSE-O3-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT:    movlps %xmm0, (%rsp)
+; CHECK-SSE-O3-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT:    movl $8, %edi
+; CHECK-SSE-O3-NEXT:    movl $3, %ecx
+; CHECK-SSE-O3-NEXT:    callq __atomic_store@PLT
+; CHECK-SSE-O3-NEXT:    popq %rax
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec2_i32_unaligned:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    pushq %rax
+; CHECK-AVX-O3-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O3-NEXT:    vmovlps %xmm0, (%rsp)
+; CHECK-AVX-O3-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O3-NEXT:    movl $8, %edi
+; CHECK-AVX-O3-NEXT:    movl $3, %ecx
+; CHECK-AVX-O3-NEXT:    callq __atomic_store@PLT
+; CHECK-AVX-O3-NEXT:    popq %rax
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec2_i32_unaligned:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    pushq %rax
+; CHECK-SSE-O0-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT:    movq %xmm0, (%rsp)
+; CHECK-SSE-O0-NEXT:    movl $8, %edi
+; CHECK-SSE-O0-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT:    movl $3, %ecx
+; CHECK-SSE-O0-NEXT:    callq __atomic_store@PLT
+; CHECK-SSE-O0-NEXT:    popq %rax
+; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec2_i32_unaligned:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    pushq %rax
+; CHECK-AVX-O0-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O0-NEXT:    vmovq %xmm0, (%rsp)
+; CHECK-AVX-O0-NEXT:    movl $8, %edi
+; CHECK-AVX-O0-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O0-NEXT:    movl $3, %ecx
+; CHECK-AVX-O0-NEXT:    callq __atomic_store@PLT
+; CHECK-AVX-O0-NEXT:    popq %rax
+; CHECK-AVX-O0-NEXT:    retq
+  store atomic <2 x i32> %v, ptr %x release, align 4
+  ret void
+}
+
+define void @store_atomic_vec4_float_unaligned(ptr %x, <4 x float> %v) nounwind {
+; CHECK-SSE-O3-LABEL: store_atomic_vec4_float_unaligned:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    subq $24, %rsp
+; CHECK-SSE-O3-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT:    movaps %xmm0, (%rsp)
+; CHECK-SSE-O3-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT:    movl $16, %edi
+; CHECK-SSE-O3-NEXT:    movl $3, %ecx
+; CHECK-SSE-O3-NEXT:    callq __atomic_store@PLT
+; CHECK-SSE-O3-NEXT:    addq $24, %rsp
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec4_float_unaligned:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    subq $24, %rsp
+; CHECK-AVX-O3-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O3-NEXT:    vmovaps %xmm0, (%rsp)
+; CHECK-AVX-O3-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O3-NEXT:    movl $16, %edi
+; CHECK-AVX-O3-NEXT:    movl $3, %ecx
+; CHECK-AVX-O3-NEXT:    callq __atomic_store@PLT
+; CHECK-AVX-O3-NEXT:    addq $24, %rsp
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec4_float_unaligned:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    subq $24, %rsp
+; CHECK-SSE-O0-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT:    movaps %xmm0, (%rsp)
+; CHECK-SSE-O0-NEXT:    movl $16, %edi
+; CHECK-SSE-O0-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT:    movl $3, %ecx
+; CHECK-SSE-O0-NEXT:    callq __atomic_store@PLT
+; CHECK-SSE-O0-NEXT:    addq $24, %rsp
+; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec4_float_unaligned:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    subq $24, %rsp
+; CHECK-AVX-O0-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O0-NEXT:    vmovaps %xmm0, (%rsp)
+; CHECK-AVX-O0-NEXT:    movl $16, %edi
+; CHECK-AVX-O0-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O0-NEXT:    movl $3, %ecx
+; CHECK-AVX-O0-NEXT:    callq __atomic_store@PLT
+; CHECK-AVX-O0-NEXT:    addq $24, %rsp
+; CHECK-AVX-O0-NEXT:    retq
+  store atomic <4 x float> %v, ptr %x release, align 4
+  ret void
+}
+
+define void @store_atomic_vec8_double_unaligned(ptr %x, <8 x double> %v) nounwind {
+; CHECK-SSE-O3-LABEL: store_atomic_vec8_double_unaligned:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    subq $72, %rsp
+; CHECK-SSE-O3-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT:    movaps %xmm3, {{[0-9]+}}(%rsp)
+; CHECK-SSE-O3-NEXT:    movaps %xmm2, {{[0-9]+}}(%rsp)
+; CHECK-SSE-O3-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; CHECK-SSE-O3-NEXT:    movaps %xmm0, (%rsp)
+; CHECK-SSE-O3-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT:    movl $64, %edi
+; CHECK-SSE-O3-NEXT:    movl $3, %ecx
+; CHECK-SSE-O3-NEXT:    callq __atomic_store@PLT
+; CHECK-SSE-O3-NEXT:    addq $72, %rsp
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX2-O3-LABEL: store_atomic_vec8_double_unaligned:
+; CHECK-AVX2-O3:       # %bb.0:
+; CHECK-AVX2-O3-NEXT:    subq $72, %rsp
+; CHECK-AVX2-O3-NEXT:    movq %rdi, %rsi
+; CHECK-AVX2-O3-NEXT:    vmovups %ymm1, {{[0-9]+}}(%rsp)
+; CHECK-AVX2-O3-NEXT:    vmovups %ymm0, (%rsp)
+; CHECK-AVX2-O3-NEXT:    movq %rsp, %rdx
+; CHECK-AVX2-O3-NEXT:    movl $64, %edi
+; CHECK-AVX2-O3-NEXT:    movl $3, %ecx
+; CHECK-AVX2-O3-NEXT:    vzeroupper
+; CHECK-AVX2-O3-NEXT:    callq __atomic_store@PLT
+; CHECK-AVX2-O3-NEXT:    addq $72, %rsp
+; CHECK-AVX2-O3-NEXT:    retq
+;
+; CHECK-AVX512-O3-LABEL: store_atomic_vec8_double_unaligned:
+; CHECK-AVX512-O3:       # %bb.0:
+; CHECK-AVX512-O3-NEXT:    subq $72, %rsp
+; CHECK-AVX512-O3-NEXT:    movq %rdi, %rsi
+; CHECK-AVX512-O3-NEXT:    vmovups %zmm0, (%rsp)
+; CHECK-AVX512-O3-NEXT:    movq %rsp, %rdx
+; CHECK-AVX512-O3-NEXT:    movl $64, %edi
+; CHECK-AVX512-O3-NEXT:    movl $3, %ecx
+; CHECK-AVX512-O3-NEXT:    vzeroupper
+; CHECK-AVX512-O3-NEXT:    callq __atomic_store@PLT
+; CHECK-AVX512-O3-NEXT:    addq $72, %rsp
+; CHECK-AVX512-O3-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec8_double_unaligned:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    subq $72, %rsp
+; CHECK-SSE-O0-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT:    movapd %xmm0, (%rsp)
+; CHECK-SSE-O0-NEXT:    movapd %xmm1, {{[0-9]+}}(%rsp)
+; CHECK-SSE-O0-NEXT:    movapd %xmm2, {{[0-9]+}}(%rsp)
+; CHECK-SSE-O0-NEXT:    movapd %xmm3, {{[0-9]+}}(%rsp)
+; CHECK-SSE-O0-NEXT:    movl $64, %edi
+; CHECK-SSE-O0-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT:    movl $3, %ecx
+; CHECK-SSE-O0-NEXT:    callq __atomic_store@PLT
+; CHECK-SSE-O0-NEXT:    addq $72, %rsp
+; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX2-O0-LABEL: store_atomic_vec8_double_unaligned:
+; CHECK-AVX2-O0:       # %bb.0:
+; CHECK-AVX2-O0-NEXT:    subq $72, %rsp
+; CHECK-AVX2-O0-NEXT:    movq %rdi, %rsi
+; CHECK-AVX2-O0-NEXT:    vmovupd %ymm0, (%rsp)
+; CHECK-AVX2-O0-NEXT:    vmovupd %ymm1, {{[0-9]+}}(%rsp)
+; CHECK-AVX2-O0-NEXT:    movl $64, %edi
+; CHECK-AVX2-O0-NEXT:    movq %rsp, %rdx
+; CHECK-AVX2-O0-NEXT:    movl $3, %ecx
+; CHECK-AVX2-O0-NEXT:    vzeroupper
+; CHECK-AVX2-O0-NEXT:    callq __atomic_store@PLT
+; CHECK-AVX2-O0-NEXT:    addq $72, %rsp
+; CHECK-AVX2-O0-NEXT:    retq
+;
+; CHECK-AVX512-O0-LABEL: store_atomic_vec8_double_unaligned:
+; CHECK-AVX512-O0:       # %bb.0:
+; CHECK-AVX512-O0-NEXT:    subq $72, %rsp
+; CHECK-AVX512-O0-NEXT:    movq %rdi, %rsi
+; CHECK-AVX512-O0-NEXT:    vmovupd %zmm0, (%rsp)
+; CHECK-AVX512-O0-NEXT:    movl $64, %edi
+; CHECK-AVX512-O0-NEXT:    movq %rsp, %rdx
+; CHECK-AVX512-O0-NEXT:    movl $3, %ecx
+; CHECK-AVX512-O0-NEXT:    vzeroupper
+; CHECK-AVX512-O0-NEXT:    callq __atomic_store@PLT
+; CHECK-AVX512-O0-NEXT:    addq $72, %rsp
+; CHECK-AVX512-O0-NEXT:    retq
+  store atomic <8 x double> %v, ptr %x release, align 4
+  ret void
+}
+
+define void @store_atomic_vec16_bfloat_unaligned(ptr %x, <16 x bfloat> %v) nounwind {
+; CHECK-SSE-O3-LABEL: store_atomic_vec16_bfloat_unaligned:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    subq $40, %rsp
+; CHECK-SSE-O3-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
+; CHECK-SSE-O3-NEXT:    movaps %xmm0, (%rsp)
+; CHECK-SSE-O3-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT:    movl $32, %edi
+; CHECK-SSE-O3-NEXT:    movl $3, %ecx
+; CHECK-SSE-O3-NEXT:    callq __atomic_store@PLT
+; CHECK-SSE-O3-NEXT:    addq $40, %rsp
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec16_bfloat_unaligned:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    subq $40, %rsp
+; CHECK-AVX-O3-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O3-NEXT:    vmovups %ymm0, (%rsp)
+; CHECK-AVX-O3-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O3-NEXT:    movl $32, %edi
+; CHECK-AVX-O3-NEXT:    movl $3, %ecx
+; CHECK-AVX-O3-NEXT:    vzeroupper
+; CHECK-AVX-O3-NEXT:    callq __atomic_store@PLT
+; CHECK-AVX-O3-NEXT:    addq $40, %rsp
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-SSE2-O0-LABEL: store_atomic_vec16_bfloat_unaligned:
+; CHECK-SSE2-O0:       # %bb.0:
+; CHECK-SSE2-O0-NEXT:    subq $120, %rsp
+; CHECK-SSE2-O0-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-SSE2-O0-NEXT:    pextrw $7, %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:    shll $16, %eax
+; CHECK-SSE2-O0-NEXT:    movd %eax, %xmm2
+; CHECK-SSE2-O0-NEXT:    movss %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-SSE2-O0-NEXT:    pextrw $6, %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:    shll $16, %eax
+; CHECK-SSE2-O0-NEXT:    movd %eax, %xmm2
+; CHECK-SSE2-O0-NEXT:    movss %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-SSE2-O0-NEXT:    pextrw $5, %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:    shll $16, %eax
+; CHECK-SSE2-O0-NEXT:    movd %eax, %xmm2
+; CHECK-SSE2-O0-NEXT:    movss %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-SSE2-O0-NEXT:    pextrw $4, %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:    shll $16, %eax
+; CHECK-SSE2-O0-NEXT:    movd %eax, %xmm2
+; CHECK-SSE2-O0-NEXT:    movss %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-SSE2-O0-NEXT:    pextrw $3, %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:    shll $16, %eax
+; CHECK-SSE2-O0-NEXT:    movd %eax, %xmm2
+; CHECK-SSE2-O0-NEXT:    movss %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-SSE2-O0-NEXT:    pextrw $2, %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:    shll $16, %eax
+; CHECK-SSE2-O0-NEXT:    movd %eax, %xmm2
+; CHECK-SSE2-O0-NEXT:    movss %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-SSE2-O0-NEXT:    pextrw $1, %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:    shll $16, %eax
+; CHECK-SSE2-O0-NEXT:    movd %eax, %xmm2
+; CHECK-SSE2-O0-NEXT:    movss %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-SSE2-O0-NEXT:    movd %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:    shll $16, %eax
+; CHECK-SSE2-O0-NEXT:    movd %eax, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-SSE2-O0-NEXT:    pextrw $7, %xmm0, %eax
+; CHECK-SSE2-O0-NEXT:    shll $16, %eax
+; CHECK-SSE2-O0-NEXT:    movd %eax, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-SSE2-O0-NEXT:    pextrw $6, %xmm0, %eax
+; CHECK-SSE2-O0-NEXT:    shll $16, %eax
+; CHECK-SSE2-O0-NEXT:    movd %eax, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-SSE2-O0-NEXT:    pextrw $5, %xmm0, %eax
+; CHECK-SSE2-O0-NEXT:    shll $16, %eax
+; CHECK-SSE2-O0-NEXT:    movd %eax, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-SSE2-O0-NEXT:    pextrw $4, %xmm0, %eax
+; CHECK-SSE2-O0-NEXT:    shll $16, %eax
+; CHECK-SSE2-O0-NEXT:    movd %eax, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-SSE2-O0-NEXT:    pextrw $3, %xmm0, %eax
+; CHECK-SSE2-O0-NEXT:    shll $16, %eax
+; CHECK-SSE2-O0-NEXT:    movd %eax, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-SSE2-O0-NEXT:    pextrw $2, %xmm0, %eax
+; CHECK-SSE2-O0-NEXT:    shll $16, %eax
+; CHECK-SSE2-O0-NEXT:    movd %eax, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-SSE2-O0-NEXT:    pextrw $1, %xmm0, %eax
+; CHECK-SSE2-O0-NEXT:    shll $16, %eax
+; CHECK-SSE2-O0-NEXT:    movd %eax, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
+; CHECK-SSE2-O0-NEXT:    movd %xmm0, %eax
+; CHECK-SSE2-O0-NEXT:    shll $16, %eax
+; CHECK-SSE2-O0-NEXT:    movd %eax, %xmm0
+; CHECK-SSE2-O0-NEXT:    callq __truncsfbf2@PLT
+; CHECK-SSE2-O0-NEXT:    movaps %xmm0, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-SSE2-O0-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE2-O0-NEXT:    pextrw $0, %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-SSE2-O0-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
+; CHECK-SSE2-O0-NEXT:    callq __truncsfbf2@PLT
+; CHECK-SSE2-O0-NEXT:    movaps %xmm0, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-SSE2-O0-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE2-O0-NEXT:    pextrw $0, %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-SSE2-O0-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
+; CHECK-SSE2-O0-NEXT:    callq __truncsfbf2@PLT
+; CHECK-SSE2-O0-NEXT:    movaps %xmm0, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-SSE2-O0-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE2-O0-NEXT:    pextrw $0, %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-SSE2-O0-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
+; CHECK-SSE2-O0-NEXT:    callq __truncsfbf2@PLT
+; CHECK-SSE2-O0-NEXT:    movaps %xmm0, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-SSE2-O0-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE2-O0-NEXT:    pextrw $0, %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-SSE2-O0-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
+; CHECK-SSE2-O0-NEXT:    callq __truncsfbf2@PLT
+; CHECK-SSE2-O0-NEXT:    movaps %xmm0, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-SSE2-O0-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE2-O0-NEXT:    pextrw $0, %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-SSE2-O0-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
+; CHECK-SSE2-O0-NEXT:    callq __truncsfbf2@PLT
+; CHECK-SSE2-O0-NEXT:    movaps %xmm0, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-SSE2-O0-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE2-O0-NEXT:    pextrw $0, %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-SSE2-O0-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
+; CHECK-SSE2-O0-NEXT:    callq __truncsfbf2@PLT
+; CHECK-SSE2-O0-NEXT:    movaps %xmm0, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-SSE2-O0-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE2-O0-NEXT:    pextrw $0, %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-SSE2-O0-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
+; CHECK-SSE2-O0-NEXT:    callq __truncsfbf2@PLT
+; CHECK-SSE2-O0-NEXT:    movaps %xmm0, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-SSE2-O0-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE2-O0-NEXT:    pextrw $0, %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-SSE2-O0-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
+; CHECK-SSE2-O0-NEXT:    callq __truncsfbf2@PLT
+; CHECK-SSE2-O0-NEXT:    movaps %xmm0, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-SSE2-O0-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE2-O0-NEXT:    pextrw $0, %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-SSE2-O0-NEXT:    movw %ax, {{[0-9]+}}(%rsp)
+; CHECK-SSE2-O0-NEXT:    callq __truncsfbf2@PLT
+; CHECK-SSE2-O0-NEXT:    movaps %xmm0, %xmm1
+; CHECK-SSE2-O0-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
+; CHECK-SSE2-O0-NEXT:    # xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE2-O0-NEXT:    pextrw $0, %xmm1, %eax
+; CHECK-SSE2-O0-NEXT:   ...
[truncated]

@github-actions

Copy link
Copy Markdown

⚠️ We detected that you are using a GitHub private e-mail address to contribute to the repo.
Please turn off the "Keep my email addresses private" setting in your account.
See LLVM Developer Policy and LLVM Discourse for more information.

@RKSimon RKSimon left a comment

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@jofrn jofrn merged commit 73ddc72 into main May 18, 2026
12 checks passed
@jofrn jofrn deleted the users/jofrn/atomic-store-unaligned-tests branch May 18, 2026 12:01
pedroMVicente pushed a commit to pedroMVicente/llvm-project that referenced this pull request May 19, 2026
)

Unaligned atomic vector stores with size >1 are lowered to calls.
Adding their tests separately here.

Store-side counterpart to llvm#148896. Stacked below llvm#197165.
jofrn added a commit that referenced this pull request May 19, 2026
`store atomic <1 x T>` is not valid. This change legalizes
vector types of atomic store via scalarization in SelectionDAG
so that it can, for example, translate from `v1i32` to `i32`.

This is the store-side counterpart to #148894. Stacked on top of
#197372 and below #197166.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants