Skip to content

[X86] Add atomic vector tests for unaligned >1 sizes.#148896

Merged
jofrn merged 1 commit into
mainfrom
users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes
Oct 23, 2025
Merged

[X86] Add atomic vector tests for unaligned >1 sizes.#148896
jofrn merged 1 commit into
mainfrom
users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes

Conversation

@jofrn

@jofrn jofrn commented Jul 15, 2025

Copy link
Copy Markdown
Contributor

Unaligned atomic vectors with size >1 are lowered to calls.
Adding their tests separately here.

@llvmbot

llvmbot commented Jul 15, 2025

Copy link
Copy Markdown
Member

@llvm/pr-subscribers-backend-x86

Author: None (jofrn)

Changes

Unaligned atomic vectors with size >1 are lowered to calls.
Adding their tests separately here.


Patch is 21.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/148896.diff

1 Files Affected:

  • (modified) llvm/test/CodeGen/X86/atomic-load-store.ll (+588)
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 9fab8b98b4af0..3e7b73a65fe07 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -270,6 +270,82 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
   ret <1 x i64> %ret
 }
 
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec1_ptr:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    pushq %rax
+; CHECK-O3-NEXT:    movq %rdi, %rsi
+; CHECK-O3-NEXT:    movq %rsp, %rdx
+; CHECK-O3-NEXT:    movl $8, %edi
+; CHECK-O3-NEXT:    movl $2, %ecx
+; CHECK-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-O3-NEXT:    movq (%rsp), %rax
+; CHECK-O3-NEXT:    popq %rcx
+; CHECK-O3-NEXT:    retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_ptr:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    pushq %rax
+; CHECK-SSE-O3-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT:    movl $8, %edi
+; CHECK-SSE-O3-NEXT:    movl $2, %ecx
+; CHECK-SSE-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT:    movq (%rsp), %rax
+; CHECK-SSE-O3-NEXT:    popq %rcx
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_ptr:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    pushq %rax
+; CHECK-AVX-O3-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O3-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O3-NEXT:    movl $8, %edi
+; CHECK-AVX-O3-NEXT:    movl $2, %ecx
+; CHECK-AVX-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX-O3-NEXT:    movq (%rsp), %rax
+; CHECK-AVX-O3-NEXT:    popq %rcx
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-O0-LABEL: atomic_vec1_ptr:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    movq %rdi, %rsi
+; CHECK-O0-NEXT:    movl $8, %edi
+; CHECK-O0-NEXT:    movq %rsp, %rdx
+; CHECK-O0-NEXT:    movl $2, %ecx
+; CHECK-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-O0-NEXT:    movq (%rsp), %rax
+; CHECK-O0-NEXT:    popq %rcx
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_ptr:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    pushq %rax
+; CHECK-SSE-O0-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT:    movl $8, %edi
+; CHECK-SSE-O0-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT:    movl $2, %ecx
+; CHECK-SSE-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-SSE-O0-NEXT:    movq (%rsp), %rax
+; CHECK-SSE-O0-NEXT:    popq %rcx
+; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_ptr:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    pushq %rax
+; CHECK-AVX-O0-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O0-NEXT:    movl $8, %edi
+; CHECK-AVX-O0-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O0-NEXT:    movl $2, %ecx
+; CHECK-AVX-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX-O0-NEXT:    movq (%rsp), %rax
+; CHECK-AVX-O0-NEXT:    popq %rcx
+; CHECK-AVX-O0-NEXT:    retq
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
+
 define <1 x half> @atomic_vec1_half(ptr %x) {
 ; CHECK-O3-LABEL: atomic_vec1_half:
 ; CHECK-O3:       # %bb.0:
@@ -386,3 +462,515 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind {
   %ret = load atomic <1 x double>, ptr %x acquire, align 8
   ret <1 x double> %ret
 }
+
+define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec1_i64:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    pushq %rax
+; CHECK-O3-NEXT:    movq %rdi, %rsi
+; CHECK-O3-NEXT:    movq %rsp, %rdx
+; CHECK-O3-NEXT:    movl $8, %edi
+; CHECK-O3-NEXT:    movl $2, %ecx
+; CHECK-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-O3-NEXT:    movq (%rsp), %rax
+; CHECK-O3-NEXT:    popq %rcx
+; CHECK-O3-NEXT:    retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_i64:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    pushq %rax
+; CHECK-SSE-O3-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT:    movl $8, %edi
+; CHECK-SSE-O3-NEXT:    movl $2, %ecx
+; CHECK-SSE-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT:    movq (%rsp), %rax
+; CHECK-SSE-O3-NEXT:    popq %rcx
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_i64:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    pushq %rax
+; CHECK-AVX-O3-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O3-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O3-NEXT:    movl $8, %edi
+; CHECK-AVX-O3-NEXT:    movl $2, %ecx
+; CHECK-AVX-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX-O3-NEXT:    movq (%rsp), %rax
+; CHECK-AVX-O3-NEXT:    popq %rcx
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-O0-LABEL: atomic_vec1_i64:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    movq %rdi, %rsi
+; CHECK-O0-NEXT:    movl $8, %edi
+; CHECK-O0-NEXT:    movq %rsp, %rdx
+; CHECK-O0-NEXT:    movl $2, %ecx
+; CHECK-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-O0-NEXT:    movq (%rsp), %rax
+; CHECK-O0-NEXT:    popq %rcx
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_i64:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    pushq %rax
+; CHECK-SSE-O0-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT:    movl $8, %edi
+; CHECK-SSE-O0-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT:    movl $2, %ecx
+; CHECK-SSE-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-SSE-O0-NEXT:    movq (%rsp), %rax
+; CHECK-SSE-O0-NEXT:    popq %rcx
+; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_i64:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    pushq %rax
+; CHECK-AVX-O0-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O0-NEXT:    movl $8, %edi
+; CHECK-AVX-O0-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O0-NEXT:    movl $2, %ecx
+; CHECK-AVX-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX-O0-NEXT:    movq (%rsp), %rax
+; CHECK-AVX-O0-NEXT:    popq %rcx
+; CHECK-AVX-O0-NEXT:    retq
+  %ret = load atomic <1 x i64>, ptr %x acquire, align 4
+  ret <1 x i64> %ret
+}
+
+define <1 x double> @atomic_vec1_double(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec1_double:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    pushq %rax
+; CHECK-O3-NEXT:    movq %rdi, %rsi
+; CHECK-O3-NEXT:    movq %rsp, %rdx
+; CHECK-O3-NEXT:    movl $8, %edi
+; CHECK-O3-NEXT:    movl $2, %ecx
+; CHECK-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-O3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-O3-NEXT:    popq %rax
+; CHECK-O3-NEXT:    retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_double:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    pushq %rax
+; CHECK-SSE-O3-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT:    movl $8, %edi
+; CHECK-SSE-O3-NEXT:    movl $2, %ecx
+; CHECK-SSE-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-SSE-O3-NEXT:    popq %rax
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_double:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    pushq %rax
+; CHECK-AVX-O3-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O3-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O3-NEXT:    movl $8, %edi
+; CHECK-AVX-O3-NEXT:    movl $2, %ecx
+; CHECK-AVX-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX-O3-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-AVX-O3-NEXT:    popq %rax
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-O0-LABEL: atomic_vec1_double:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    movq %rdi, %rsi
+; CHECK-O0-NEXT:    movl $8, %edi
+; CHECK-O0-NEXT:    movq %rsp, %rdx
+; CHECK-O0-NEXT:    movl $2, %ecx
+; CHECK-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-O0-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_double:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    pushq %rax
+; CHECK-SSE-O0-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT:    movl $8, %edi
+; CHECK-SSE-O0-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT:    movl $2, %ecx
+; CHECK-SSE-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-SSE-O0-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-SSE-O0-NEXT:    popq %rax
+; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_double:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    pushq %rax
+; CHECK-AVX-O0-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O0-NEXT:    movl $8, %edi
+; CHECK-AVX-O0-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O0-NEXT:    movl $2, %ecx
+; CHECK-AVX-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX-O0-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-AVX-O0-NEXT:    popq %rax
+; CHECK-AVX-O0-NEXT:    retq
+  %ret = load atomic <1 x double>, ptr %x acquire, align 4
+  ret <1 x double> %ret
+}
+
+define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec2_i32:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    pushq %rax
+; CHECK-O3-NEXT:    movq %rdi, %rsi
+; CHECK-O3-NEXT:    movq %rsp, %rdx
+; CHECK-O3-NEXT:    movl $8, %edi
+; CHECK-O3-NEXT:    movl $2, %ecx
+; CHECK-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-O3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-O3-NEXT:    popq %rax
+; CHECK-O3-NEXT:    retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_i32:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    pushq %rax
+; CHECK-SSE-O3-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT:    movl $8, %edi
+; CHECK-SSE-O3-NEXT:    movl $2, %ecx
+; CHECK-SSE-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-SSE-O3-NEXT:    popq %rax
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_i32:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    pushq %rax
+; CHECK-AVX-O3-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O3-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O3-NEXT:    movl $8, %edi
+; CHECK-AVX-O3-NEXT:    movl $2, %ecx
+; CHECK-AVX-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX-O3-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-AVX-O3-NEXT:    popq %rax
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-O0-LABEL: atomic_vec2_i32:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    pushq %rax
+; CHECK-O0-NEXT:    movq %rdi, %rsi
+; CHECK-O0-NEXT:    movl $8, %edi
+; CHECK-O0-NEXT:    movq %rsp, %rdx
+; CHECK-O0-NEXT:    movl $2, %ecx
+; CHECK-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-O0-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-O0-NEXT:    popq %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_i32:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    pushq %rax
+; CHECK-SSE-O0-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT:    movl $8, %edi
+; CHECK-SSE-O0-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT:    movl $2, %ecx
+; CHECK-SSE-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-SSE-O0-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-SSE-O0-NEXT:    popq %rax
+; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_i32:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    pushq %rax
+; CHECK-AVX-O0-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O0-NEXT:    movl $8, %edi
+; CHECK-AVX-O0-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O0-NEXT:    movl $2, %ecx
+; CHECK-AVX-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX-O0-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-AVX-O0-NEXT:    popq %rax
+; CHECK-AVX-O0-NEXT:    retq
+  %ret = load atomic <2 x i32>, ptr %x acquire, align 4
+  ret <2 x i32> %ret
+}
+
+define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec4_float:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    subq $24, %rsp
+; CHECK-O3-NEXT:    movq %rdi, %rsi
+; CHECK-O3-NEXT:    movq %rsp, %rdx
+; CHECK-O3-NEXT:    movl $16, %edi
+; CHECK-O3-NEXT:    movl $2, %ecx
+; CHECK-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-O3-NEXT:    movaps (%rsp), %xmm0
+; CHECK-O3-NEXT:    addq $24, %rsp
+; CHECK-O3-NEXT:    retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec4_float:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    subq $24, %rsp
+; CHECK-SSE-O3-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT:    movl $16, %edi
+; CHECK-SSE-O3-NEXT:    movl $2, %ecx
+; CHECK-SSE-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT:    movaps (%rsp), %xmm0
+; CHECK-SSE-O3-NEXT:    addq $24, %rsp
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec4_float:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    subq $24, %rsp
+; CHECK-AVX-O3-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O3-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O3-NEXT:    movl $16, %edi
+; CHECK-AVX-O3-NEXT:    movl $2, %ecx
+; CHECK-AVX-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX-O3-NEXT:    vmovaps (%rsp), %xmm0
+; CHECK-AVX-O3-NEXT:    addq $24, %rsp
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-O0-LABEL: atomic_vec4_float:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    subq $24, %rsp
+; CHECK-O0-NEXT:    movq %rdi, %rsi
+; CHECK-O0-NEXT:    movl $16, %edi
+; CHECK-O0-NEXT:    movq %rsp, %rdx
+; CHECK-O0-NEXT:    movl $2, %ecx
+; CHECK-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-O0-NEXT:    movaps (%rsp), %xmm0
+; CHECK-O0-NEXT:    addq $24, %rsp
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec4_float:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    subq $24, %rsp
+; CHECK-SSE-O0-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT:    movl $16, %edi
+; CHECK-SSE-O0-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT:    movl $2, %ecx
+; CHECK-SSE-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-SSE-O0-NEXT:    movaps (%rsp), %xmm0
+; CHECK-SSE-O0-NEXT:    addq $24, %rsp
+; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec4_float:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    subq $24, %rsp
+; CHECK-AVX-O0-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O0-NEXT:    movl $16, %edi
+; CHECK-AVX-O0-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O0-NEXT:    movl $2, %ecx
+; CHECK-AVX-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX-O0-NEXT:    vmovaps (%rsp), %xmm0
+; CHECK-AVX-O0-NEXT:    addq $24, %rsp
+; CHECK-AVX-O0-NEXT:    retq
+  %ret = load atomic <4 x float>, ptr %x acquire, align 4
+  ret <4 x float> %ret
+}
+
+define <8 x double> @atomic_vec8_double(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec8_double:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    subq $72, %rsp
+; CHECK-O3-NEXT:    movq %rdi, %rsi
+; CHECK-O3-NEXT:    movq %rsp, %rdx
+; CHECK-O3-NEXT:    movl $64, %edi
+; CHECK-O3-NEXT:    movl $2, %ecx
+; CHECK-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-O3-NEXT:    movaps (%rsp), %xmm0
+; CHECK-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-O3-NEXT:    addq $72, %rsp
+; CHECK-O3-NEXT:    retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec8_double:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    subq $72, %rsp
+; CHECK-SSE-O3-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT:    movl $64, %edi
+; CHECK-SSE-O3-NEXT:    movl $2, %ecx
+; CHECK-SSE-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT:    movaps (%rsp), %xmm0
+; CHECK-SSE-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-SSE-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-SSE-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-SSE-O3-NEXT:    addq $72, %rsp
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-O0-LABEL: atomic_vec8_double:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    subq $72, %rsp
+; CHECK-O0-NEXT:    movq %rdi, %rsi
+; CHECK-O0-NEXT:    movl $64, %edi
+; CHECK-O0-NEXT:    movq %rsp, %rdx
+; CHECK-O0-NEXT:    movl $2, %ecx
+; CHECK-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-O0-NEXT:    movapd (%rsp), %xmm0
+; CHECK-O0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm1
+; CHECK-O0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm2
+; CHECK-O0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm3
+; CHECK-O0-NEXT:    addq $72, %rsp
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec8_double:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    subq $72, %rsp
+; CHECK-SSE-O0-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT:    movl $64, %edi
+; CHECK-SSE-O0-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT:    movl $2, %ecx
+; CHECK-SSE-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-SSE-O0-NEXT:    movapd (%rsp), %xmm0
+; CHECK-SSE-O0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm1
+; CHECK-SSE-O0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm2
+; CHECK-SSE-O0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm3
+; CHECK-SSE-O0-NEXT:    addq $72, %rsp
+; CHECK-SSE-O0-NEXT:    retq
+  %ret = load atomic <8 x double>, ptr %x acquire, align 4
+  ret <8 x double> %ret
+}
+
+define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec16_bfloat:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    subq $40, %rsp
+; CHECK-O3-NEXT:    movq %rdi, %rsi
+; CHECK-O3-NEXT:    movq %rsp, %rdx
+; CHECK-O3-NEXT:    movl $32, %edi
+; CHECK-O3-NEXT:    movl $2, %ecx
+; CHECK-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-O3-NEXT:    movaps (%rsp), %xmm0
+; CHECK-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-O3-NEXT:    addq $40, %rsp
+; CHECK-O3-NEXT:    retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec16_bfloat:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    subq $40, %rsp
+; CHECK-SSE-O3-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT:    movl $32, %edi
+; CHECK-SSE-O3-NEXT:    movl $2, %ecx
+; CHECK-SSE-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT:    movaps (%rsp), %xmm0
+; CHECK-SSE-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-SSE-O3-NEXT:    addq $40, %rsp
+; CHECK-SSE-O3-NEXT:    retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec16_bfloat:
+; CHECK-AVX-O3:       # %bb.0:
+; CHECK-AVX-O3-NEXT:    subq $40, %rsp
+; CHECK-AVX-O3-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O3-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O3-NEXT:    movl $32, %edi
+; CHECK-AVX-O3-NEXT:    movl $2, %ecx
+; CHECK-AVX-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX-O3-NEXT:    vmovups (%rsp), %ymm0
+; CHECK-AVX-O3-NEXT:    addq $40, %rsp
+; CHECK-AVX-O3-NEXT:    retq
+;
+; CHECK-O0-LABEL: atomic_vec16_bfloat:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    subq $40, %rsp
+; CHECK-O0-NEXT:    movq %rdi, %rsi
+; CHECK-O0-NEXT:    movl $32, %edi
+; CHECK-O0-NEXT:    movq %rsp, %rdx
+; CHECK-O0-NEXT:    movl $2, %ecx
+; CHECK-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-O0-NEXT:    movaps (%rsp), %xmm0
+; CHECK-O0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-O0-NEXT:    addq $40, %rsp
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec16_bfloat:
+; CHECK-SSE-O0:       # %bb.0:
+; CHECK-SSE-O0-NEXT:    subq $40, %rsp
+; CHECK-SSE-O0-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT:    movl $32, %edi
+; CHECK-SSE-O0-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT:    movl $2, %ecx
+; CHECK-SSE-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-SSE-O0-NEXT:    movaps (%rsp), %xmm0
+; CHECK-SSE-O0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-SSE-O0-NEXT:    addq $40, %rsp
+; CHECK-SSE-O0-NEXT:    retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec16_bfloat:
+; CHECK-AVX-O0:       # %bb.0:
+; CHECK-AVX-O0-NEXT:    subq $40, %rsp
+; CHECK-AVX-O0-NEXT:    movq %rdi, %rsi
+; CHECK-AVX-O0-NEXT:    movl $32, %edi
+; CHECK-AVX-O0-NEXT:    movq %rsp, %rdx
+; CHECK-AVX-O0-NEXT:    movl $2, %ecx
+; CHECK-AVX-O0-NEXT:    callq __atomic_load@PLT
+; CHECK-AVX-O0-NEXT:    vmovups (%rsp), %ymm0
+; CHECK-AVX-O0-NEXT:    addq $40, %rsp
+; CHECK-AVX-O0-NEXT:    retq
+  %ret = load atomic <16 x bfloat>, ptr %x acquire, align 4
+  ret <16 x bfloat> %ret
+}
+
+define <32 x half> @atomic_vec32_half(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec32_half:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    subq $72, %rsp
+; CHECK-O3-NEXT:    movq %rdi, %rsi
+; CHECK-O3-NEXT:    movq %rsp, %rdx
+; CHECK-O3-NEXT:    movl $64, %edi
+; CHECK-O3-NEXT:    movl $2, %ecx
+; CHECK-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-O3-NEXT:    movaps (%rsp), %xmm0
+; CHECK-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-O3-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-O3-NEXT:    addq $72, %rsp
+; CHECK-O3-NEXT:    retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec32_half:
+; CHECK-SSE-O3:       # %bb.0:
+; CHECK-SSE-O3-NEXT:    subq $72, %rsp
+; CHECK-SSE-O3-NEXT:    movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT:    movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT:    movl $64, %edi
+; CHECK-SSE-O3-NEXT:    movl $2, %ecx
+; CHECK-SSE-O3-NEXT:    callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT:    movaps (%rsp), %xmm0
+; CHECK-SSE-O3-NEXT:    mov...
[truncated]

@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes branch from 27e1e69 to e9ba6ed Compare July 16, 2025 13:46
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_manage_atomic_load_of_fp_-_int_promotion_in_dag branch from 0e8e100 to 261a6d6 Compare July 16, 2025 13:46
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_manage_atomic_load_of_fp_-_int_promotion_in_dag branch from 261a6d6 to d25a454 Compare October 15, 2025 19:19
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes branch from e9ba6ed to a162961 Compare October 15, 2025 19:19
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_manage_atomic_load_of_fp_-_int_promotion_in_dag branch from d25a454 to 7779dac Compare October 16, 2025 15:29
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes branch from a162961 to 2f09512 Compare October 16, 2025 15:29
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_manage_atomic_load_of_fp_-_int_promotion_in_dag branch from 7779dac to 9174bd5 Compare October 20, 2025 21:13
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes branch from 2f09512 to 206603c Compare October 20, 2025 21:13
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_manage_atomic_load_of_fp_-_int_promotion_in_dag branch from 9174bd5 to 4fda381 Compare October 21, 2025 10:42
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes branch from 206603c to 0f3bacd Compare October 21, 2025 10:42
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_manage_atomic_load_of_fp_-_int_promotion_in_dag branch from 4fda381 to b3f7d0b Compare October 21, 2025 15:37
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes branch from 0f3bacd to 1ed917f Compare October 21, 2025 15:37
@jofrn jofrn requested a review from arsenm October 21, 2025 15:47

jofrn commented Oct 23, 2025

Copy link
Copy Markdown
Contributor Author

Merge activity

  • Oct 23, 3:43 AM UTC: A user started a stack merge that includes this pull request via Graphite.
  • Oct 23, 10:14 AM UTC: Graphite couldn't merge this pull request because a downstack PR [X86] Manage atomic load of fp -> int promotion in DAG #148895 failed to merge.
  • Oct 23, 5:52 PM UTC: Graphite rebased this pull request, because this pull request is set to merge when ready.
  • Oct 23, 6:29 PM UTC: Graphite rebased this pull request as part of a merge.
  • Oct 23, 6:34 PM UTC: Graphite rebased this pull request as part of a merge.
  • Oct 23, 6:49 PM UTC: Graphite rebased this pull request as part of a merge.
  • Oct 23, 6:51 PM UTC: Graphite rebased this pull request as part of a merge.
  • Oct 23, 7:01 PM UTC: Graphite rebased this pull request as part of a merge.
  • Oct 23, 7:03 PM UTC: Graphite rebased this pull request as part of a merge.
  • Oct 23, 7:04 PM UTC: Graphite rebased this pull request as part of a merge.
  • Oct 23, 7:28 PM UTC: Graphite rebased this pull request as part of a merge.
  • Oct 23, 7:38 PM UTC: Graphite rebased this pull request as part of a merge.
  • Oct 23, 8:11 PM UTC: @jofrn merged this pull request with Graphite.

@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_manage_atomic_load_of_fp_-_int_promotion_in_dag branch 4 times, most recently from 6a590e1 to a9e1bc6 Compare October 23, 2025 06:53
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_manage_atomic_load_of_fp_-_int_promotion_in_dag branch 9 times, most recently from c1fcbd3 to 2848791 Compare October 23, 2025 17:20
Base automatically changed from users/jofrn/gt/07-15-_x86_manage_atomic_load_of_fp_-_int_promotion_in_dag to main October 23, 2025 17:50
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes branch 9 times, most recently from d48afdf to da000fe Compare October 23, 2025 19:28
Unaligned atomic vectors with size >1 are lowered to calls.
Adding their tests separately here.
@jofrn jofrn force-pushed the users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes branch from da000fe to cff15ed Compare October 23, 2025 19:37
@jofrn jofrn merged commit d87200e into main Oct 23, 2025
7 of 9 checks passed
@jofrn jofrn deleted the users/jofrn/gt/07-15-_x86_add_atomic_vector_tests_for_unaligned_1_sizes branch October 23, 2025 20:11
dvbuka pushed a commit to dvbuka/llvm-project that referenced this pull request Oct 27, 2025
Unaligned atomic vectors with size >1 are lowered to calls.
Adding their tests separately here.
Lukacma pushed a commit to Lukacma/llvm-project that referenced this pull request Oct 29, 2025
Unaligned atomic vectors with size >1 are lowered to calls.
Adding their tests separately here.
jofrn added a commit that referenced this pull request May 18, 2026
Unaligned atomic vector stores with size >1 are lowered to calls.
Adding their tests separately here.

Store-side counterpart to #148896. Stacked below #197165.
pedroMVicente pushed a commit to pedroMVicente/llvm-project that referenced this pull request May 19, 2026
)

Unaligned atomic vector stores with size >1 are lowered to calls.
Adding their tests separately here.

Store-side counterpart to llvm#148896. Stacked below llvm#197165.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants