[X86] Add atomic vector tests for unaligned >1 sizes. #148896
Merged
jofrn merged 1 commit into main on Oct 23, 2025
Merged
Conversation
This was referenced Jul 15, 2025
Contributor
Author
This was referenced Jul 15, 2025
Member
|
@llvm/pr-subscribers-backend-x86 — Author: None (jofrn). Changes: Unaligned atomic vectors with size >1 are lowered to calls. Patch is 21.43 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/148896.diff — 1 file affected:
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 9fab8b98b4af0..3e7b73a65fe07 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -270,6 +270,82 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
ret <1 x i64> %ret
}
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec1_ptr:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: pushq %rax
+; CHECK-O3-NEXT: movq %rdi, %rsi
+; CHECK-O3-NEXT: movq %rsp, %rdx
+; CHECK-O3-NEXT: movl $8, %edi
+; CHECK-O3-NEXT: movl $2, %ecx
+; CHECK-O3-NEXT: callq __atomic_load@PLT
+; CHECK-O3-NEXT: movq (%rsp), %rax
+; CHECK-O3-NEXT: popq %rcx
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_ptr:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: pushq %rax
+; CHECK-SSE-O3-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT: movl $8, %edi
+; CHECK-SSE-O3-NEXT: movl $2, %ecx
+; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT: movq (%rsp), %rax
+; CHECK-SSE-O3-NEXT: popq %rcx
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_ptr:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: pushq %rax
+; CHECK-AVX-O3-NEXT: movq %rdi, %rsi
+; CHECK-AVX-O3-NEXT: movq %rsp, %rdx
+; CHECK-AVX-O3-NEXT: movl $8, %edi
+; CHECK-AVX-O3-NEXT: movl $2, %ecx
+; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT
+; CHECK-AVX-O3-NEXT: movq (%rsp), %rax
+; CHECK-AVX-O3-NEXT: popq %rcx
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_ptr:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: pushq %rax
+; CHECK-O0-NEXT: movq %rdi, %rsi
+; CHECK-O0-NEXT: movl $8, %edi
+; CHECK-O0-NEXT: movq %rsp, %rdx
+; CHECK-O0-NEXT: movl $2, %ecx
+; CHECK-O0-NEXT: callq __atomic_load@PLT
+; CHECK-O0-NEXT: movq (%rsp), %rax
+; CHECK-O0-NEXT: popq %rcx
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_ptr:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: pushq %rax
+; CHECK-SSE-O0-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT: movl $8, %edi
+; CHECK-SSE-O0-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT: movl $2, %ecx
+; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O0-NEXT: movq (%rsp), %rax
+; CHECK-SSE-O0-NEXT: popq %rcx
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_ptr:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: pushq %rax
+; CHECK-AVX-O0-NEXT: movq %rdi, %rsi
+; CHECK-AVX-O0-NEXT: movl $8, %edi
+; CHECK-AVX-O0-NEXT: movq %rsp, %rdx
+; CHECK-AVX-O0-NEXT: movl $2, %ecx
+; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT
+; CHECK-AVX-O0-NEXT: movq (%rsp), %rax
+; CHECK-AVX-O0-NEXT: popq %rcx
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+ ret <1 x ptr> %ret
+}
+
define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-O3-LABEL: atomic_vec1_half:
; CHECK-O3: # %bb.0:
@@ -386,3 +462,515 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind {
%ret = load atomic <1 x double>, ptr %x acquire, align 8
ret <1 x double> %ret
}
+
+define <1 x i64> @atomic_vec1_i64(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec1_i64:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: pushq %rax
+; CHECK-O3-NEXT: movq %rdi, %rsi
+; CHECK-O3-NEXT: movq %rsp, %rdx
+; CHECK-O3-NEXT: movl $8, %edi
+; CHECK-O3-NEXT: movl $2, %ecx
+; CHECK-O3-NEXT: callq __atomic_load@PLT
+; CHECK-O3-NEXT: movq (%rsp), %rax
+; CHECK-O3-NEXT: popq %rcx
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_i64:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: pushq %rax
+; CHECK-SSE-O3-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT: movl $8, %edi
+; CHECK-SSE-O3-NEXT: movl $2, %ecx
+; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT: movq (%rsp), %rax
+; CHECK-SSE-O3-NEXT: popq %rcx
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_i64:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: pushq %rax
+; CHECK-AVX-O3-NEXT: movq %rdi, %rsi
+; CHECK-AVX-O3-NEXT: movq %rsp, %rdx
+; CHECK-AVX-O3-NEXT: movl $8, %edi
+; CHECK-AVX-O3-NEXT: movl $2, %ecx
+; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT
+; CHECK-AVX-O3-NEXT: movq (%rsp), %rax
+; CHECK-AVX-O3-NEXT: popq %rcx
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_i64:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: pushq %rax
+; CHECK-O0-NEXT: movq %rdi, %rsi
+; CHECK-O0-NEXT: movl $8, %edi
+; CHECK-O0-NEXT: movq %rsp, %rdx
+; CHECK-O0-NEXT: movl $2, %ecx
+; CHECK-O0-NEXT: callq __atomic_load@PLT
+; CHECK-O0-NEXT: movq (%rsp), %rax
+; CHECK-O0-NEXT: popq %rcx
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_i64:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: pushq %rax
+; CHECK-SSE-O0-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT: movl $8, %edi
+; CHECK-SSE-O0-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT: movl $2, %ecx
+; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O0-NEXT: movq (%rsp), %rax
+; CHECK-SSE-O0-NEXT: popq %rcx
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_i64:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: pushq %rax
+; CHECK-AVX-O0-NEXT: movq %rdi, %rsi
+; CHECK-AVX-O0-NEXT: movl $8, %edi
+; CHECK-AVX-O0-NEXT: movq %rsp, %rdx
+; CHECK-AVX-O0-NEXT: movl $2, %ecx
+; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT
+; CHECK-AVX-O0-NEXT: movq (%rsp), %rax
+; CHECK-AVX-O0-NEXT: popq %rcx
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x i64>, ptr %x acquire, align 4
+ ret <1 x i64> %ret
+}
+
+define <1 x double> @atomic_vec1_double(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec1_double:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: pushq %rax
+; CHECK-O3-NEXT: movq %rdi, %rsi
+; CHECK-O3-NEXT: movq %rsp, %rdx
+; CHECK-O3-NEXT: movl $8, %edi
+; CHECK-O3-NEXT: movl $2, %ecx
+; CHECK-O3-NEXT: callq __atomic_load@PLT
+; CHECK-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-O3-NEXT: popq %rax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec1_double:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: pushq %rax
+; CHECK-SSE-O3-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT: movl $8, %edi
+; CHECK-SSE-O3-NEXT: movl $2, %ecx
+; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-SSE-O3-NEXT: popq %rax
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec1_double:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: pushq %rax
+; CHECK-AVX-O3-NEXT: movq %rdi, %rsi
+; CHECK-AVX-O3-NEXT: movq %rsp, %rdx
+; CHECK-AVX-O3-NEXT: movl $8, %edi
+; CHECK-AVX-O3-NEXT: movl $2, %ecx
+; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT
+; CHECK-AVX-O3-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-AVX-O3-NEXT: popq %rax
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec1_double:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: pushq %rax
+; CHECK-O0-NEXT: movq %rdi, %rsi
+; CHECK-O0-NEXT: movl $8, %edi
+; CHECK-O0-NEXT: movq %rsp, %rdx
+; CHECK-O0-NEXT: movl $2, %ecx
+; CHECK-O0-NEXT: callq __atomic_load@PLT
+; CHECK-O0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-O0-NEXT: popq %rax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec1_double:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: pushq %rax
+; CHECK-SSE-O0-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT: movl $8, %edi
+; CHECK-SSE-O0-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT: movl $2, %ecx
+; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-SSE-O0-NEXT: popq %rax
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec1_double:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: pushq %rax
+; CHECK-AVX-O0-NEXT: movq %rdi, %rsi
+; CHECK-AVX-O0-NEXT: movl $8, %edi
+; CHECK-AVX-O0-NEXT: movq %rsp, %rdx
+; CHECK-AVX-O0-NEXT: movl $2, %ecx
+; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT
+; CHECK-AVX-O0-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-AVX-O0-NEXT: popq %rax
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <1 x double>, ptr %x acquire, align 4
+ ret <1 x double> %ret
+}
+
+define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec2_i32:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: pushq %rax
+; CHECK-O3-NEXT: movq %rdi, %rsi
+; CHECK-O3-NEXT: movq %rsp, %rdx
+; CHECK-O3-NEXT: movl $8, %edi
+; CHECK-O3-NEXT: movl $2, %ecx
+; CHECK-O3-NEXT: callq __atomic_load@PLT
+; CHECK-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-O3-NEXT: popq %rax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_i32:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: pushq %rax
+; CHECK-SSE-O3-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT: movl $8, %edi
+; CHECK-SSE-O3-NEXT: movl $2, %ecx
+; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-SSE-O3-NEXT: popq %rax
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_i32:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: pushq %rax
+; CHECK-AVX-O3-NEXT: movq %rdi, %rsi
+; CHECK-AVX-O3-NEXT: movq %rsp, %rdx
+; CHECK-AVX-O3-NEXT: movl $8, %edi
+; CHECK-AVX-O3-NEXT: movl $2, %ecx
+; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT
+; CHECK-AVX-O3-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-AVX-O3-NEXT: popq %rax
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec2_i32:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: pushq %rax
+; CHECK-O0-NEXT: movq %rdi, %rsi
+; CHECK-O0-NEXT: movl $8, %edi
+; CHECK-O0-NEXT: movq %rsp, %rdx
+; CHECK-O0-NEXT: movl $2, %ecx
+; CHECK-O0-NEXT: callq __atomic_load@PLT
+; CHECK-O0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-O0-NEXT: popq %rax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_i32:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: pushq %rax
+; CHECK-SSE-O0-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT: movl $8, %edi
+; CHECK-SSE-O0-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT: movl $2, %ecx
+; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-SSE-O0-NEXT: popq %rax
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_i32:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: pushq %rax
+; CHECK-AVX-O0-NEXT: movq %rdi, %rsi
+; CHECK-AVX-O0-NEXT: movl $8, %edi
+; CHECK-AVX-O0-NEXT: movq %rsp, %rdx
+; CHECK-AVX-O0-NEXT: movl $2, %ecx
+; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT
+; CHECK-AVX-O0-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-AVX-O0-NEXT: popq %rax
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <2 x i32>, ptr %x acquire, align 4
+ ret <2 x i32> %ret
+}
+
+define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec4_float:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: subq $24, %rsp
+; CHECK-O3-NEXT: movq %rdi, %rsi
+; CHECK-O3-NEXT: movq %rsp, %rdx
+; CHECK-O3-NEXT: movl $16, %edi
+; CHECK-O3-NEXT: movl $2, %ecx
+; CHECK-O3-NEXT: callq __atomic_load@PLT
+; CHECK-O3-NEXT: movaps (%rsp), %xmm0
+; CHECK-O3-NEXT: addq $24, %rsp
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec4_float:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: subq $24, %rsp
+; CHECK-SSE-O3-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT: movl $16, %edi
+; CHECK-SSE-O3-NEXT: movl $2, %ecx
+; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0
+; CHECK-SSE-O3-NEXT: addq $24, %rsp
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec4_float:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: subq $24, %rsp
+; CHECK-AVX-O3-NEXT: movq %rdi, %rsi
+; CHECK-AVX-O3-NEXT: movq %rsp, %rdx
+; CHECK-AVX-O3-NEXT: movl $16, %edi
+; CHECK-AVX-O3-NEXT: movl $2, %ecx
+; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT
+; CHECK-AVX-O3-NEXT: vmovaps (%rsp), %xmm0
+; CHECK-AVX-O3-NEXT: addq $24, %rsp
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec4_float:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: subq $24, %rsp
+; CHECK-O0-NEXT: movq %rdi, %rsi
+; CHECK-O0-NEXT: movl $16, %edi
+; CHECK-O0-NEXT: movq %rsp, %rdx
+; CHECK-O0-NEXT: movl $2, %ecx
+; CHECK-O0-NEXT: callq __atomic_load@PLT
+; CHECK-O0-NEXT: movaps (%rsp), %xmm0
+; CHECK-O0-NEXT: addq $24, %rsp
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec4_float:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: subq $24, %rsp
+; CHECK-SSE-O0-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT: movl $16, %edi
+; CHECK-SSE-O0-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT: movl $2, %ecx
+; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O0-NEXT: movaps (%rsp), %xmm0
+; CHECK-SSE-O0-NEXT: addq $24, %rsp
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec4_float:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: subq $24, %rsp
+; CHECK-AVX-O0-NEXT: movq %rdi, %rsi
+; CHECK-AVX-O0-NEXT: movl $16, %edi
+; CHECK-AVX-O0-NEXT: movq %rsp, %rdx
+; CHECK-AVX-O0-NEXT: movl $2, %ecx
+; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT
+; CHECK-AVX-O0-NEXT: vmovaps (%rsp), %xmm0
+; CHECK-AVX-O0-NEXT: addq $24, %rsp
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <4 x float>, ptr %x acquire, align 4
+ ret <4 x float> %ret
+}
+
+define <8 x double> @atomic_vec8_double(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec8_double:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: subq $72, %rsp
+; CHECK-O3-NEXT: movq %rdi, %rsi
+; CHECK-O3-NEXT: movq %rsp, %rdx
+; CHECK-O3-NEXT: movl $64, %edi
+; CHECK-O3-NEXT: movl $2, %ecx
+; CHECK-O3-NEXT: callq __atomic_load@PLT
+; CHECK-O3-NEXT: movaps (%rsp), %xmm0
+; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-O3-NEXT: addq $72, %rsp
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec8_double:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: subq $72, %rsp
+; CHECK-SSE-O3-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT: movl $64, %edi
+; CHECK-SSE-O3-NEXT: movl $2, %ecx
+; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0
+; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-SSE-O3-NEXT: addq $72, %rsp
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec8_double:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: subq $72, %rsp
+; CHECK-O0-NEXT: movq %rdi, %rsi
+; CHECK-O0-NEXT: movl $64, %edi
+; CHECK-O0-NEXT: movq %rsp, %rdx
+; CHECK-O0-NEXT: movl $2, %ecx
+; CHECK-O0-NEXT: callq __atomic_load@PLT
+; CHECK-O0-NEXT: movapd (%rsp), %xmm0
+; CHECK-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1
+; CHECK-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2
+; CHECK-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3
+; CHECK-O0-NEXT: addq $72, %rsp
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec8_double:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: subq $72, %rsp
+; CHECK-SSE-O0-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT: movl $64, %edi
+; CHECK-SSE-O0-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT: movl $2, %ecx
+; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O0-NEXT: movapd (%rsp), %xmm0
+; CHECK-SSE-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1
+; CHECK-SSE-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2
+; CHECK-SSE-O0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3
+; CHECK-SSE-O0-NEXT: addq $72, %rsp
+; CHECK-SSE-O0-NEXT: retq
+ %ret = load atomic <8 x double>, ptr %x acquire, align 4
+ ret <8 x double> %ret
+}
+
+define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec16_bfloat:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: subq $40, %rsp
+; CHECK-O3-NEXT: movq %rdi, %rsi
+; CHECK-O3-NEXT: movq %rsp, %rdx
+; CHECK-O3-NEXT: movl $32, %edi
+; CHECK-O3-NEXT: movl $2, %ecx
+; CHECK-O3-NEXT: callq __atomic_load@PLT
+; CHECK-O3-NEXT: movaps (%rsp), %xmm0
+; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-O3-NEXT: addq $40, %rsp
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec16_bfloat:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: subq $40, %rsp
+; CHECK-SSE-O3-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT: movl $32, %edi
+; CHECK-SSE-O3-NEXT: movl $2, %ecx
+; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0
+; CHECK-SSE-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-SSE-O3-NEXT: addq $40, %rsp
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec16_bfloat:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: subq $40, %rsp
+; CHECK-AVX-O3-NEXT: movq %rdi, %rsi
+; CHECK-AVX-O3-NEXT: movq %rsp, %rdx
+; CHECK-AVX-O3-NEXT: movl $32, %edi
+; CHECK-AVX-O3-NEXT: movl $2, %ecx
+; CHECK-AVX-O3-NEXT: callq __atomic_load@PLT
+; CHECK-AVX-O3-NEXT: vmovups (%rsp), %ymm0
+; CHECK-AVX-O3-NEXT: addq $40, %rsp
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec16_bfloat:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: subq $40, %rsp
+; CHECK-O0-NEXT: movq %rdi, %rsi
+; CHECK-O0-NEXT: movl $32, %edi
+; CHECK-O0-NEXT: movq %rsp, %rdx
+; CHECK-O0-NEXT: movl $2, %ecx
+; CHECK-O0-NEXT: callq __atomic_load@PLT
+; CHECK-O0-NEXT: movaps (%rsp), %xmm0
+; CHECK-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-O0-NEXT: addq $40, %rsp
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec16_bfloat:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: subq $40, %rsp
+; CHECK-SSE-O0-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O0-NEXT: movl $32, %edi
+; CHECK-SSE-O0-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O0-NEXT: movl $2, %ecx
+; CHECK-SSE-O0-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O0-NEXT: movaps (%rsp), %xmm0
+; CHECK-SSE-O0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-SSE-O0-NEXT: addq $40, %rsp
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec16_bfloat:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: subq $40, %rsp
+; CHECK-AVX-O0-NEXT: movq %rdi, %rsi
+; CHECK-AVX-O0-NEXT: movl $32, %edi
+; CHECK-AVX-O0-NEXT: movq %rsp, %rdx
+; CHECK-AVX-O0-NEXT: movl $2, %ecx
+; CHECK-AVX-O0-NEXT: callq __atomic_load@PLT
+; CHECK-AVX-O0-NEXT: vmovups (%rsp), %ymm0
+; CHECK-AVX-O0-NEXT: addq $40, %rsp
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <16 x bfloat>, ptr %x acquire, align 4
+ ret <16 x bfloat> %ret
+}
+
+define <32 x half> @atomic_vec32_half(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec32_half:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: subq $72, %rsp
+; CHECK-O3-NEXT: movq %rdi, %rsi
+; CHECK-O3-NEXT: movq %rsp, %rdx
+; CHECK-O3-NEXT: movl $64, %edi
+; CHECK-O3-NEXT: movl $2, %ecx
+; CHECK-O3-NEXT: callq __atomic_load@PLT
+; CHECK-O3-NEXT: movaps (%rsp), %xmm0
+; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-O3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-O3-NEXT: addq $72, %rsp
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec32_half:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: subq $72, %rsp
+; CHECK-SSE-O3-NEXT: movq %rdi, %rsi
+; CHECK-SSE-O3-NEXT: movq %rsp, %rdx
+; CHECK-SSE-O3-NEXT: movl $64, %edi
+; CHECK-SSE-O3-NEXT: movl $2, %ecx
+; CHECK-SSE-O3-NEXT: callq __atomic_load@PLT
+; CHECK-SSE-O3-NEXT: movaps (%rsp), %xmm0
+; CHECK-SSE-O3-NEXT: mov...
[truncated]
|
27e1e69 to
e9ba6ed
Compare
0e8e100 to
261a6d6
Compare
261a6d6 to
d25a454
Compare
e9ba6ed to
a162961
Compare
d25a454 to
7779dac
Compare
a162961 to
2f09512
Compare
7779dac to
9174bd5
Compare
2f09512 to
206603c
Compare
9174bd5 to
4fda381
Compare
206603c to
0f3bacd
Compare
4fda381 to
b3f7d0b
Compare
0f3bacd to
1ed917f
Compare
arsenm
approved these changes
Oct 22, 2025
Contributor
Author
Merge activity
|
6a590e1 to
a9e1bc6
Compare
c1fcbd3 to
2848791
Compare
Base automatically changed from
users/jofrn/gt/07-15-_x86_manage_atomic_load_of_fp_-_int_promotion_in_dag
to
main
October 23, 2025 17:50
d48afdf to
da000fe
Compare
Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here.
da000fe to
cff15ed
Compare
dvbuka
pushed a commit
to dvbuka/llvm-project
that referenced
this pull request
Oct 27, 2025
Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here.
Lukacma
pushed a commit
to Lukacma/llvm-project
that referenced
this pull request
Oct 29, 2025
Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here.
jofrn
added a commit
that referenced
this pull request
May 18, 2026
pedroMVicente
pushed a commit
to pedroMVicente/llvm-project
that referenced
this pull request
May 19, 2026
Unaligned atomic vector stores with size >1 are lowered to calls. Adding their tests separately here. Store-side counterpart to llvm#148896. Stacked below llvm#197165.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.

Unaligned atomic vectors with size >1 are lowered to calls.
Adding their tests separately here.