[X86] Manage atomic store of fp -> int promotion in DAG#197166
Conversation
|
@llvm/pr-subscribers-backend-x86 Author: jofrn ChangesWhen lowering atomic <1 x T> vector types with floats, selection can fail since Store-side counterpart to #148895. Stacked on top of #197165. 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index aa5b864df5936..fea1caf0854f5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2753,6 +2753,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32);
setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64);
+ setOperationPromotedToType(ISD::ATOMIC_STORE, MVT::f16, MVT::i16);
+ setOperationPromotedToType(ISD::ATOMIC_STORE, MVT::f32, MVT::i32);
+ setOperationPromotedToType(ISD::ATOMIC_STORE, MVT::f64, MVT::i64);
+
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
ISD::SCALAR_TO_VECTOR,
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 8e74276019e17..549015c37e2d0 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -222,6 +222,140 @@ define void @store_atomic_vec1_ptr(ptr %x, <1 x ptr> %v) nounwind {
ret void
}
+define void @store_atomic_vec1_bfloat(ptr %x, <1 x bfloat> %v) {
+; CHECK-SSE-O3-LABEL: store_atomic_vec1_bfloat:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: pextrw $0, %xmm0, %eax
+; CHECK-SSE-O3-NEXT: movw %ax, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec1_bfloat:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vpextrw $0, %xmm0, %eax
+; CHECK-AVX-O3-NEXT: movw %ax, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec1_bfloat:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: pushq %rax
+; CHECK-SSE-O0-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SSE-O0-NEXT: movq %rdi, (%rsp) # 8-byte Spill
+; CHECK-SSE-O0-NEXT: pextrw $0, %xmm0, %eax
+; CHECK-SSE-O0-NEXT: movw %ax, %cx
+; CHECK-SSE-O0-NEXT: # implicit-def: $eax
+; CHECK-SSE-O0-NEXT: movw %cx, %ax
+; CHECK-SSE-O0-NEXT: shll $16, %eax
+; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
+; CHECK-SSE-O0-NEXT: callq __truncsfbf2@PLT
+; CHECK-SSE-O0-NEXT: movq (%rsp), %rdi # 8-byte Reload
+; CHECK-SSE-O0-NEXT: pextrw $0, %xmm0, %eax
+; CHECK-SSE-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-SSE-O0-NEXT: movw %ax, (%rdi)
+; CHECK-SSE-O0-NEXT: popq %rax
+; CHECK-SSE-O0-NEXT: .cfi_def_cfa_offset 8
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec1_bfloat:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: pushq %rax
+; CHECK-AVX-O0-NEXT: .cfi_def_cfa_offset 16
+; CHECK-AVX-O0-NEXT: movq %rdi, (%rsp) # 8-byte Spill
+; CHECK-AVX-O0-NEXT: vpextrw $0, %xmm0, %eax
+; CHECK-AVX-O0-NEXT: movw %ax, %cx
+; CHECK-AVX-O0-NEXT: # implicit-def: $eax
+; CHECK-AVX-O0-NEXT: movw %cx, %ax
+; CHECK-AVX-O0-NEXT: shll $16, %eax
+; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
+; CHECK-AVX-O0-NEXT: callq __truncsfbf2@PLT
+; CHECK-AVX-O0-NEXT: movq (%rsp), %rdi # 8-byte Reload
+; CHECK-AVX-O0-NEXT: vpextrw $0, %xmm0, %eax
+; CHECK-AVX-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-AVX-O0-NEXT: movw %ax, (%rdi)
+; CHECK-AVX-O0-NEXT: popq %rax
+; CHECK-AVX-O0-NEXT: .cfi_def_cfa_offset 8
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <1 x bfloat> %v, ptr %x release, align 2
+ ret void
+}
+
+define void @store_atomic_vec1_half(ptr %x, <1 x half> %v) {
+; CHECK-SSE-O3-LABEL: store_atomic_vec1_half:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: pextrw $0, %xmm0, %eax
+; CHECK-SSE-O3-NEXT: movw %ax, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec1_half:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vpextrw $0, %xmm0, %eax
+; CHECK-AVX-O3-NEXT: movw %ax, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec1_half:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: pextrw $0, %xmm0, %eax
+; CHECK-SSE-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-SSE-O0-NEXT: movw %ax, (%rdi)
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec1_half:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vpextrw $0, %xmm0, %eax
+; CHECK-AVX-O0-NEXT: # kill: def $ax killed $ax killed $eax
+; CHECK-AVX-O0-NEXT: movw %ax, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <1 x half> %v, ptr %x release, align 2
+ ret void
+}
+
+define void @store_atomic_vec1_float(ptr %x, <1 x float> %v) {
+; CHECK-SSE-O3-LABEL: store_atomic_vec1_float:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movss %xmm0, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec1_float:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovss %xmm0, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec1_float:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movss %xmm0, (%rdi)
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec1_float:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovss %xmm0, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <1 x float> %v, ptr %x release, align 4
+ ret void
+}
+
+define void @store_atomic_vec1_double_align(ptr %x, <1 x double> %v) nounwind {
+; CHECK-SSE-O3-LABEL: store_atomic_vec1_double_align:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movsd %xmm0, (%rdi)
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: store_atomic_vec1_double_align:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovsd %xmm0, (%rdi)
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: store_atomic_vec1_double_align:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movsd %xmm0, (%rdi)
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: store_atomic_vec1_double_align:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovsd %xmm0, (%rdi)
+; CHECK-AVX-O0-NEXT: retq
+ store atomic <1 x double> %v, ptr %x release, align 8
+ ret void
+}
+
define <2 x i8> @atomic_vec2_i8(ptr %x) {
; CHECK-SSE-O3-LABEL: atomic_vec2_i8:
; CHECK-SSE-O3: # %bb.0:
|
|
|
| ret void | ||
| } | ||
|
|
||
| define void @store_atomic_vec1_bfloat(ptr %x, <1 x bfloat> %v) { |
There was a problem hiding this comment.
The tests look more like they are for vector scalarization. This also isn't removing any of the IR lowering TargetLowering hooks, so I doubt this patch is doing anything as it is
There was a problem hiding this comment.
They are for vector scalarization. It casts the types instead of removing hooks. Please check out how it was done for the loads in #148895.
There was a problem hiding this comment.
This doesn't make sense, you only changed the rules for the scalar types. This also will not be reached since AtomicExpand will have replaced the float atomics unless you do something to shouldExpandAtomicStoreInIR / shouldCastAtomicStoreInIR
There was a problem hiding this comment.
That makes sense. The hooks weren't modified, so we don't need these.
There was a problem hiding this comment.
I think the hooks should be modified (and by modified, I mean fully deleted).
I tried doing this a long time ago, I think load and store shouldn't be too much trouble to move the casting into codegen. The tricky one is atomicrmw, since when expanding into cmpxchg loops, putting the cast inside the loop is bad for some targets including x86
There was a problem hiding this comment.
The hook in x86 isel lowering (i.e. the base class) only handles scalar floating point types.
virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const { if (SI->getValueOperand()->getType()->isFloatingPointTy()) return AtomicExpansionKind::CastToInteger; return AtomicExpansionKind::None; }
Therefore, we do need these. I understand what you are saying ; it will be much more difficult to modify the base class though. This statement was accurate: #197166 (comment)
There was a problem hiding this comment.
OK, this is a convoluted set of circumstances. shouldCastAtomicStoreInIR is buggy, and missed the 1 x vector edge case (it ought to be checking isFPOrFPVectorTy). This patch exclusively fixes the handling of the 1x vector, which is not obvious from the title and description. I'd prefer to fix this by first fixing the buggy cast logic to handle the vector case, and then proceed with the migration to the DAG based promotion
There was a problem hiding this comment.
Ok, sure. We can try specializing in it x86.
There was a problem hiding this comment.
No, not in x86. In the base implementation (but this is still just pre-work to deleting it)
There was a problem hiding this comment.
Right, work prior to the final state is redundant. will leave this as it is then until we get to the final state. thank you.
| ret void | ||
| } | ||
|
|
||
| define void @store_atomic_vec1_bfloat(ptr %x, <1 x bfloat> %v) { |
There was a problem hiding this comment.
| define void @store_atomic_vec1_bfloat(ptr %x, <1 x bfloat> %v) { | |
| define void @store_atomic_vec1_bfloat(ptr %x, <1 x bfloat> %v) nounwind { |
same for others to remove cfi noise
There was a problem hiding this comment.
Should it be there for those that don't require it? i.e. those that do not have CFI noise currently.
There was a problem hiding this comment.
I tend to blanket add it to most tests - but if that will cause large diffs, just add it to cases that need it
Unaligned atomic vector stores with size >1 are lowered to calls. Adding their tests separately here.
3e365f3 to
3e49a56
Compare
686ed50 to
47bca23
Compare
`store atomic <1 x T>` is not valid. This change legalizes vector types of atomic store via scalarization in SelectionDAG so that it can, for example, translate from `v1i32` to `i32`.
When lowering atomic <1 x T> vector types with floats, selection can fail since this pattern is unsupported. To support this, floats can be casted to an integer type of the same size.
3e49a56 to
bbd3398
Compare
47bca23 to
21e67f6
Compare
When lowering
atomic store <1 x T>vector types with floats (i.e. during scalarization in the selection DAG), selection can fail sincethis pattern is unsupported. To support this, floats can be casted to
an integer type of the same size.
Store-side counterpart to #148895. Stacked on top of #197165; and below of #197618.