[X86] Remove shouldCastAtomicLoadInIR; use DAG combine instead by jofrn · Pull Request #199520 · llvm/llvm-project

jofrn · 2026-05-25T11:56:54Z

Remove X86's shouldCastAtomicLoadInIR override that cast FP atomic loads to integer at the IR level. Instead, handle this in a pre-legalize DAG combine (combineAtomicLoad) that rewrites FP/FP-vector atomic loads to integer atomic loads plus a bitcast.

This and #199310, which adds the necessary cmpxchg support for non-integer atomic loads in AtomicExpand, are a response to #148899 for the atomic_vec4_float test in atomic-load-store.ll.

Stacked above #201303 and below #200339.

llvmorg-github-actions · 2026-05-25T11:57:31Z

@llvm/pr-subscribers-backend-x86

Author: jofrn

Changes

Remove X86's shouldCastAtomicLoadInIR override that cast FP atomic
loads to integer at the IR level. Instead, handle this in a pre-legalize
DAG combine (combineAtomicLoad) that rewrites FP/FP-vector atomic loads
to integer atomic loads plus a bitcast.

This depends on #199310 which adds the necessary cmpxchg support for
non-integer atomic loads in AtomicExpand.

Full diff: https://github.com/llvm/llvm-project/pull/199520.diff

4 Files Affected:

(modified) llvm/lib/CodeGen/AtomicExpandPass.cpp (+11-1)
(modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+25-7)
(modified) llvm/lib/Target/X86/X86ISelLowering.h (-2)
(modified) llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll (+44-5)

diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 7327290f62970..b06f3534b53fc 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -673,12 +673,22 @@ bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
 
   Value *Addr = LI->getPointerOperand();
   Type *Ty = LI->getType();
-  Constant *DummyVal = Constant::getNullValue(Ty);
+
+  // cmpxchg supports only integer and pointer operands. If the load type is
+  // FP or vector, run the cmpxchg on the same-sized integer and bitcast the
+  // result back; mirrors createCmpXchgInstFun.
+  bool NeedBitcast = Ty->isFloatingPointTy() || Ty->isVectorTy();
+  Type *CmpXchgTy = Ty;
+  if (NeedBitcast)
+    CmpXchgTy = Builder.getIntNTy(Ty->getPrimitiveSizeInBits());
+  Constant *DummyVal = Constant::getNullValue(CmpXchgTy);
 
   Value *Pair = Builder.CreateAtomicCmpXchg(
       Addr, DummyVal, DummyVal, LI->getAlign(), Order,
       AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
   Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
+  if (NeedBitcast)
+    Loaded = Builder.CreateBitCast(Loaded, Ty);
 
   LI->replaceAllUsesWith(Loaded);
   LI->eraseFromParent();
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f37d896b9c69e..66cbd9c960346 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2785,6 +2785,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
                        ISD::FMINNUM,
                        ISD::FMAXNUM,
                        ISD::SUB,
+                       ISD::ATOMIC_LOAD,
                        ISD::LOAD,
                        ISD::LRINT,
                        ISD::LLRINT,
@@ -33030,13 +33031,6 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const {
   }
 }
 
-TargetLowering::AtomicExpansionKind
-X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
-  if (LI->getType()->getScalarType()->isFloatingPointTy())
-    return AtomicExpansionKind::CastToInteger;
-  return AtomicExpansionKind::None;
-}
-
 LoadInst *
 X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
@@ -54075,6 +54069,29 @@ static SDValue combineConstantPoolLoads(SDNode *N, const SDLoc &dl,
   return SDValue();
 }
 
+static SDValue combineAtomicLoad(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  if (!DCI.isBeforeLegalize())
+    return SDValue();
+
+  auto *AN = cast<AtomicSDNode>(N);
+  EVT VT = AN->getValueType(0);
+  if (!VT.getScalarType().isFloatingPoint())
+    return SDValue();
+
+  unsigned BitWidth = VT.getStoreSizeInBits();
+  if (BitWidth == 0 || BitWidth != VT.getSizeInBits())
+    return SDValue();
+
+  SDLoc DL(N);
+  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitWidth);
+  SDValue IntLoad = DAG.getAtomic(
+      ISD::ATOMIC_LOAD, DL, IntVT, DAG.getVTList(IntVT, MVT::Other),
+      {AN->getChain(), AN->getBasePtr()}, AN->getMemOperand());
+  SDValue Cast = DAG.getBitcast(VT, IntLoad);
+  return DAG.getMergeValues({Cast, IntLoad.getValue(1)}, DL);
+}
+
 static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
                            TargetLowering::DAGCombinerInfo &DCI,
                            const X86Subtarget &Subtarget) {
@@ -62738,6 +62755,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::AVGCEILU:
   case ISD::AVGFLOORS:
   case ISD::AVGFLOORU:      return combineAVG(N, DAG, DCI, Subtarget);
+  case ISD::ATOMIC_LOAD:    return combineAtomicLoad(N, DAG, DCI);
   case ISD::LOAD:           return combineLoad(N, DAG, DCI, Subtarget);
   case ISD::MLOAD:          return combineMaskedLoad(N, DAG, DCI, Subtarget);
   case ISD::STORE:          return combineStore(N, DAG, DCI, Subtarget);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 9a958525057b6..0d05c5772a707 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -892,8 +892,6 @@ namespace llvm {
     shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override;
     TargetLoweringBase::AtomicExpansionKind
     shouldExpandLogicAtomicRMWInIR(const AtomicRMWInst *AI) const;
-    TargetLoweringBase::AtomicExpansionKind
-    shouldCastAtomicLoadInIR(LoadInst *LI) const override;
     void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
     void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
 
diff --git a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
index 9f973ac5531d1..e3a58931f5e85 100644
--- a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
+++ b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
@@ -1,10 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -S %s -passes='require<libcall-lowering-info>,atomic-expand' -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK64
 ; RUN: opt -S %s -passes='require<libcall-lowering-info>,atomic-expand' -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK32
+; RUN: opt -S %s -passes='require<libcall-lowering-info>,atomic-expand' -mtriple=x86_64-linux-gnu -mattr=+cx16,-avx | FileCheck %s --check-prefixes=CX16
 
-; This file tests the functions `llvm::convertAtomicLoadToIntegerType` and
-; `llvm::convertAtomicStoreToIntegerType`. If X86 stops using this
-; functionality, please move this test to a target which still is.
+; This file tests AtomicExpand's non-integer atomic type conversions and
+; X86-specific expansion choices. If X86 stops using this functionality, please
+; move this test to a target which still is.
 
 define float @float_load_expand(ptr %ptr) {
 ; CHECK-LABEL: define float @float_load_expand(
@@ -307,8 +308,7 @@ define <2 x i16> @atomic_vec2_i16(ptr %x) nounwind {
 define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
 ; CHECK-LABEL: define <2 x half> @atomic_vec2_half(
 ; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr [[X]] acquire, align 8
-; CHECK-NEXT:    [[RET:%.*]] = bitcast i32 [[TMP1]] to <2 x half>
+; CHECK-NEXT:    [[RET:%.*]] = load atomic <2 x half>, ptr [[X]] acquire, align 8
 ; CHECK-NEXT:    ret <2 x half> [[RET]]
 ;
   %ret = load atomic <2 x half>, ptr %x acquire, align 8
@@ -354,3 +354,42 @@ define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
   %ret = load atomic <4 x float>, ptr %x acquire, align 16
   ret <4 x float> %ret
 }
+
+; Vector atomic loads should expand to a same-sized integer cmpxchg followed by
+; a bitcast back to the original type when X86 chooses the cmpxchg expansion.
+
+define <2 x i64> @load_v2i64_cmpxchg(ptr %p) {
+; CX16-LABEL: define <2 x i64> @load_v2i64_cmpxchg(
+; CX16-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CX16-NEXT:    [[TMP1:%.*]] = cmpxchg ptr [[P]], i128 0, i128 0 seq_cst seq_cst, align 16
+; CX16-NEXT:    [[LOADED:%.*]] = extractvalue { i128, i1 } [[TMP1]], 0
+; CX16-NEXT:    [[TMP2:%.*]] = bitcast i128 [[LOADED]] to <2 x i64>
+; CX16-NEXT:    ret <2 x i64> [[TMP2]]
+;
+  %r = load atomic <2 x i64>, ptr %p seq_cst, align 16
+  ret <2 x i64> %r
+}
+
+define <4 x i32> @load_v4i32_cmpxchg(ptr %p) {
+; CX16-LABEL: define <4 x i32> @load_v4i32_cmpxchg(
+; CX16-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; CX16-NEXT:    [[TMP1:%.*]] = cmpxchg ptr [[P]], i128 0, i128 0 seq_cst seq_cst, align 16
+; CX16-NEXT:    [[LOADED:%.*]] = extractvalue { i128, i1 } [[TMP1]], 0
+; CX16-NEXT:    [[TMP2:%.*]] = bitcast i128 [[LOADED]] to <4 x i32>
+; CX16-NEXT:    ret <4 x i32> [[TMP2]]
+;
+  %r = load atomic <4 x i32>, ptr %p seq_cst, align 16
+  ret <4 x i32> %r
+}
+
+define <16 x i8> @load_v16i8_cmpxchg(ptr %p) {
+; CX16-LABEL: define <16 x i8> @load_v16i8_cmpxchg(
+; CX16-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; CX16-NEXT:    [[TMP1:%.*]] = cmpxchg ptr [[P]], i128 0, i128 0 seq_cst seq_cst, align 16
+; CX16-NEXT:    [[LOADED:%.*]] = extractvalue { i128, i1 } [[TMP1]], 0
+; CX16-NEXT:    [[TMP2:%.*]] = bitcast i128 [[LOADED]] to <16 x i8>
+; CX16-NEXT:    ret <16 x i8> [[TMP2]]
+;
+  %r = load atomic <16 x i8>, ptr %p seq_cst, align 16
+  ret <16 x i8> %r
+}

llvmorg-github-actions · 2026-05-25T11:57:31Z

@llvm/pr-subscribers-llvm-transforms

Author: jofrn

Changes

Remove X86's shouldCastAtomicLoadInIR override that cast FP atomic
loads to integer at the IR level. Instead, handle this in a pre-legalize
DAG combine (combineAtomicLoad) that rewrites FP/FP-vector atomic loads
to integer atomic loads plus a bitcast.

This depends on #199310 which adds the necessary cmpxchg support for
non-integer atomic loads in AtomicExpand.

Full diff: https://github.com/llvm/llvm-project/pull/199520.diff

4 Files Affected:

(modified) llvm/lib/CodeGen/AtomicExpandPass.cpp (+11-1)
(modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+25-7)
(modified) llvm/lib/Target/X86/X86ISelLowering.h (-2)
(modified) llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll (+44-5)

diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 7327290f62970..b06f3534b53fc 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -673,12 +673,22 @@ bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
 
   Value *Addr = LI->getPointerOperand();
   Type *Ty = LI->getType();
-  Constant *DummyVal = Constant::getNullValue(Ty);
+
+  // cmpxchg supports only integer and pointer operands. If the load type is
+  // FP or vector, run the cmpxchg on the same-sized integer and bitcast the
+  // result back; mirrors createCmpXchgInstFun.
+  bool NeedBitcast = Ty->isFloatingPointTy() || Ty->isVectorTy();
+  Type *CmpXchgTy = Ty;
+  if (NeedBitcast)
+    CmpXchgTy = Builder.getIntNTy(Ty->getPrimitiveSizeInBits());
+  Constant *DummyVal = Constant::getNullValue(CmpXchgTy);
 
   Value *Pair = Builder.CreateAtomicCmpXchg(
       Addr, DummyVal, DummyVal, LI->getAlign(), Order,
       AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
   Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
+  if (NeedBitcast)
+    Loaded = Builder.CreateBitCast(Loaded, Ty);
 
   LI->replaceAllUsesWith(Loaded);
   LI->eraseFromParent();
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f37d896b9c69e..66cbd9c960346 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2785,6 +2785,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
                        ISD::FMINNUM,
                        ISD::FMAXNUM,
                        ISD::SUB,
+                       ISD::ATOMIC_LOAD,
                        ISD::LOAD,
                        ISD::LRINT,
                        ISD::LLRINT,
@@ -33030,13 +33031,6 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const {
   }
 }
 
-TargetLowering::AtomicExpansionKind
-X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
-  if (LI->getType()->getScalarType()->isFloatingPointTy())
-    return AtomicExpansionKind::CastToInteger;
-  return AtomicExpansionKind::None;
-}
-
 LoadInst *
 X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
@@ -54075,6 +54069,29 @@ static SDValue combineConstantPoolLoads(SDNode *N, const SDLoc &dl,
   return SDValue();
 }
 
+static SDValue combineAtomicLoad(SDNode *N, SelectionDAG &DAG,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  if (!DCI.isBeforeLegalize())
+    return SDValue();
+
+  auto *AN = cast<AtomicSDNode>(N);
+  EVT VT = AN->getValueType(0);
+  if (!VT.getScalarType().isFloatingPoint())
+    return SDValue();
+
+  unsigned BitWidth = VT.getStoreSizeInBits();
+  if (BitWidth == 0 || BitWidth != VT.getSizeInBits())
+    return SDValue();
+
+  SDLoc DL(N);
+  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitWidth);
+  SDValue IntLoad = DAG.getAtomic(
+      ISD::ATOMIC_LOAD, DL, IntVT, DAG.getVTList(IntVT, MVT::Other),
+      {AN->getChain(), AN->getBasePtr()}, AN->getMemOperand());
+  SDValue Cast = DAG.getBitcast(VT, IntLoad);
+  return DAG.getMergeValues({Cast, IntLoad.getValue(1)}, DL);
+}
+
 static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
                            TargetLowering::DAGCombinerInfo &DCI,
                            const X86Subtarget &Subtarget) {
@@ -62738,6 +62755,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::AVGCEILU:
   case ISD::AVGFLOORS:
   case ISD::AVGFLOORU:      return combineAVG(N, DAG, DCI, Subtarget);
+  case ISD::ATOMIC_LOAD:    return combineAtomicLoad(N, DAG, DCI);
   case ISD::LOAD:           return combineLoad(N, DAG, DCI, Subtarget);
   case ISD::MLOAD:          return combineMaskedLoad(N, DAG, DCI, Subtarget);
   case ISD::STORE:          return combineStore(N, DAG, DCI, Subtarget);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 9a958525057b6..0d05c5772a707 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -892,8 +892,6 @@ namespace llvm {
     shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override;
     TargetLoweringBase::AtomicExpansionKind
     shouldExpandLogicAtomicRMWInIR(const AtomicRMWInst *AI) const;
-    TargetLoweringBase::AtomicExpansionKind
-    shouldCastAtomicLoadInIR(LoadInst *LI) const override;
     void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
     void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
 
diff --git a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
index 9f973ac5531d1..e3a58931f5e85 100644
--- a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
+++ b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
@@ -1,10 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -S %s -passes='require<libcall-lowering-info>,atomic-expand' -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK64
 ; RUN: opt -S %s -passes='require<libcall-lowering-info>,atomic-expand' -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK32
+; RUN: opt -S %s -passes='require<libcall-lowering-info>,atomic-expand' -mtriple=x86_64-linux-gnu -mattr=+cx16,-avx | FileCheck %s --check-prefixes=CX16
 
-; This file tests the functions `llvm::convertAtomicLoadToIntegerType` and
-; `llvm::convertAtomicStoreToIntegerType`. If X86 stops using this
-; functionality, please move this test to a target which still is.
+; This file tests AtomicExpand's non-integer atomic type conversions and
+; X86-specific expansion choices. If X86 stops using this functionality, please
+; move this test to a target which still is.
 
 define float @float_load_expand(ptr %ptr) {
 ; CHECK-LABEL: define float @float_load_expand(
@@ -307,8 +308,7 @@ define <2 x i16> @atomic_vec2_i16(ptr %x) nounwind {
 define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
 ; CHECK-LABEL: define <2 x half> @atomic_vec2_half(
 ; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr [[X]] acquire, align 8
-; CHECK-NEXT:    [[RET:%.*]] = bitcast i32 [[TMP1]] to <2 x half>
+; CHECK-NEXT:    [[RET:%.*]] = load atomic <2 x half>, ptr [[X]] acquire, align 8
 ; CHECK-NEXT:    ret <2 x half> [[RET]]
 ;
   %ret = load atomic <2 x half>, ptr %x acquire, align 8
@@ -354,3 +354,42 @@ define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
   %ret = load atomic <4 x float>, ptr %x acquire, align 16
   ret <4 x float> %ret
 }
+
+; Vector atomic loads should expand to a same-sized integer cmpxchg followed by
+; a bitcast back to the original type when X86 chooses the cmpxchg expansion.
+
+define <2 x i64> @load_v2i64_cmpxchg(ptr %p) {
+; CX16-LABEL: define <2 x i64> @load_v2i64_cmpxchg(
+; CX16-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CX16-NEXT:    [[TMP1:%.*]] = cmpxchg ptr [[P]], i128 0, i128 0 seq_cst seq_cst, align 16
+; CX16-NEXT:    [[LOADED:%.*]] = extractvalue { i128, i1 } [[TMP1]], 0
+; CX16-NEXT:    [[TMP2:%.*]] = bitcast i128 [[LOADED]] to <2 x i64>
+; CX16-NEXT:    ret <2 x i64> [[TMP2]]
+;
+  %r = load atomic <2 x i64>, ptr %p seq_cst, align 16
+  ret <2 x i64> %r
+}
+
+define <4 x i32> @load_v4i32_cmpxchg(ptr %p) {
+; CX16-LABEL: define <4 x i32> @load_v4i32_cmpxchg(
+; CX16-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; CX16-NEXT:    [[TMP1:%.*]] = cmpxchg ptr [[P]], i128 0, i128 0 seq_cst seq_cst, align 16
+; CX16-NEXT:    [[LOADED:%.*]] = extractvalue { i128, i1 } [[TMP1]], 0
+; CX16-NEXT:    [[TMP2:%.*]] = bitcast i128 [[LOADED]] to <4 x i32>
+; CX16-NEXT:    ret <4 x i32> [[TMP2]]
+;
+  %r = load atomic <4 x i32>, ptr %p seq_cst, align 16
+  ret <4 x i32> %r
+}
+
+define <16 x i8> @load_v16i8_cmpxchg(ptr %p) {
+; CX16-LABEL: define <16 x i8> @load_v16i8_cmpxchg(
+; CX16-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; CX16-NEXT:    [[TMP1:%.*]] = cmpxchg ptr [[P]], i128 0, i128 0 seq_cst seq_cst, align 16
+; CX16-NEXT:    [[LOADED:%.*]] = extractvalue { i128, i1 } [[TMP1]], 0
+; CX16-NEXT:    [[TMP2:%.*]] = bitcast i128 [[LOADED]] to <16 x i8>
+; CX16-NEXT:    ret <16 x i8> [[TMP2]]
+;
+  %r = load atomic <16 x i8>, ptr %p seq_cst, align 16
+  ret <16 x i8> %r
+}

jofrn · 2026-05-25T12:00:22Z

In particular, atomic_vec4_half in test/CodeGen/X86/atomic-load-store.ll is the test that requires the cmpxchg modifications.

github-actions · 2026-06-03T10:17:58Z

🐧 Linux x64 Test Results

196150 tests passed
5309 tests skipped

✅ The build succeeded and all tests passed.

github-actions · 2026-06-03T10:17:58Z

🪟 Windows x64 Test Results

135361 tests passed
3360 tests skipped

✅ The build succeeded and all tests passed.

RKSimon

CI failures

RKSimon · 2026-06-05T14:07:31Z

+    return SDValue();
+
+  unsigned BitWidth = VT.getStoreSizeInBits();
+  if (BitWidth == 0 || BitWidth != VT.getSizeInBits())


BitWidth == 0?

You're correct. The check for FP types implies we can't have zero BitWidth. Thank you.

Remove X86's shouldCastAtomicLoadInIR override that cast FP atomic loads to integer at the IR level. Instead, handle this in a pre-legalize DAG combine (combineAtomicLoad) that rewrites FP/FP-vector atomic loads to integer atomic loads plus a bitcast. This depends on #199310 which adds the necessary cmpxchg support for non-integer atomic loads in AtomicExpand.

RKSimon

LGTM

llvmorg-github-actions Bot added backend:X86 llvm:codegen llvm:transforms labels May 25, 2026

jofrn requested a review from RKSimon May 28, 2026 11:16

RKSimon requested changes May 28, 2026

View reviewed changes

Comment thread llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll

jofrn force-pushed the users/jofrn/x86-remove-shouldcastatomicloadinir-v2 branch from df4f0d0 to 99e797b Compare May 29, 2026 07:27

This was referenced May 29, 2026

[AtomicExpand][test] Regen CX16 CHECKs for atomic_vec2_half after cast removal #200339

Closed

[X86][AtomicExpand] Remove X86's shouldCastAtomicLoadInIR override #198651

Closed

[X86] Cast atomic vectors in IR to support floats #148899

Merged

jofrn force-pushed the users/jofrn/x86-remove-shouldcastatomicloadinir-v2 branch from 16f1d37 to a8fa91a Compare May 29, 2026 13:18

jofrn requested a review from RKSimon June 1, 2026 22:15

jofrn force-pushed the users/jofrn/x86-remove-shouldcastatomicloadinir-v2 branch from a8fa91a to 979691e Compare June 3, 2026 09:50

jofrn changed the base branch from main to users/jofrn/atomicexpand-vector-load-checks-base June 3, 2026 09:55

jofrn mentioned this pull request Jun 3, 2026

[AtomicExpand][test] Add CHECK32 and CHECK64 via whole-file regen #201303

Open

RKSimon requested changes Jun 3, 2026

View reviewed changes

jofrn force-pushed the users/jofrn/x86-remove-shouldcastatomicloadinir-v2 branch from 979691e to d4d1fd4 Compare June 5, 2026 02:26

jofrn requested a review from RKSimon June 5, 2026 07:58

RKSimon reviewed Jun 5, 2026

View reviewed changes

jofrn force-pushed the users/jofrn/x86-remove-shouldcastatomicloadinir-v2 branch from d4d1fd4 to 12e5a0f Compare June 6, 2026 08:07

RKSimon self-requested a review June 6, 2026 12:14

RKSimon approved these changes Jun 8, 2026

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[X86] Remove shouldCastAtomicLoadInIR; use DAG combine instead#199520

[X86] Remove shouldCastAtomicLoadInIR; use DAG combine instead#199520
jofrn wants to merge 1 commit into
users/jofrn/atomicexpand-vector-load-checks-basefrom
users/jofrn/x86-remove-shouldcastatomicloadinir-v2

jofrn commented May 25, 2026 •

edited

Loading

Uh oh!

llvmorg-github-actions Bot commented May 25, 2026

Uh oh!

llvmorg-github-actions Bot commented May 25, 2026

Uh oh!

jofrn commented May 25, 2026

Uh oh!

Uh oh!

github-actions Bot commented Jun 3, 2026 •

edited

Loading

Uh oh!

github-actions Bot commented Jun 3, 2026 •

edited

Loading

Uh oh!

RKSimon left a comment

Uh oh!

RKSimon Jun 5, 2026

Uh oh!

jofrn Jun 6, 2026

Uh oh!

RKSimon left a comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

Conversation

jofrn commented May 25, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmorg-github-actions Bot commented May 25, 2026

Uh oh!

llvmorg-github-actions Bot commented May 25, 2026

Uh oh!

jofrn commented May 25, 2026

Uh oh!

Uh oh!

github-actions Bot commented Jun 3, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

🐧 Linux x64 Test Results

Uh oh!

github-actions Bot commented Jun 3, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

🪟 Windows x64 Test Results

Uh oh!

RKSimon left a comment

Choose a reason for hiding this comment

Uh oh!

RKSimon Jun 5, 2026

Choose a reason for hiding this comment

Uh oh!

jofrn Jun 6, 2026

Choose a reason for hiding this comment

Uh oh!

RKSimon left a comment

Choose a reason for hiding this comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

jofrn commented May 25, 2026 •

edited

Loading

github-actions Bot commented Jun 3, 2026 •

edited

Loading

github-actions Bot commented Jun 3, 2026 •

edited

Loading