[X86][AtomicExpand] Remove X86's shouldCastAtomicLoadInIR override#198651
Closed
jofrn wants to merge 1 commit into
Closed
[X86][AtomicExpand] Remove X86's shouldCastAtomicLoadInIR override#198651jofrn wants to merge 1 commit into
jofrn wants to merge 1 commit into
Conversation
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-x86 Author: jofrn ChangesSo that atomic floating-point and FP-vector loads are no longer bitcast to an integer at the IR level by AtomicExpand. 3 Files Affected:
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 7327290f62970..fc083a54132a5 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -673,13 +673,26 @@ bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
Value *Addr = LI->getPointerOperand();
Type *Ty = LI->getType();
- Constant *DummyVal = Constant::getNullValue(Ty);
+
+ // cmpxchg only supports integer and pointer operand types. Bitcast scalar
+ // floating-point types and vectors of floating-point to an integer of the
+ // same bit width so the expansion is valid IR; bitcast the result back
+ // afterwards.
+ Type *CmpXchgTy = Ty;
+ bool NeedBitcast = Ty->getScalarType()->isFloatingPointTy();
+ if (NeedBitcast)
+ CmpXchgTy = Builder.getIntNTy(Ty->getPrimitiveSizeInBits());
+
+ Constant *DummyVal = Constant::getNullValue(CmpXchgTy);
Value *Pair = Builder.CreateAtomicCmpXchg(
Addr, DummyVal, DummyVal, LI->getAlign(), Order,
AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
+ if (NeedBitcast)
+ Loaded = Builder.CreateBitCast(Loaded, Ty);
+
LI->replaceAllUsesWith(Loaded);
LI->eraseFromParent();
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bc28df877ba9c..4ab509d0e4ff3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2781,6 +2781,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::FMINNUM,
ISD::FMAXNUM,
ISD::SUB,
+ ISD::ATOMIC_LOAD,
ISD::LOAD,
ISD::LRINT,
ISD::LLRINT,
@@ -33013,13 +33014,6 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const {
}
}
-TargetLowering::AtomicExpansionKind
-X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
- if (LI->getType()->getScalarType()->isFloatingPointTy())
- return AtomicExpansionKind::CastToInteger;
- return AtomicExpansionKind::None;
-}
-
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
@@ -54095,6 +54089,36 @@ static SDValue combineConstantPoolLoads(SDNode *N, const SDLoc &dl,
return SDValue();
}
+// For atomic loads of floating-point or floating-point vector results, rewrite
+// to an integer atomic load of the same width plus a bitcast. This is the DAG
+// equivalent of the shouldCastAtomicLoadInIR -> CastToInteger transform; it
+// lets X86's existing integer atomic load lowering (and DAG combines such as
+// load-into-xmm folding) handle FP types we have no direct selection for.
+static SDValue combineAtomicLoad(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (!DCI.isBeforeLegalize())
+ return SDValue();
+
+ auto *AN = cast<AtomicSDNode>(N);
+ EVT VT = AN->getValueType(0);
+ if (!VT.getScalarType().isFloatingPoint())
+ return SDValue();
+
+ unsigned BitWidth = VT.getStoreSizeInBits();
+ if (BitWidth == 0 || BitWidth != VT.getSizeInBits())
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitWidth);
+ SDValue IntLoad =
+ DAG.getAtomic(ISD::ATOMIC_LOAD, DL, IntVT,
+ DAG.getVTList(IntVT, MVT::Other),
+ {AN->getChain(), AN->getBasePtr()}, AN->getMemOperand());
+ SDValue Cast = DAG.getBitcast(VT, IntLoad);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), IntLoad.getValue(1));
+ return Cast;
+}
+
static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -62699,6 +62723,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::AVGCEILU:
case ISD::AVGFLOORS:
case ISD::AVGFLOORU: return combineAVG(N, DAG, DCI, Subtarget);
+ case ISD::ATOMIC_LOAD: return combineAtomicLoad(N, DAG, DCI);
case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 9a958525057b6..0d05c5772a707 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -892,8 +892,6 @@ namespace llvm {
shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandLogicAtomicRMWInIR(const AtomicRMWInst *AI) const;
- TargetLoweringBase::AtomicExpansionKind
- shouldCastAtomicLoadInIR(LoadInst *LI) const override;
void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
🐧 Linux x64 Test Results
All executed tests passed, but another part of the build failed. Click on a failure below to see the details. bin/tco (Likely Already Failing)This test is already failing at the base commit.bin/fir-opt (Likely Already Failing)This test is already failing at the base commit.If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
🪟 Windows x64 Test Results
✅ The build succeeded and all tests passed. |
…dded in #148899) So that atomic floating-point and FP-vector loads are no longer bitcast to an integer at the IR level by AtomicExpand.
c398ae2 to
ed7f89c
Compare
Contributor
Author
|
Continued in #199520. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
(added in #148899)
So that atomic floating-point and FP-vector loads are no longer bitcast to an integer at the IR level by AtomicExpand.