[X86] Remove shouldCastAtomicLoadInIR; use DAG combine instead#199520
Conversation
|
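@llvm/pr-subscribers-backend-x86 Author: jofrn Changes: Remove X86's shouldCastAtomicLoadInIR override that cast FP atomic loads to integer at the IR level. Instead, handle this in a pre-legalize DAG combine (combineAtomicLoad) that rewrites FP/FP-vector atomic loads to integer atomic loads plus a bitcast. This depends on #199310 which adds the necessary cmpxchg support for non-integer atomic loads in AtomicExpand. 4 Files Affected: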
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 7327290f62970..b06f3534b53fc 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -673,12 +673,22 @@ bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
Value *Addr = LI->getPointerOperand();
Type *Ty = LI->getType();
- Constant *DummyVal = Constant::getNullValue(Ty);
+
+ // cmpxchg supports only integer and pointer operands. If the load type is
+ // FP or vector, run the cmpxchg on the same-sized integer and bitcast the
+ // result back; mirrors createCmpXchgInstFun.
+ bool NeedBitcast = Ty->isFloatingPointTy() || Ty->isVectorTy();
+ Type *CmpXchgTy = Ty;
+ if (NeedBitcast)
+ CmpXchgTy = Builder.getIntNTy(Ty->getPrimitiveSizeInBits());
+ Constant *DummyVal = Constant::getNullValue(CmpXchgTy);
Value *Pair = Builder.CreateAtomicCmpXchg(
Addr, DummyVal, DummyVal, LI->getAlign(), Order,
AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
+ if (NeedBitcast)
+ Loaded = Builder.CreateBitCast(Loaded, Ty);
LI->replaceAllUsesWith(Loaded);
LI->eraseFromParent();
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f37d896b9c69e..66cbd9c960346 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2785,6 +2785,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::FMINNUM,
ISD::FMAXNUM,
ISD::SUB,
+ ISD::ATOMIC_LOAD,
ISD::LOAD,
ISD::LRINT,
ISD::LLRINT,
@@ -33030,13 +33031,6 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const {
}
}
-TargetLowering::AtomicExpansionKind
-X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
- if (LI->getType()->getScalarType()->isFloatingPointTy())
- return AtomicExpansionKind::CastToInteger;
- return AtomicExpansionKind::None;
-}
-
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
@@ -54075,6 +54069,29 @@ static SDValue combineConstantPoolLoads(SDNode *N, const SDLoc &dl,
return SDValue();
}
+static SDValue combineAtomicLoad(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (!DCI.isBeforeLegalize())
+ return SDValue();
+
+ auto *AN = cast<AtomicSDNode>(N);
+ EVT VT = AN->getValueType(0);
+ if (!VT.getScalarType().isFloatingPoint())
+ return SDValue();
+
+ unsigned BitWidth = VT.getStoreSizeInBits();
+ if (BitWidth == 0 || BitWidth != VT.getSizeInBits())
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitWidth);
+ SDValue IntLoad = DAG.getAtomic(
+ ISD::ATOMIC_LOAD, DL, IntVT, DAG.getVTList(IntVT, MVT::Other),
+ {AN->getChain(), AN->getBasePtr()}, AN->getMemOperand());
+ SDValue Cast = DAG.getBitcast(VT, IntLoad);
+ return DAG.getMergeValues({Cast, IntLoad.getValue(1)}, DL);
+}
+
static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -62738,6 +62755,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::AVGCEILU:
case ISD::AVGFLOORS:
case ISD::AVGFLOORU: return combineAVG(N, DAG, DCI, Subtarget);
+ case ISD::ATOMIC_LOAD: return combineAtomicLoad(N, DAG, DCI);
case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 9a958525057b6..0d05c5772a707 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -892,8 +892,6 @@ namespace llvm {
shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandLogicAtomicRMWInIR(const AtomicRMWInst *AI) const;
- TargetLoweringBase::AtomicExpansionKind
- shouldCastAtomicLoadInIR(LoadInst *LI) const override;
void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
diff --git a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
index 9f973ac5531d1..e3a58931f5e85 100644
--- a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
+++ b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
@@ -1,10 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S %s -passes='require<libcall-lowering-info>,atomic-expand' -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK64
; RUN: opt -S %s -passes='require<libcall-lowering-info>,atomic-expand' -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK32
+; RUN: opt -S %s -passes='require<libcall-lowering-info>,atomic-expand' -mtriple=x86_64-linux-gnu -mattr=+cx16,-avx | FileCheck %s --check-prefixes=CX16
-; This file tests the functions `llvm::convertAtomicLoadToIntegerType` and
-; `llvm::convertAtomicStoreToIntegerType`. If X86 stops using this
-; functionality, please move this test to a target which still is.
+; This file tests AtomicExpand's non-integer atomic type conversions and
+; X86-specific expansion choices. If X86 stops using this functionality, please
+; move this test to a target which still is.
define float @float_load_expand(ptr %ptr) {
; CHECK-LABEL: define float @float_load_expand(
@@ -307,8 +308,7 @@ define <2 x i16> @atomic_vec2_i16(ptr %x) nounwind {
define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
; CHECK-LABEL: define <2 x half> @atomic_vec2_half(
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[X]] acquire, align 8
-; CHECK-NEXT: [[RET:%.*]] = bitcast i32 [[TMP1]] to <2 x half>
+; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x half>, ptr [[X]] acquire, align 8
; CHECK-NEXT: ret <2 x half> [[RET]]
;
%ret = load atomic <2 x half>, ptr %x acquire, align 8
@@ -354,3 +354,42 @@ define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
%ret = load atomic <4 x float>, ptr %x acquire, align 16
ret <4 x float> %ret
}
+
+; Vector atomic loads should expand to a same-sized integer cmpxchg followed by
+; a bitcast back to the original type when X86 chooses the cmpxchg expansion.
+
+define <2 x i64> @load_v2i64_cmpxchg(ptr %p) {
+; CX16-LABEL: define <2 x i64> @load_v2i64_cmpxchg(
+; CX16-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CX16-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], i128 0, i128 0 seq_cst seq_cst, align 16
+; CX16-NEXT: [[LOADED:%.*]] = extractvalue { i128, i1 } [[TMP1]], 0
+; CX16-NEXT: [[TMP2:%.*]] = bitcast i128 [[LOADED]] to <2 x i64>
+; CX16-NEXT: ret <2 x i64> [[TMP2]]
+;
+ %r = load atomic <2 x i64>, ptr %p seq_cst, align 16
+ ret <2 x i64> %r
+}
+
+define <4 x i32> @load_v4i32_cmpxchg(ptr %p) {
+; CX16-LABEL: define <4 x i32> @load_v4i32_cmpxchg(
+; CX16-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; CX16-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], i128 0, i128 0 seq_cst seq_cst, align 16
+; CX16-NEXT: [[LOADED:%.*]] = extractvalue { i128, i1 } [[TMP1]], 0
+; CX16-NEXT: [[TMP2:%.*]] = bitcast i128 [[LOADED]] to <4 x i32>
+; CX16-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %r = load atomic <4 x i32>, ptr %p seq_cst, align 16
+ ret <4 x i32> %r
+}
+
+define <16 x i8> @load_v16i8_cmpxchg(ptr %p) {
+; CX16-LABEL: define <16 x i8> @load_v16i8_cmpxchg(
+; CX16-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; CX16-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], i128 0, i128 0 seq_cst seq_cst, align 16
+; CX16-NEXT: [[LOADED:%.*]] = extractvalue { i128, i1 } [[TMP1]], 0
+; CX16-NEXT: [[TMP2:%.*]] = bitcast i128 [[LOADED]] to <16 x i8>
+; CX16-NEXT: ret <16 x i8> [[TMP2]]
+;
+ %r = load atomic <16 x i8>, ptr %p seq_cst, align 16
+ ret <16 x i8> %r
+}
|
|
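@llvm/pr-subscribers-llvm-transforms Author: jofrn Changes: Remove X86's shouldCastAtomicLoadInIR override that cast FP atomic loads to integer at the IR level. Instead, handle this in a pre-legalize DAG combine (combineAtomicLoad) that rewrites FP/FP-vector atomic loads to integer atomic loads plus a bitcast. This depends on #199310 which adds the necessary cmpxchg support for non-integer atomic loads in AtomicExpand. 4 Files Affected: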
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 7327290f62970..b06f3534b53fc 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -673,12 +673,22 @@ bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
Value *Addr = LI->getPointerOperand();
Type *Ty = LI->getType();
- Constant *DummyVal = Constant::getNullValue(Ty);
+
+ // cmpxchg supports only integer and pointer operands. If the load type is
+ // FP or vector, run the cmpxchg on the same-sized integer and bitcast the
+ // result back; mirrors createCmpXchgInstFun.
+ bool NeedBitcast = Ty->isFloatingPointTy() || Ty->isVectorTy();
+ Type *CmpXchgTy = Ty;
+ if (NeedBitcast)
+ CmpXchgTy = Builder.getIntNTy(Ty->getPrimitiveSizeInBits());
+ Constant *DummyVal = Constant::getNullValue(CmpXchgTy);
Value *Pair = Builder.CreateAtomicCmpXchg(
Addr, DummyVal, DummyVal, LI->getAlign(), Order,
AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
+ if (NeedBitcast)
+ Loaded = Builder.CreateBitCast(Loaded, Ty);
LI->replaceAllUsesWith(Loaded);
LI->eraseFromParent();
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f37d896b9c69e..66cbd9c960346 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2785,6 +2785,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::FMINNUM,
ISD::FMAXNUM,
ISD::SUB,
+ ISD::ATOMIC_LOAD,
ISD::LOAD,
ISD::LRINT,
ISD::LLRINT,
@@ -33030,13 +33031,6 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const {
}
}
-TargetLowering::AtomicExpansionKind
-X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
- if (LI->getType()->getScalarType()->isFloatingPointTy())
- return AtomicExpansionKind::CastToInteger;
- return AtomicExpansionKind::None;
-}
-
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
@@ -54075,6 +54069,29 @@ static SDValue combineConstantPoolLoads(SDNode *N, const SDLoc &dl,
return SDValue();
}
+static SDValue combineAtomicLoad(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (!DCI.isBeforeLegalize())
+ return SDValue();
+
+ auto *AN = cast<AtomicSDNode>(N);
+ EVT VT = AN->getValueType(0);
+ if (!VT.getScalarType().isFloatingPoint())
+ return SDValue();
+
+ unsigned BitWidth = VT.getStoreSizeInBits();
+ if (BitWidth == 0 || BitWidth != VT.getSizeInBits())
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitWidth);
+ SDValue IntLoad = DAG.getAtomic(
+ ISD::ATOMIC_LOAD, DL, IntVT, DAG.getVTList(IntVT, MVT::Other),
+ {AN->getChain(), AN->getBasePtr()}, AN->getMemOperand());
+ SDValue Cast = DAG.getBitcast(VT, IntLoad);
+ return DAG.getMergeValues({Cast, IntLoad.getValue(1)}, DL);
+}
+
static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -62738,6 +62755,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::AVGCEILU:
case ISD::AVGFLOORS:
case ISD::AVGFLOORU: return combineAVG(N, DAG, DCI, Subtarget);
+ case ISD::ATOMIC_LOAD: return combineAtomicLoad(N, DAG, DCI);
case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 9a958525057b6..0d05c5772a707 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -892,8 +892,6 @@ namespace llvm {
shouldExpandAtomicRMWInIR(const AtomicRMWInst *AI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandLogicAtomicRMWInIR(const AtomicRMWInst *AI) const;
- TargetLoweringBase::AtomicExpansionKind
- shouldCastAtomicLoadInIR(LoadInst *LI) const override;
void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
diff --git a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
index 9f973ac5531d1..e3a58931f5e85 100644
--- a/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
+++ b/llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
@@ -1,10 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S %s -passes='require<libcall-lowering-info>,atomic-expand' -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK64
; RUN: opt -S %s -passes='require<libcall-lowering-info>,atomic-expand' -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK32
+; RUN: opt -S %s -passes='require<libcall-lowering-info>,atomic-expand' -mtriple=x86_64-linux-gnu -mattr=+cx16,-avx | FileCheck %s --check-prefixes=CX16
-; This file tests the functions `llvm::convertAtomicLoadToIntegerType` and
-; `llvm::convertAtomicStoreToIntegerType`. If X86 stops using this
-; functionality, please move this test to a target which still is.
+; This file tests AtomicExpand's non-integer atomic type conversions and
+; X86-specific expansion choices. If X86 stops using this functionality, please
+; move this test to a target which still is.
define float @float_load_expand(ptr %ptr) {
; CHECK-LABEL: define float @float_load_expand(
@@ -307,8 +308,7 @@ define <2 x i16> @atomic_vec2_i16(ptr %x) nounwind {
define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
; CHECK-LABEL: define <2 x half> @atomic_vec2_half(
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[X]] acquire, align 8
-; CHECK-NEXT: [[RET:%.*]] = bitcast i32 [[TMP1]] to <2 x half>
+; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x half>, ptr [[X]] acquire, align 8
; CHECK-NEXT: ret <2 x half> [[RET]]
;
%ret = load atomic <2 x half>, ptr %x acquire, align 8
@@ -354,3 +354,42 @@ define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
%ret = load atomic <4 x float>, ptr %x acquire, align 16
ret <4 x float> %ret
}
+
+; Vector atomic loads should expand to a same-sized integer cmpxchg followed by
+; a bitcast back to the original type when X86 chooses the cmpxchg expansion.
+
+define <2 x i64> @load_v2i64_cmpxchg(ptr %p) {
+; CX16-LABEL: define <2 x i64> @load_v2i64_cmpxchg(
+; CX16-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CX16-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], i128 0, i128 0 seq_cst seq_cst, align 16
+; CX16-NEXT: [[LOADED:%.*]] = extractvalue { i128, i1 } [[TMP1]], 0
+; CX16-NEXT: [[TMP2:%.*]] = bitcast i128 [[LOADED]] to <2 x i64>
+; CX16-NEXT: ret <2 x i64> [[TMP2]]
+;
+ %r = load atomic <2 x i64>, ptr %p seq_cst, align 16
+ ret <2 x i64> %r
+}
+
+define <4 x i32> @load_v4i32_cmpxchg(ptr %p) {
+; CX16-LABEL: define <4 x i32> @load_v4i32_cmpxchg(
+; CX16-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; CX16-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], i128 0, i128 0 seq_cst seq_cst, align 16
+; CX16-NEXT: [[LOADED:%.*]] = extractvalue { i128, i1 } [[TMP1]], 0
+; CX16-NEXT: [[TMP2:%.*]] = bitcast i128 [[LOADED]] to <4 x i32>
+; CX16-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %r = load atomic <4 x i32>, ptr %p seq_cst, align 16
+ ret <4 x i32> %r
+}
+
+define <16 x i8> @load_v16i8_cmpxchg(ptr %p) {
+; CX16-LABEL: define <16 x i8> @load_v16i8_cmpxchg(
+; CX16-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; CX16-NEXT: [[TMP1:%.*]] = cmpxchg ptr [[P]], i128 0, i128 0 seq_cst seq_cst, align 16
+; CX16-NEXT: [[LOADED:%.*]] = extractvalue { i128, i1 } [[TMP1]], 0
+; CX16-NEXT: [[TMP2:%.*]] = bitcast i128 [[LOADED]] to <16 x i8>
+; CX16-NEXT: ret <16 x i8> [[TMP2]]
+;
+ %r = load atomic <16 x i8>, ptr %p seq_cst, align 16
+ ret <16 x i8> %r
+}
|
|
In particular, |
df4f0d0 to
99e797b
Compare
16f1d37 to
a8fa91a
Compare
a8fa91a to
979691e
Compare
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
🪟 Windows x64 Test Results
✅ The build succeeded and all tests passed. |
979691e to
d4d1fd4
Compare
| return SDValue(); | ||
|
|
||
| unsigned BitWidth = VT.getStoreSizeInBits(); | ||
| if (BitWidth == 0 || BitWidth != VT.getSizeInBits()) |
There was a problem hiding this comment.
You're correct. The check for FP types implies we can't have zero BitWidth. Thank you.
Remove X86's shouldCastAtomicLoadInIR override that cast FP atomic loads to integer at the IR level. Instead, handle this in a pre-legalize DAG combine (combineAtomicLoad) that rewrites FP/FP-vector atomic loads to integer atomic loads plus a bitcast. This depends on #199310 which adds the necessary cmpxchg support for non-integer atomic loads in AtomicExpand.
d4d1fd4 to
12e5a0f
Compare
Remove X86's shouldCastAtomicLoadInIR override that cast FP atomic loads to integer at the IR level. Instead, handle this in a pre-legalize DAG combine (combineAtomicLoad) that rewrites FP/FP-vector atomic loads to integer atomic loads plus a bitcast.
This and #199310, which adds the necessary cmpxchg support for non-integer atomic loads in AtomicExpand, are a response to #148899 for
`atomic_vec4_float` of `atomic-load-store.ll`. Stacked above #201303 and below #200339.