Reapply "[VPlan] Extract reverse mask from reverse accesses" #189930
Reapply "[VPlan] Extract reverse mask from reverse accesses" #189930
Conversation
|
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-backend-risc-v Author: Mel Chen (Mel-Chen) Changes: Following #146525, separate the reverse mask from reverse access recipes. The previous revert was due to an over-aggressive assertion that incorrectly flagged a reverse load followed by a scatter store as illegal. This version relaxes the assertion to check the mask only. Re-land #155579. Patch is 62.98 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/189930.diff 15 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ac94d0dbcc4cd..b268c81e550cf 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7137,10 +7137,18 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
return nullptr;
};
- // Check if a select for a safe divisor was hoisted to the pre-header. If so,
- // the select doesn't need to be considered for the vector loop cost; go with
- // the more accurate VPlan-based cost model.
for (VPRecipeBase &R : *Plan.getVectorPreheader()) {
+ // Reverse operations for reverse memory accesses may be hoisted to the
+ // preheader by LICM if the reversed value is loop invariant. In this case,
+ // the VPlan-based cost model diverges from the legacy cost model.
+ if (match(&R,
+ m_CombineOr(m_Reverse(m_VPValue()),
+ m_Intrinsic<Intrinsic::experimental_vp_reverse>())))
+ return true;
+
+ // Check if a select for a safe divisor was hoisted to the pre-header. If
+ // so, the select doesn't need to be considered for the vector loop cost; go
+ // with the more accurate VPlan-based cost model.
auto *VPI = dyn_cast<VPInstruction>(&R);
if (!VPI || VPI->getOpcode() != Instruction::Select)
continue;
@@ -7193,20 +7201,6 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
if (AddrI && vputils::isSingleScalar(WidenMemR->getAddr()) !=
CostCtx.isLegacyUniformAfterVectorization(AddrI, VF))
return true;
-
- if (WidenMemR->isReverse()) {
- // If the stored value of a reverse store is invariant, LICM will
- // hoist the reverse operation to the preheader. In this case, the
- // result of the VPlan-based cost model will diverge from that of
- // the legacy model.
- if (auto *StoreR = dyn_cast<VPWidenStoreRecipe>(WidenMemR))
- if (StoreR->getStoredValue()->isDefinedOutsideLoopRegions())
- return true;
-
- if (auto *StoreR = dyn_cast<VPWidenStoreEVLRecipe>(WidenMemR))
- if (StoreR->getStoredValue()->isDefinedOutsideLoopRegions())
- return true;
- }
}
// The legacy cost model costs non-header phis with a scalar VF as a phi,
@@ -7754,10 +7748,13 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
Ptr = VectorPtr;
}
+ if (Reverse && Mask)
+ Mask = Builder.createNaryOp(VPInstruction::Reverse, Mask, I->getDebugLoc());
+
if (VPI->getOpcode() == Instruction::Load) {
auto *Load = cast<LoadInst>(I);
- auto *LoadR = new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
- *VPI, Load->getDebugLoc());
+ auto *LoadR = new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, *VPI,
+ Load->getDebugLoc());
if (Reverse) {
Builder.insert(LoadR);
return new VPInstruction(VPInstruction::Reverse, LoadR, {}, {},
@@ -7771,8 +7768,8 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(VPInstruction *VPI,
if (Reverse)
StoredVal = Builder.createNaryOp(VPInstruction::Reverse, StoredVal,
Store->getDebugLoc());
- return new VPWidenStoreRecipe(*Store, Ptr, StoredVal, Mask, Consecutive,
- Reverse, *VPI, Store->getDebugLoc());
+ return new VPWidenStoreRecipe(*Store, Ptr, StoredVal, Mask, Consecutive, *VPI,
+ Store->getDebugLoc());
}
VPWidenIntOrFpInductionRecipe *
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index ab47d927942db..5bac173262468 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3535,9 +3535,6 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
/// Whether the accessed addresses are consecutive.
bool Consecutive;
- /// Whether the consecutive accessed addresses are in reverse order.
- bool Reverse;
-
/// Whether the memory access is masked.
bool IsMasked = false;
@@ -3551,15 +3548,10 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
std::initializer_list<VPValue *> Operands,
- bool Consecutive, bool Reverse,
- const VPIRMetadata &Metadata, DebugLoc DL)
+ bool Consecutive, const VPIRMetadata &Metadata,
+ DebugLoc DL)
: VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
- Alignment(getLoadStoreAlignment(&I)), Consecutive(Consecutive),
- Reverse(Reverse) {
- assert((Consecutive || !Reverse) && "Reverse implies consecutive");
- assert((isa<VPVectorEndPointerRecipe>(getAddr()) || !Reverse) &&
- "Reversed acccess without VPVectorEndPointerRecipe address?");
- }
+ Alignment(getLoadStoreAlignment(&I)), Consecutive(Consecutive) {}
public:
VPWidenMemoryRecipe *clone() override {
@@ -3581,10 +3573,6 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
/// Return whether the loaded-from / stored-to addresses are consecutive.
bool isConsecutive() const { return Consecutive; }
- /// Return whether the consecutive loaded/stored addresses are in reverse
- /// order.
- bool isReverse() const { return Reverse; }
-
/// Return the address accessed by this recipe.
VPValue *getAddr() const { return getOperand(0); }
@@ -3618,18 +3606,16 @@ class LLVM_ABI_FOR_TEST VPWidenMemoryRecipe : public VPRecipeBase,
struct LLVM_ABI_FOR_TEST VPWidenLoadRecipe final : public VPWidenMemoryRecipe,
public VPRecipeValue {
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
- bool Consecutive, bool Reverse,
- const VPIRMetadata &Metadata, DebugLoc DL)
+ bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
: VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadSC, Load, {Addr},
- Consecutive, Reverse, Metadata, DL),
+ Consecutive, Metadata, DL),
VPRecipeValue(this, &Load) {
setMask(Mask);
}
VPWidenLoadRecipe *clone() override {
return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
- getMask(), Consecutive, Reverse, *this,
- getDebugLoc());
+ getMask(), Consecutive, *this, getDebugLoc());
}
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC);
@@ -3662,7 +3648,7 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe,
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL,
VPValue *Mask)
: VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadEVLSC, L.getIngredient(),
- {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
+ {Addr, &EVL}, L.isConsecutive(), L,
L.getDebugLoc()),
VPRecipeValue(this, &getIngredient()) {
setMask(Mask);
@@ -3701,18 +3687,17 @@ struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe,
/// to store to and an optional mask.
struct LLVM_ABI_FOR_TEST VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
- VPValue *Mask, bool Consecutive, bool Reverse,
+ VPValue *Mask, bool Consecutive,
const VPIRMetadata &Metadata, DebugLoc DL)
: VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreSC, Store,
- {Addr, StoredVal}, Consecutive, Reverse, Metadata,
- DL) {
+ {Addr, StoredVal}, Consecutive, Metadata, DL) {
setMask(Mask);
}
VPWidenStoreRecipe *clone() override {
return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
getStoredValue(), getMask(), Consecutive,
- Reverse, *this, getDebugLoc());
+ *this, getDebugLoc());
}
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC);
@@ -3747,8 +3732,8 @@ struct VPWidenStoreEVLRecipe final : public VPWidenMemoryRecipe {
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr,
VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
: VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreEVLSC, S.getIngredient(),
- {Addr, StoredVal, &EVL}, S.isConsecutive(),
- S.isReverse(), S, S.getDebugLoc()) {
+ {Addr, StoredVal, &EVL}, S.isConsecutive(), S,
+ S.getDebugLoc()) {
setMask(Mask);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 7eefd77045050..7104201b5067c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1029,8 +1029,6 @@ InstructionCost VPRecipeWithIRFlags::getCostForRecipeWithOpcode(
return TTI::CastContextHint::Normal;
if (!WidenMemoryRecipe->isConsecutive())
return TTI::CastContextHint::GatherScatter;
- if (WidenMemoryRecipe->isReverse())
- return TTI::CastContextHint::Reversed;
if (WidenMemoryRecipe->isMasked())
return TTI::CastContextHint::Masked;
return TTI::CastContextHint::Normal;
@@ -1038,6 +1036,7 @@ InstructionCost VPRecipeWithIRFlags::getCostForRecipeWithOpcode(
VPValue *Operand = getOperand(0);
TTI::CastContextHint CCH = TTI::CastContextHint::None;
+ bool IsReverse = false;
// For Trunc/FPTrunc, get the context from the only user.
if (Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) {
auto GetOnlyUser = [](const VPSingleDefRecipe *R) -> VPRecipeBase * {
@@ -1046,8 +1045,13 @@ InstructionCost VPRecipeWithIRFlags::getCostForRecipeWithOpcode(
return dyn_cast<VPRecipeBase>(*R->user_begin());
};
if (VPRecipeBase *Recipe = GetOnlyUser(this)) {
- if (match(Recipe, m_Reverse(m_VPValue())))
- Recipe = GetOnlyUser(cast<VPInstruction>(Recipe));
+ if (match(Recipe,
+ m_CombineOr(m_Reverse(m_VPValue()),
+ m_Intrinsic<Intrinsic::experimental_vp_reverse>(
+ m_VPValue(), m_VPValue(), m_VPValue())))) {
+ Recipe = GetOnlyUser(cast<VPSingleDefRecipe>(Recipe));
+ IsReverse = true;
+ }
if (Recipe)
CCH = ComputeCCH(Recipe);
}
@@ -1057,12 +1061,20 @@ InstructionCost VPRecipeWithIRFlags::getCostForRecipeWithOpcode(
Opcode == Instruction::FPExt) {
if (auto *Recipe = Operand->getDefiningRecipe()) {
VPValue *ReverseOp;
- if (match(Recipe, m_Reverse(m_VPValue(ReverseOp))))
+ if (match(Recipe, m_CombineOr(
+ m_Reverse(m_VPValue(ReverseOp)),
+ m_Intrinsic<Intrinsic::experimental_vp_reverse>(
+ m_VPValue(ReverseOp), m_VPValue(),
+ m_VPValue())))) {
Recipe = ReverseOp->getDefiningRecipe();
+ IsReverse = true;
+ }
if (Recipe)
CCH = ComputeCCH(Recipe);
}
}
+ if (IsReverse && CCH != TTI::CastContextHint::None)
+ CCH = TTI::CastContextHint::Reversed;
auto *ScalarSrcTy = Ctx.Types.inferScalarType(Operand);
Type *SrcTy = VF.isVector() ? toVectorTy(ScalarSrcTy, VF) : ScalarSrcTy;
@@ -1244,8 +1256,13 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
}
case VPInstruction::Reverse: {
assert(VF.isVector() && "Reverse operation must be vector type");
- auto *VectorTy = cast<VectorType>(
- toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF));
+ Type *EltTy = Ctx.Types.inferScalarType(this);
+ // Skip the reverse operation cost for the mask.
+ // FIXME: Remove this once redundant mask reverse operations can be
+ // eliminated by VPlanTransforms::cse before cost computation.
+ if (EltTy->isIntegerTy(1))
+ return 0;
+ auto *VectorTy = cast<VectorType>(toVectorTy(EltTy, VF));
return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy,
VectorTy, /*Mask=*/{}, Ctx.CostKind,
/*Index=*/0);
@@ -1933,6 +1950,13 @@ static InstructionCost getCostForIntrinsics(Intrinsic::ID ID,
const VPRecipeWithIRFlags &R,
ElementCount VF,
VPCostContext &Ctx) {
+ Type *ScalarRetTy = Ctx.Types.inferScalarType(&R);
+ // Skip the reverse operation cost for the mask.
+ // FIXME: Remove this once redundant mask reverse operations can be eliminated
+ // by VPlanTransforms::cse before cost computation.
+ if (ID == Intrinsic::experimental_vp_reverse && ScalarRetTy->isIntegerTy(1))
+ return InstructionCost(0);
+
// Some backends analyze intrinsic arguments to determine cost. Use the
// underlying value for the operand if it has one. Otherwise try to use the
// operand of the underlying call instruction, if there is one. Otherwise
@@ -1952,7 +1976,6 @@ static InstructionCost getCostForIntrinsics(Intrinsic::ID ID,
Arguments.push_back(V);
}
- Type *ScalarRetTy = Ctx.Types.inferScalarType(&R);
Type *RetTy = VF.isVector() ? toVectorizedTy(ScalarRetTy, VF) : ScalarRetTy;
SmallVector<Type *> ParamTys;
for (const VPValue *Op : Operands) {
@@ -3801,9 +3824,19 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
// TODO: Using the original IR may not be accurate.
// Currently, ARM will use the underlying IR to calculate gather/scatter
// instruction cost.
- assert(!Reverse &&
- "Inconsecutive memory access should not have the order.");
+ [[maybe_unused]] auto IsReverseMask = [this]() {
+ VPValue *Mask = getMask();
+ if (!Mask)
+ return false;
+ if (isa<VPWidenLoadEVLRecipe, VPWidenStoreEVLRecipe>(this))
+ return match(Mask, m_Intrinsic<Intrinsic::experimental_vp_reverse>(
+ m_VPValue(), m_VPValue(), m_VPValue()));
+
+ return match(Mask, m_Reverse(m_VPValue()));
+ };
+ assert(!IsReverseMask() &&
+ "Inconsecutive memory access should not have reverse order");
const Value *Ptr = getLoadStorePointerOperand(&Ingredient);
Type *PtrTy = Ptr->getType();
@@ -3847,13 +3880,8 @@ void VPWidenLoadRecipe::execute(VPTransformState &State) {
auto &Builder = State.Builder;
Value *Mask = nullptr;
- if (auto *VPMask = getMask()) {
- // Mask reversal is only needed for non-all-one (null) masks, as reverse
- // of a null all-one mask is a null mask.
+ if (auto *VPMask = getMask())
Mask = State.get(VPMask);
- if (isReverse())
- Mask = Builder.CreateVectorReverse(Mask, "reverse");
- }
Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateGather);
Value *NewLI;
@@ -3881,17 +3909,6 @@ void VPWidenLoadRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
}
#endif
-/// Use all-true mask for reverse rather than actual mask, as it avoids a
-/// dependence w/o affecting the result.
-static Instruction *createReverseEVL(IRBuilderBase &Builder, Value *Operand,
- Value *EVL, const Twine &Name) {
- VectorType *ValTy = cast<VectorType>(Operand->getType());
- Value *AllTrueMask =
- Builder.CreateVectorSplat(ValTy->getElementCount(), Builder.getTrue());
- return Builder.CreateIntrinsic(ValTy, Intrinsic::experimental_vp_reverse,
- {Operand, AllTrueMask, EVL}, nullptr, Name);
-}
-
void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
Type *ScalarDataTy = getLoadStoreType(&Ingredient);
auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
@@ -3902,13 +3919,10 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
Value *EVL = State.get(getEVL(), VPLane(0));
Value *Addr = State.get(getAddr(), !CreateGather);
Value *Mask = nullptr;
- if (VPValue *VPMask = getMask()) {
+ if (VPValue *VPMask = getMask())
Mask = State.get(VPMask);
- if (isReverse())
- Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
- } else {
+ else
Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
- }
if (CreateGather) {
NewLI =
@@ -3960,13 +3974,8 @@ void VPWidenStoreRecipe::execute(VPTransformState &State) {
auto &Builder = State.Builder;
Value *Mask = nullptr;
- if (auto *VPMask = getMask()) {
- // Mask reversal is only needed for non-all-one (null) masks, as reverse
- // of a null all-one mask is a null mask.
+ if (auto *VPMask = getMask())
Mask = State.get(VPMask);
- if (isReverse())
- Mask = Builder.CreateVectorReverse(Mask, "reverse");
- }
Value *StoredVal = State.get(StoredVPValue);
Value *Addr = State.get(getAddr(), /*IsScalar*/ !CreateScatter);
@@ -3998,13 +4007,11 @@ void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
Value *StoredVal = State.get(StoredValue);
Value *EVL = State.get(getEVL(), VPLane(0));
Value *Mask = nullptr;
- if (VPValue *VPMask = getMask()) {
+ if (VPValue *VPMask = getMask())
Mask = State.get(VPMask);
- if (isReverse())
- Mask = createReverseEVL(Builder, Mask, EVL, "vp.reverse.mask");
- } else {
+ else
Mask = Builder.CreateVectorSplat(State.VF, Builder.getTrue());
- }
+
Value *Addr = State.get(getAddr(), !CreateScatter);
if (CreateScatter) {
NewSI = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 90685dcb7ed63..81cf8b9d74c5b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -80,12 +80,11 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
NewRecipe = new VPWidenLoadRecipe(
*Load, Ingredient.getOperand(0), nullptr /*Mask*/,
- false /*Consecutive*/, false /*Reverse*/, *VPI,
- Ingredient.getDebugLoc());
+ false /*Consecutive*/, *VPI, Ingredient.getDebugLoc());
} else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
NewRecipe = new VPWidenStoreRecipe(
*Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
- nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/, *VPI,
+ nullptr /*Mask*/, false /*Consecutive*/, *VPI,
Ingredient.getDebugLoc());
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands(), *VPI,
@@ -1813,8 +1812,6 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
auto *WidenStoreR = dyn_cast<VPWidenStoreRecipe>(&R);
if (WidenStoreR && vputils::isSingleScalar(WidenStoreR->getAddr()) &&
!WidenStoreR->isConsecutive()) {
- assert(!WidenStoreR->isReverse() &&
- "Not consecutive memory recipes shouldn't be reversed");
VPValue *Mask = WidenStoreR->getMask();
// Only convert the scatter to a scalar store if it is unmasked.
@@ -3088,20 +3085,32 @@ static VPRecipeBase *optimizeMaskToEVL(VPValue *HeaderMask,
return EVLEndPtr;
};
+ auto GetVPReverse = [&CurRecipe, &EVL, &TypeInfo, Plan,
+ DL](VPValue *V) -> VPWidenIntrinsicRecipe * {
+ if (!V)
+ return nullptr;
+ auto *Reverse = new VPWidenIntrinsicRecipe(
+ Intrinsic::experimental_vp_reverse, {V, Plan->getTrue(), &EVL},
+ TypeInfo.inferScalarType(V), {}, {}, DL);
+ Reverse->insertBefore(&CurRecipe);
+ return Reverse;
+ };
+
if (match(&Cur...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
c620e78 to
c7ee204
Compare
| m_CombineOr(m_Reverse(m_VPValue()), | ||
| m_Intrinsic<Intrinsic::experimental_vp_reverse>( | ||
| m_VPValue(), m_VPValue(), m_VPValue())))) { |
There was a problem hiding this comment.
this may deserve a dedicated matcher?
There was a problem hiding this comment.
I think if the arguments aren't needed this can just be m_CombineOr(m_Reverse(m_VPValue()), m_Intrinsic<Intrinsic::experimental_vp_reverse>())
lukel97
left a comment
There was a problem hiding this comment.
The relaxed assertion LGTM, just comments on what Florian said about the matcher
| m_Intrinsic<Intrinsic::experimental_vp_reverse>( | ||
| m_VPValue(ReverseOp), m_VPValue(), m_VPValue())))) { |
There was a problem hiding this comment.
I think this will also work since m_Intrinsic doesn't check the total number of arguments?
| m_Intrinsic<Intrinsic::experimental_vp_reverse>( | |
| m_VPValue(ReverseOp), m_VPValue(), m_VPValue())))) { | |
| m_Intrinsic<Intrinsic::experimental_vp_reverse>( | |
| m_VPValue(ReverseOp))))) { |
c7ee204 to
eef2ac3
Compare
Following #146525, separate the reverse mask from reverse access recipes.
At the same time, remove the unused member variable Reverse from VPWidenMemoryRecipe.
This will help to reduce redundant reverse mask computations by VPlan-based common subexpression elimination.
The previous revert was due to an over-aggressive assertion that incorrectly flagged a reverse load followed by a scatter store as illegal. This version relaxes the assertion to check the mask only.
Re-land #155579
Based on pre-commit #189928