[LV] Decompose WidenIntOrFPInduction into phi and update recipes #82021
[LV] Decompose WidenIntOrFPInduction into phi and update recipes #82021 — nikolaypanchenko wants to merge 3 commits into llvm:main from
Conversation
|
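@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-backend-risc-v Author: Kolya Panchenko (nikolaypanchenko) Changes: Loop Vectorizer still has two recipes, `VPWidenIntOrFpInductionRecipe` and `VPWidenPointerInductionRecipe`, that behave in a VPlan as phi-like, as they're derived from `VPHeaderPHIRecipe`, but their generate functions construct vector phi and vector self-update in the vectorized loop. This is not only bad for the readability of a VPlan, but also requires more code to maintain such behavior. For instance, there's already ad-hoc code motion to move generated updates of these recipes closer to the loop latch. The changeset: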
Patch is 3.06 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/82021.diff 171 Files Affected:
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 5c7b613ac48c40..7ca13adae87f6a 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -363,6 +363,11 @@ class InductionDescriptor {
return nullptr;
}
+ const Instruction *getExactFPMathInst() const {
+ return const_cast<const Instruction *>(
+ const_cast<InductionDescriptor *>(this)->getExactFPMathInst());
+ }
+
/// Returns binary opcode of the induction operator.
Instruction::BinaryOps getInductionOpcode() const {
return InductionBinOp ? InductionBinOp->getOpcode()
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 98b177cf5d2d0e..92b783d3badeae 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8114,34 +8114,6 @@ VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
return nullptr;
}
-VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
- TruncInst *I, ArrayRef<VPValue *> Operands, VFRange &Range, VPlan &Plan) {
- // Optimize the special case where the source is a constant integer
- // induction variable. Notice that we can only optimize the 'trunc' case
- // because (a) FP conversions lose precision, (b) sext/zext may wrap, and
- // (c) other casts depend on pointer size.
-
- // Determine whether \p K is a truncation based on an induction variable that
- // can be optimized.
- auto isOptimizableIVTruncate =
- [&](Instruction *K) -> std::function<bool(ElementCount)> {
- return [=](ElementCount VF) -> bool {
- return CM.isOptimizableIVTruncate(K, VF);
- };
- };
-
- if (LoopVectorizationPlanner::getDecisionAndClampRange(
- isOptimizableIVTruncate(I), Range)) {
-
- auto *Phi = cast<PHINode>(I->getOperand(0));
- const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi);
- VPValue *Start = Plan.getVPValueOrAddLiveIn(II.getStartValue());
- return createWidenInductionRecipes(Phi, I, Start, II, Plan, *PSE.getSE(),
- *OrigLoop, Range);
- }
- return nullptr;
-}
-
VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
ArrayRef<VPValue *> Operands,
VPlanPtr &Plan) {
@@ -8275,6 +8247,70 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const {
Range);
}
+VPWidenCastRecipe *VPRecipeBuilder::createCast(VPValue *V, Type *From,
+ Type *To) {
+ if (From == To)
+ return nullptr;
+ Instruction::CastOps CastOpcode;
+ if (To->isIntegerTy() && From->isIntegerTy())
+ CastOpcode = To->getPrimitiveSizeInBits() < From->getPrimitiveSizeInBits()
+ ? Instruction::Trunc
+ : Instruction::ZExt;
+ else if (To->isIntegerTy())
+ CastOpcode = Instruction::FPToUI;
+ else
+ CastOpcode = Instruction::UIToFP;
+
+ return new VPWidenCastRecipe(CastOpcode, V, To);
+}
+
+VPRecipeBase *
+VPRecipeBuilder::createWidenStep(VPWidenIntOrFpInductionRecipe &WIV,
+ ScalarEvolution &SE, VPlan &Plan,
+ DenseSet<VPRecipeBase *> *CreatedRecipes) {
+ PHINode *PN = WIV.getPHINode();
+ const InductionDescriptor &IndDesc = WIV.getInductionDescriptor();
+ VPValue *ScalarStep =
+ vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep(), SE);
+ Type *VFxUFTy = Plan.getVFxUF().getElementType();
+ Type *StepTy = IndDesc.getStep()->getType();
+ VPValue *WidenVFxUF = &Plan.getWidenVFxUF();
+ VPBasicBlock *LatchVPBB = Plan.getVectorLoopRegion()->getExitingBasicBlock();
+ if (VPWidenCastRecipe *WidenVFxUFCast =
+ createCast(&Plan.getWidenVFxUF(), VFxUFTy, StepTy)) {
+ WidenVFxUFCast->insertBefore(LatchVPBB->getTerminator());
+ if (CreatedRecipes)
+ CreatedRecipes->insert(WidenVFxUFCast);
+ WidenVFxUF = WidenVFxUFCast->getVPSingleValue();
+ }
+ const Instruction::BinaryOps UpdateOp =
+ IndDesc.getInductionOpcode() != Instruction::BinaryOpsEnd
+ ? IndDesc.getInductionOpcode()
+ : Instruction::Add;
+ VPInstruction *Update;
+ if (StepTy->isIntegerTy()) {
+ VPInstruction *Mul = new VPInstruction(
+ Instruction::Mul, {WidenVFxUF, ScalarStep}, PN->getDebugLoc());
+ Mul->insertBefore(LatchVPBB->getTerminator());
+ if (CreatedRecipes)
+ CreatedRecipes->insert(Mul);
+ Update = new VPInstruction(UpdateOp, {&WIV, Mul}, PN->getDebugLoc());
+ Update->insertBefore(LatchVPBB->getTerminator());
+ } else {
+ FastMathFlags FMF = IndDesc.getExactFPMathInst()
+ ? IndDesc.getExactFPMathInst()->getFastMathFlags()
+ : FastMathFlags();
+ VPInstruction *Mul = new VPInstruction(
+ Instruction::FMul, {WidenVFxUF, ScalarStep}, FMF, PN->getDebugLoc());
+ Mul->insertBefore(LatchVPBB->getTerminator());
+ Update = new VPInstruction(UpdateOp, {&WIV, Mul}, FMF, PN->getDebugLoc());
+ Update->insertBefore(LatchVPBB->getTerminator());
+ }
+ if (CreatedRecipes)
+ CreatedRecipes->insert(Update);
+ return Update;
+}
+
VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
ArrayRef<VPValue *> Operands,
VPBasicBlock *VPBB, VPlanPtr &Plan) {
@@ -8324,10 +8360,15 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
};
}
-void VPRecipeBuilder::fixHeaderPhis() {
+void VPRecipeBuilder::fixHeaderPhis(VPlan &Plan) {
BasicBlock *OrigLatch = OrigLoop->getLoopLatch();
for (VPHeaderPHIRecipe *R : PhisToFix) {
- auto *PN = cast<PHINode>(R->getUnderlyingValue());
+ if (auto *VPWIFR = dyn_cast<VPWidenIntOrFpInductionRecipe>(R)) {
+ VPWIFR->addOperand(
+ createWidenStep(*VPWIFR, *PSE.getSE(), Plan)->getVPSingleValue());
+ continue;
+ }
+ PHINode *PN = cast<PHINode>(R->getUnderlyingValue());
VPRecipeBase *IncR =
getRecipe(cast<Instruction>(PN->getIncomingValueForBlock(OrigLatch)));
R->addOperand(IncR->getVPSingleValue());
@@ -8405,8 +8446,12 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
// can have earlier phis as incoming values.
recordRecipeOf(Phi);
- if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range)))
+ if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range))) {
+ if (isa<VPWidenPointerInductionRecipe>(Recipe))
+ return Recipe;
+ PhisToFix.push_back(cast<VPWidenIntOrFpInductionRecipe>(Recipe));
return Recipe;
+ }
VPHeaderPHIRecipe *PhiRecipe = nullptr;
assert((Legal->isReductionVariable(Phi) ||
@@ -8441,10 +8486,17 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
return PhiRecipe;
}
- if (isa<TruncInst>(Instr) &&
- (Recipe = tryToOptimizeInductionTruncate(cast<TruncInst>(Instr), Operands,
- Range, *Plan)))
- return Recipe;
+ if (isa<TruncInst>(Instr)) {
+ auto IsOptimizableIVTruncate =
+ [&](Instruction *K) -> std::function<bool(ElementCount)> {
+ return [=](ElementCount VF) -> bool {
+ return CM.isOptimizableIVTruncate(K, VF);
+ };
+ };
+
+ LoopVectorizationPlanner::getDecisionAndClampRange(
+ IsOptimizableIVTruncate(Instr), Range);
+ }
// All widen recipes below deal only with VF > 1.
if (LoopVectorizationPlanner::getDecisionAndClampRange(
@@ -8707,7 +8759,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
!Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() &&
"entry block must be set to a VPRegionBlock having a non-empty entry "
"VPBasicBlock");
- RecipeBuilder.fixHeaderPhis();
+ RecipeBuilder.fixHeaderPhis(*Plan);
// ---------------------------------------------------------------------------
// Transform initial VPlan: Apply previously taken decisions, in order, to
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index b1498026adadfe..126a6b1c061265 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -146,6 +146,18 @@ class VPRecipeBuilder {
/// between SRC and DST.
VPValue *getEdgeMask(BasicBlock *Src, BasicBlock *Dst) const;
+ /// A helper function to create VPWidenCastRecipe of a \p V VPValue to a \p To
+ /// type.
+ /// FIXME: Remove \p From argument and take it from a \p V value
+ static VPWidenCastRecipe *createCast(VPValue *V, Type *From, Type *To);
+
+ /// A helper function which widens \p WIV step, multiplies it by WidenVFxUF
+ /// and attaches to loop latch of the \p Plan. Returns multiplication.
+ static VPRecipeBase *
+ createWidenStep(VPWidenIntOrFpInductionRecipe &WIV, ScalarEvolution &SE,
+ VPlan &Plan,
+ DenseSet<VPRecipeBase *> *CreatedRecipes = nullptr);
+
/// Mark given ingredient for recording its recipe once one is created for
/// it.
void recordRecipeOf(Instruction *I) {
@@ -171,7 +183,7 @@ class VPRecipeBuilder {
/// Add the incoming values from the backedge to reduction & first-order
/// recurrence cross-iteration phis.
- void fixHeaderPhis();
+ void fixHeaderPhis(VPlan &Plan);
};
} // end namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 2c0daa82afa59f..96732b77a9db3d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -76,12 +76,25 @@ Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder,
llvm_unreachable("Unknown lane kind");
}
-VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
- : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
+VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def, Type *Ty)
+ : SubclassID(SC), UnderlyingVal(UV), UnderlyingTy(Ty), Def(Def) {
+ if (UnderlyingTy)
+ assert((!UnderlyingVal || UnderlyingVal->getType() == UnderlyingTy) &&
+ "VPValue with set type should either be created without underlying "
+ "value or type should match the given type");
if (Def)
Def->addDefinedValue(this);
}
+Type *VPValue::getElementType() {
+ return const_cast<Type *>(
+ const_cast<const VPValue *>(this)->getElementType());
+}
+
+const Type *VPValue::getElementType() const {
+ return UnderlyingVal ? UnderlyingVal->getType() : UnderlyingTy;
+}
+
VPValue::~VPValue() {
assert(Users.empty() && "trying to delete a VPValue with remaining users");
if (Def)
@@ -763,6 +776,10 @@ VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE) {
auto Plan = std::make_unique<VPlan>(Preheader, VecPreheader);
Plan->TripCount =
vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE);
+ Type *TCType = TripCount->getType();
+ Plan->getVectorTripCount().setElementType(TCType);
+ Plan->getVFxUF().setElementType(TCType);
+ Plan->getWidenVFxUF().setElementType(TCType);
// Create empty VPRegionBlock, to be filled during processing later.
auto *TopRegion = new VPRegionBlock("vector loop", false /*isReplicator*/);
VPBlockUtils::insertBlockAfter(TopRegion, VecPreheader);
@@ -796,6 +813,18 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF),
0);
+ if (WidenVFxUF.getNumUsers() > 0)
+ for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
+ Value *Step =
+ createStepForVF(Builder, TripCountV->getType(), State.VF, Part+1);
+ if (State.VF.isScalar())
+ State.set(&WidenVFxUF, Step, Part);
+ else
+ State.set(&WidenVFxUF,
+ Builder.CreateVectorSplat(State.VF, Step, "widen.vfxuf"),
+ Part);
+ }
+
// When vectorizing the epilogue loop, the canonical induction start value
// needs to be changed from zero to the value after the main vector loop.
// FIXME: Improve modeling for canonical IV start values in the epilogue loop.
@@ -845,21 +874,16 @@ void VPlan::execute(VPTransformState *State) {
if (isa<VPWidenPHIRecipe>(&R))
continue;
- if (isa<VPWidenPointerInductionRecipe>(&R) ||
- isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+ if (isa<VPWidenPointerInductionRecipe>(&R)) {
PHINode *Phi = nullptr;
- if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
- Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
- } else {
- auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
- // TODO: Split off the case that all users of a pointer phi are scalar
- // from the VPWidenPointerInductionRecipe.
- if (WidenPhi->onlyScalarsGenerated(State->VF.isScalable()))
- continue;
-
- auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
- Phi = cast<PHINode>(GEP->getPointerOperand());
- }
+ auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
+ // TODO: Split off the case that all users of a pointer phi are scalar
+ // from the VPWidenPointerInductionRecipe.
+ if (WidenPhi->onlyScalarsGenerated(State->VF.isScalable()))
+ continue;
+
+ auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
+ Phi = cast<PHINode>(GEP->getPointerOperand());
Phi->setIncomingBlock(1, VectorLatchBB);
@@ -877,6 +901,7 @@ void VPlan::execute(VPTransformState *State) {
// generated.
bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
+ isa<VPWidenIntOrFpInductionRecipe>(PhiR) ||
(isa<VPReductionPHIRecipe>(PhiR) &&
cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
@@ -908,6 +933,12 @@ void VPlan::printLiveIns(raw_ostream &O) const {
O << " = VF * UF";
}
+ if (WidenVFxUF.getNumUsers() > 0) {
+ O << "\nLive-in ";
+ WidenVFxUF.printAsOperand(O, SlotTracker);
+ O << " = WIDEN VF * UF";
+ }
+
if (VectorTripCount.getNumUsers() > 0) {
O << "\nLive-in ";
VectorTripCount.printAsOperand(O, SlotTracker);
@@ -1083,6 +1114,11 @@ VPlan *VPlan::duplicate() {
}
Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
+ Old2NewVPValues[&WidenVFxUF] = &NewPlan->WidenVFxUF;
+ NewPlan->getVectorTripCount().setElementType(
+ getVectorTripCount().getElementType());
+ NewPlan->getVFxUF().setElementType(getVFxUF().getElementType());
+ NewPlan->getWidenVFxUF().setElementType(getWidenVFxUF().getElementType());
if (BackedgeTakenCount) {
NewPlan->BackedgeTakenCount = new VPValue();
Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
@@ -1379,6 +1415,8 @@ void VPSlotTracker::assignSlot(const VPValue *V) {
void VPSlotTracker::assignSlots(const VPlan &Plan) {
if (Plan.VFxUF.getNumUsers() > 0)
assignSlot(&Plan.VFxUF);
+ if (Plan.WidenVFxUF.getNumUsers() > 0)
+ assignSlot(&Plan.WidenVFxUF);
assignSlot(&Plan.VectorTripCount);
if (Plan.BackedgeTakenCount)
assignSlot(Plan.BackedgeTakenCount);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 13e1859ad6b250..306c2200ca34c9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1618,38 +1618,65 @@ class VPHeaderPHIRecipe : public VPSingleDefRecipe {
}
};
-/// A recipe for handling phi nodes of integer and floating-point inductions,
-/// producing their vector values.
-class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
- PHINode *IV;
- TruncInst *Trunc;
+/// A base class for all widen induction-like recipes
+class VPWidenInductionBasePHIRecipe : public VPHeaderPHIRecipe {
+protected:
const InductionDescriptor &IndDesc;
public:
- VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
+ VPWidenInductionBasePHIRecipe(unsigned char VPDefID, Instruction *Instr,
+ VPValue *Start, VPValue *Step,
const InductionDescriptor &IndDesc)
- : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
- Trunc(nullptr), IndDesc(IndDesc) {
+ : VPHeaderPHIRecipe(VPDefID, Instr, Start), IndDesc(IndDesc) {
addOperand(Step);
}
+ ~VPWidenInductionBasePHIRecipe() override = default;
+
+ /// Returns the step value of the induction.
+ VPValue *getStepValue() { return getOperand(1); }
+ const VPValue *getStepValue() const { return getOperand(1); }
+
+ /// Returns the induction descriptor for the recipe.
+ const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
+};
+
+/// A recipe for handling phi nodes of integer and floating-point inductions,
+/// producing their vector values.
+class VPWidenIntOrFpInductionRecipe : public VPWidenInductionBasePHIRecipe {
+ PHINode *IV = nullptr;
+ TruncInst *Trunc = nullptr;
+
+public:
+ VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
+ const InductionDescriptor &IndDesc)
+ : VPWidenInductionBasePHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV,
+ Start, Step, IndDesc),
+ IV(IV), Trunc(nullptr) {}
+
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
const InductionDescriptor &IndDesc,
TruncInst *Trunc)
- : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
- IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
- addOperand(Step);
- }
+ : VPWidenInductionBasePHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc,
+ Start, Step, IndDesc),
+ IV(IV), Trunc(Trunc) {}
~VPWidenIntOrFpInductionRecipe() override = default;
VPRecipeBase *clone() override {
- return new VPWidenIntOrFpInductionRecipe(IV, getStartValue(),
- getStepValue(), IndDesc, Trunc);
+ VPRecipeBase *Cloned = new VPWidenIntOrFpInductionRecipe(
+ getPHINode(), getStartValue(), getStepValue(), IndDesc, Trunc);
+ if (getNumOperands() == 3)
+ Cloned->addOperand(getOperand(2));
+ return Cloned;
}
VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
+ static inline bool classof(const VPHeaderPHIRecipe *R) {
+ return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC;
+ }
+
/// Generate the vectorized and scalarized versions of the phi node as
/// needed by their users.
void execute(VPTransformState &State) override;
@@ -1660,33 +1687,24 @@ class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
VPSlotTracker &SlotTracker) const override;
#endif
- VPValue *getBackedgeValue() override {
- // TODO: All operands of base recipe must exist and be at same index in
- // derived recipe.
- llvm_unreachable(
- "VPWidenIntOrFpInductionRecipe generates its own backedge value");
+ VPValue *getBackedgeValue() override final {
+ if (getNumOperands() != 3)
+ llvm_unreachable(
+ "VPWidenIntOrFpInductionRecipe::getBackedgeValue is not yet valid");
+ return getOperand(2);
}
- VPRecipeBase &getBackedgeRecipe() override {
- // TODO: All operands of base recipe must exist and be at same index in
- // derived recipe.
- llvm_unreachable(
- "VPWidenIntOrFpInductionRecipe generates its own backedge value");
+ VPRecipeBase &getBackedgeRecipe() override final {
+ return *getBackedgeValue()->getDefiningRecipe();
}
- /// Returns the step value of the induction.
- VPValue *getStepValue() { return getOperand(1); }
- const VPValue *getStepValue() const { return getOperand(1); }
-
/// Returns the first defined value as TruncInst, if it is one or nullptr
/// otherwise.
TruncInst *getTruncInst() { return Trunc; }
const TruncInst *getTruncInst() const { retu...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
Show resolved
Hide resolved
llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
Show resolved
Hide resolved
| @@ -48,7 +49,7 @@ for.end: | |||
| ; CHECK-NEXT: <x1> vector loop: { | |||
| ; CHECK-NEXT: vector.body: | |||
| ; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION | |||
| ; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> | |||
| ; CHECK-NEXT: WIDEN-INDUCTION ir<%iv> = phi ir<0>, vp<[[NEXT_WIV:%.+]]>, ir<1> | |||
There was a problem hiding this comment.
What is the meaning of ir<1>?
Is it still necessary after this patch decomposes WidenIntOrFPInduction?
There was a problem hiding this comment.
In the dump, ir<> represents a VPValue that has an underlying LLVM IR value.
Prior to this changeset, the WidenIntOrFPInduction::print method printed the LLVM IR PHINode. I've changed it to print the VPValues used by the WidenIntOrFPInduction recipe.
372daf0 to
f3d58bf
Compare
f3d58bf to
3dc7746
Compare
Loop Vectorizer still has two recipes `VPWidenIntOrFpInductionRecipe`
and `VPWidenPointerInductionRecipe` that behave in a VPlan as
phi-like, as they're derived from `VPHeaderPHIRecipe`, but their generate
functions construct vector phi and vector self-update in the vectorized loop.
This is not only bad for the readability of a VPlan, but also requires more code to
maintain such behavior. For instance, there's already ad-hoc code motion
to move generated updates of these recipes closer to the loop latch.
The changeset:
* Adds `WidenVFxUF` to represent the `broadcast({1...UF} x VFxUF)` value
* Decomposes existing `VPWidenIntOrFpInductionRecipe` into
```
WIDEN-INDUCTION vp<%iv> = phi ir<0>, vp<%be-value>
...
EMIT vp<%widen-step> = mul ir<%step>, vp<WidenVFxUF>
EMIT vp<%be-value> = add vp<%iv>,vp<%widen-step>
```
* Moves trunc optimization of widen IV into VPlan xform
* Adds trivial cyclic dependency removal and marks some binops as
non-side-effecting
* Adds an element type to `VPValue` so that it can be queried for artificially
added `VPValue`s without an underlying instruction
3dc7746 to
484e061
Compare
|
@fhahn ping |
|
@fhahn ping |
|
@fhahn ping |
…ataWithEVL vectorization mode. As an alternative approach to llvm#82021, this patch lowers VPWidenIntOrFpInductionRecipe into a widen phi recipe and step recipes, computed using EVL in the EVL transformation phase.
Loop Vectorizer still has two recipes
`VPWidenIntOrFpInductionRecipe` and `VPWidenPointerInductionRecipe` that behave in a VPlan as phi-like, as they're derived from `VPHeaderPHIRecipe`, but their generate functions construct vector phi and vector self-update in the vectorized loop. This is not only bad for the readability of a VPlan, but also requires more code to maintain such behavior. For instance, there's already ad-hoc code motion to move generated updates of these recipes closer to the loop latch.
The changeset:
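* Adds `WidenVFxUF` to represent the `broadcast({1...UF} x VFxUF)` value
* Decomposes the existing `VPWidenIntOrFpInductionRecipe` into a phi recipe and explicit update recipes
* Moves trunc optimization of widen IV into a VPlan transform
* Adds trivial cyclic dependency removal and marks some binops as non-side-effecting
* Adds an element type to `VPValue` so that it can be queried for artificially added `VPValue`s without an underlying instruction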