[VPlan] Handle scalar VPWidenPointerInd in convertToConcreteRecipes.#169338
[VPlan] Handle scalar VPWidenPointerInd in convertToConcreteRecipes.#169338
Conversation
…ipes. In some cases, VPWidenPointerInductions become only used by scalars after legalizeAndOptimizeInductions has already run, for example due to some VPlan optimizations. Move the code to scalarize VPWidenPointerInductions to a helper and use it if needed. This fixes a crash after llvm#148274 in the added test case.
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers Author: Florian Hahn (fhahn) Changes: In some cases, VPWidenPointerInductions become only used by scalars after legalizeAndOptimizeInductions has already run, for example due to some VPlan optimizations. Move the code to scalarize VPWidenPointerInductions to a helper and use it if needed. This fixes a crash after #148274 in the added test case. 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index e7a8773be067b..5079fa488ec46 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -668,6 +668,23 @@ static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
return Users.takeVector();
}
+/// Scalarize a VPWidenPointerInductionRecipe by replacing it with a PtrAdd
+/// (IndStart, ScalarIVSteps (0, Step)). This is used when the recipe only
+/// generates scalar values.
+static VPValue *
+scalarizeVPWidenPointerInduction(VPWidenPointerInductionRecipe *PtrIV,
+ VPlan &Plan, VPBuilder &Builder) {
+ const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
+ VPValue *StartV = Plan.getConstantInt(ID.getStep()->getType(), 0);
+ VPValue *StepV = PtrIV->getOperand(1);
+ VPScalarIVStepsRecipe *Steps = createScalarIVSteps(
+ Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr,
+ nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
+
+ return Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
+ PtrIV->getDebugLoc(), "next.gep");
+}
+
/// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd
/// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as
/// VPWidenPointerInductionRecipe will generate vectors only. If some users
@@ -720,16 +737,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF()))
continue;
- const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
- VPValue *StartV = Plan.getConstantInt(ID.getStep()->getType(), 0);
- VPValue *StepV = PtrIV->getOperand(1);
- VPScalarIVStepsRecipe *Steps = createScalarIVSteps(
- Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr,
- nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
-
- VPValue *PtrAdd = Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
- PtrIV->getDebugLoc(), "next.gep");
-
+ VPValue *PtrAdd = scalarizeVPWidenPointerInduction(PtrIV, Plan, Builder);
PtrIV->replaceAllUsesWith(PtrAdd);
continue;
}
@@ -3502,6 +3510,16 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
}
if (auto *WidenIVR = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
+ // If the recipe only generates scalars, scalarize it instead of
+ // expanding it.
+ if (WidenIVR->onlyScalarsGenerated(Plan.hasScalableVF())) {
+ VPBuilder Builder(WidenIVR);
+ VPValue *PtrAdd =
+ scalarizeVPWidenPointerInduction(WidenIVR, Plan, Builder);
+ WidenIVR->replaceAllUsesWith(PtrAdd);
+ ToRemove.push_back(WidenIVR);
+ continue;
+ }
expandVPWidenPointerInduction(WidenIVR, TypeInfo);
ToRemove.push_back(WidenIVR);
continue;
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll
index fa710cb8d65b1..47bb6b469c8ad 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll
@@ -61,4 +61,106 @@ exit:
ret void
}
+define i1 @scalarize_ptr_induction(ptr %start, ptr %end, ptr noalias %dst, i1 %c) #1 {
+; CHECK-LABEL: define i1 @scalarize_ptr_induction(
+; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]], ptr noalias [[DST:%.*]], i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[START5:%.*]] = ptrtoint ptr [[START]] to i64
+; CHECK-NEXT: [[END4:%.*]] = ptrtoint ptr [[END]] to i64
+; CHECK-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
+; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[END4]], -12
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START5]]
+; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP1]], 12
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 1
+; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP5]], i64 15)
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP3]], [[UMAX]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
+; CHECK: [[VECTOR_MEMCHECK]]:
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 8
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[END1]], -12
+; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], [[START2]]
+; CHECK-NEXT: [[TMP8:%.*]] = udiv i64 [[TMP7]], 12
+; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 12
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 8
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP10]]
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP3]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[START]], [[SCEVGEP]]
+; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP12]]
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[TMP14]]
+; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[N_VEC]], 12
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP15]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 12
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 6 x i32>, ptr [[NEXT_GEP]], align 4, !alias.scope [[META3:![0-9]+]]
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3.nxv6i32(<vscale x 6 x i32> [[WIDE_VEC]])
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[STRIDED_VEC]], 0
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } [[STRIDED_VEC]], 1
+; CHECK-NEXT: [[TMP19:%.*]] = zext <vscale x 2 x i32> [[TMP18]] to <vscale x 2 x i64>
+; CHECK-NEXT: [[TMP20:%.*]] = mul <vscale x 2 x i64> [[TMP19]], splat (i64 -7070675565921424023)
+; CHECK-NEXT: [[TMP21:%.*]] = add <vscale x 2 x i64> [[TMP20]], splat (i64 -4)
+; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP23:%.*]] = mul nuw i32 [[TMP22]], 2
+; CHECK-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <vscale x 2 x i64> [[TMP21]], i32 [[TMP24]]
+; CHECK-NEXT: store i64 [[TMP25]], ptr [[DST]], align 1, !alias.scope [[META6:![0-9]+]], !noalias [[META3]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP16]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ], [ [[START]], %[[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR_IV]], i64 4
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT: [[EXT:%.*]] = zext i32 [[L]] to i64
+; CHECK-NEXT: [[UNUSED:%.*]] = load i32, ptr [[PTR_IV]], align 4
+; CHECK-NEXT: [[MUL1:%.*]] = mul i64 [[EXT]], -7070675565921424023
+; CHECK-NEXT: [[MUL2:%.*]] = add i64 [[MUL1]], -4
+; CHECK-NEXT: store i64 [[MUL2]], ptr [[DST]], align 1
+; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr nusw i8, ptr [[PTR_IV]], i64 12
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 true, i1 false
+; CHECK-NEXT: br i1 [[OR_COND]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[CMP_LCSSA:%.*]] = phi i1 [ [[CMP]], %[[LOOP]] ]
+; CHECK-NEXT: ret i1 [[CMP_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop ]
+ %gep = getelementptr i8, ptr %ptr.iv, i64 4
+ %l = load i32, ptr %gep, align 4
+ %ext = zext i32 %l to i64
+ %unused = load i32, ptr %ptr.iv, align 4
+ %mul1 = mul i64 %ext, -7070675565921424023
+ %mul2 = add i64 %mul1, -4
+ store i64 %mul2, ptr %dst, align 1
+ %ptr.iv.next = getelementptr nusw i8, ptr %ptr.iv, i64 12
+ %cmp = icmp eq ptr %ptr.iv.next, %end
+ %or.cond = select i1 %cmp, i1 true, i1 false
+ br i1 %or.cond, label %exit, label %loop
+
+exit:
+ ret i1 %cmp
+}
+
attributes #0 = { "target-features"="+v" }
+attributes #1 = { "target-cpu"="sifive-p670" }
|
🐧 Linux x64 Test Results
Failed Tests (click on a test name to see its output): LLVM :: LLVM.Transforms/LoopVectorize/RISCV/pointer-induction.ll. If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
lukel97
left a comment
There was a problem hiding this comment.
Does VPWidenIntOrFpInductionRecipe have the same problem where it could be replaced by scalar steps? Does running legalizeAndOptimizeInductions a second time before convertToConcreteRecipes help?
I don't think so, I think VPWidenPointerInductionRecipe is special in some sense here as it previously supported both wide/scalar codegen. |
…lvm#169338) In some cases, VPWidenPointerInductions become only used by scalars after legalizeAndOptimizeInductions has already run, for example due to some VPlan optimizations. Move the code to scalarize VPWidenPointerInductions to a helper and use it if needed. This fixes a crash after llvm#148274 in the added test case. Fixes llvm#169780
…lvm#169338) In some cases, VPWidenPointerInductions become only used by scalars after legalizeAndOptimizeInductions has already run, for example due to some VPlan optimizations. Move the code to scalarize VPWidenPointerInductions to a helper and use it if needed. This fixes a crash after llvm#148274 in the added test case. Fixes llvm#169780
…lvm#169338) In some cases, VPWidenPointerInductions become only used by scalars after legalizeAndOptimizeInductions has already run, for example due to some VPlan optimizations. Move the code to scalarize VPWidenPointerInductions to a helper and use it if needed. This fixes a crash after llvm#148274 in the added test case. Fixes llvm#169780
In some cases, VPWidenPointerInductions become only used by scalars after legalizeAndOptimizeInductions has already run, for example due to some VPlan optimizations.
Move the code to scalarize VPWidenPointerInductions to a helper and use it if needed.
This fixes a crash after #148274 in the added test case.
Fixes #169780