Skip to content

Commit 840e9a4

Browse files
authored
[VPlan] Fix wrap-flags on WidenInduction unroll (#187710)
Due to a somewhat recent change, IntOrFpInduction recipes have associated VPIRFlags. The VPlanUnroll logic for WidenInduction recipes predates this change, and computes incomplete wrap-flags. Update it to simply use the flags on IntOrFpInduction recipes. PointerInduction recipes have no associated flags, and indeed, no flags should be used for them.
1 parent 94c0d37 commit 840e9a4

17 files changed

+90
-73
lines changed

flang/test/Integration/unroll-loops.f90

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,13 @@ subroutine unroll(a)
2727
! NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %{{.*}}, 2
2828
! NO-UNROLL-NEXT: %[[NVIND]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 2)
2929
!
30-
! UNROLL-NEXT: %[[VIND1:.*]] = add <2 x i64> %[[VIND]], splat (i64 2)
30+
! UNROLL-NEXT: %[[VIND1:.*]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 2)
3131
! UNROLL-NEXT: %[[GEP0:.*]] = getelementptr [8 x i8], ptr %[[ARG0]], i64 %[[IND]]
3232
! UNROLL-NEXT: %[[GEP1:.*]] = getelementptr i8, ptr %[[GEP0]], i64 16
3333
! UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP0]]
3434
! UNROLL-NEXT: store <2 x i64> %[[VIND1]], ptr %[[GEP1]]
3535
! UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %[[IND]], 4
36-
! UNROLL-NEXT: %[[NVIND:.*]] = add <2 x i64> %[[VIND]], splat (i64 4)
36+
! UNROLL-NEXT: %[[NVIND:.*]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 4)
3737
!
3838
! CHECK-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1000
3939
! CHECK-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]]

flang/test/Lower/HLFIR/unroll-loops.fir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,13 @@ func.func @unroll(%arg0: !fir.ref<!fir.array<1000 x index>> {fir.bindc_name = "a
2929
// NO-UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %{{.*}}, 2
3030
// NO-UNROLL-NEXT: %[[NVIND]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 2)
3131

32-
// UNROLL-NEXT: %[[VIND1:.*]] = add <2 x i64> %[[VIND]], splat (i64 2)
32+
// UNROLL-NEXT: %[[VIND1:.*]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 2)
3333
// UNROLL-NEXT: %[[GEP0:.*]] = getelementptr [8 x i8], ptr %[[ARG0]], i64 %[[IND]]
3434
// UNROLL-NEXT: %[[GEP1:.*]] = getelementptr i8, ptr %[[GEP0]], i64 16
3535
// UNROLL-NEXT: store <2 x i64> %[[VIND]], ptr %[[GEP0]]
3636
// UNROLL-NEXT: store <2 x i64> %[[VIND1]], ptr %[[GEP1]]
3737
// UNROLL-NEXT: %[[NIV:.*]] = add nuw i64 %[[IND]], 4
38-
// UNROLL-NEXT: %[[NVIND:.*]] = add <2 x i64> %[[VIND]], splat (i64 4)
38+
// UNROLL-NEXT: %[[NVIND:.*]] = add nuw nsw <2 x i64> %[[VIND]], splat (i64 4)
3939

4040
// CHECK-NEXT: %[[EXIT:.*]] = icmp eq i64 %[[NIV]], 1000
4141
// CHECK-NEXT: br i1 %[[EXIT]], label %{{.*}}, label %[[BLK]]

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1030,6 +1030,20 @@ class VPIRFlags {
10301030
}
10311031
}
10321032

1033+
bool hasNoWrapFlags() const {
1034+
switch (OpType) {
1035+
case OperationType::OverflowingBinOp:
1036+
case OperationType::Trunc:
1037+
return true;
1038+
default:
1039+
return false;
1040+
}
1041+
}
1042+
1043+
WrapFlagsTy getNoWrapFlags() const {
1044+
return {hasNoUnsignedWrap(), hasNoSignedWrap()};
1045+
}
1046+
10331047
bool isDisjoint() const {
10341048
assert(OpType == OperationType::DisjointOp &&
10351049
"recipe cannot have a disjoing flag");

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3364,11 +3364,9 @@ void VPlanTransforms::addExplicitVectorLength(
33643364
OpVPEVL = Builder.createScalarZExtOrTrunc(
33653365
OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
33663366

3367-
auto *NextIter = Builder.createAdd(OpVPEVL, CurrentIteration,
3368-
CanonicalIVIncrement->getDebugLoc(),
3369-
"current.iteration.next",
3370-
{CanonicalIVIncrement->hasNoUnsignedWrap(),
3371-
CanonicalIVIncrement->hasNoSignedWrap()});
3367+
auto *NextIter = Builder.createAdd(
3368+
OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
3369+
"current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
33723370
CurrentIteration->addOperand(NextIter);
33733371

33743372
VPValue *NextAVL =

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -179,17 +179,22 @@ void UnrollState::unrollWidenInductionByUF(
179179
IV->getParent()->getEnclosingLoopRegion()->getSinglePredecessor());
180180
Type *IVTy = TypeInfo.inferScalarType(IV);
181181
auto &ID = IV->getInductionDescriptor();
182-
VPIRFlags Flags;
183-
if (isa_and_present<FPMathOperator>(ID.getInductionBinOp()))
184-
Flags = ID.getInductionBinOp()->getFastMathFlags();
182+
FastMathFlags FMF;
183+
VPIRFlags::WrapFlagsTy WrapFlags(false, false);
184+
if (auto *IntOrFPInd = dyn_cast<VPWidenIntOrFpInductionRecipe>(IV)) {
185+
if (IntOrFPInd->hasFastMathFlags())
186+
FMF = IntOrFPInd->getFastMathFlags();
187+
if (IntOrFPInd->hasNoWrapFlags())
188+
WrapFlags = IntOrFPInd->getNoWrapFlags();
189+
}
185190

186191
VPValue *ScalarStep = IV->getStepValue();
187192
VPBuilder Builder(PH);
188193
Type *VectorStepTy =
189194
IVTy->isPointerTy() ? TypeInfo.inferScalarType(ScalarStep) : IVTy;
190195
VPInstruction *VectorStep = Builder.createNaryOp(
191-
VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, VectorStepTy,
192-
Flags, IV->getDebugLoc());
196+
VPInstruction::WideIVStep, {&Plan.getVF(), ScalarStep}, VectorStepTy, FMF,
197+
IV->getDebugLoc());
193198

194199
ToSkip.insert(VectorStep);
195200

@@ -214,10 +219,10 @@ void UnrollState::unrollWidenInductionByUF(
214219
AddFlags = GEPNoWrapFlags::none();
215220
} else if (IVTy->isFloatingPointTy()) {
216221
AddOpc = ID.getInductionOpcode();
217-
AddFlags = Flags; // FMF flags
222+
AddFlags = FMF;
218223
} else {
219224
AddOpc = Instruction::Add;
220-
AddFlags = VPIRFlags::getDefaultFlags(AddOpc);
225+
AddFlags = WrapFlags;
221226
if (cast<VPWidenIntOrFpInductionRecipe>(IV)->isCanonical())
222227
AddFlags = VPIRFlags::WrapFlagsTy(/*NUW=*/true, /*NSW=*/false);
223228
}

llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
2424
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
2525
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i8> [ splat (i8 -128), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
2626
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <16 x i8> [ splat (i8 -128), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
27-
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i8> [[VEC_IND]], splat (i8 16)
27+
; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <16 x i8> [[VEC_IND]], splat (i8 16)
2828
; CHECK-NEXT: [[INDEX4:%.*]] = trunc i32 [[INDEX]] to i8
2929
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[INDEX4]]
3030
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16

llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
197197
; CHECK: vector.body:
198198
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
199199
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
200-
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
200+
; CHECK-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <2 x i64> [[VEC_IND]], splat (i64 2)
201201
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[INDEX]]
202202
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
203203
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2

llvm/test/Transforms/LoopVectorize/AArch64/pr60831-sve-inv-store-crash.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ define void @test_invar_gep_var_start(i64 %start, ptr %dst) #0 {
179179
; IC2: vector.body:
180180
; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
181181
; IC2-NEXT: [[DOTSPLAT:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
182-
; IC2-NEXT: [[TMP10:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP9]]
182+
; IC2-NEXT: [[TMP10:%.*]] = add nsw <vscale x 4 x i64> [[DOTSPLAT]], [[TMP9]]
183183
; IC2-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32()
184184
; IC2-NEXT: [[TMP13:%.*]] = mul nuw i32 [[TMP12]], 4
185185
; IC2-NEXT: [[TMP14:%.*]] = sub i32 [[TMP13]], 1
@@ -303,7 +303,7 @@ define void @test_invar_gep_var_start_step_2(i64 %start, ptr %dst) #0 {
303303
; IC2: vector.body:
304304
; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
305305
; IC2-NEXT: [[DOTSPLAT:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
306-
; IC2-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP13]]
306+
; IC2-NEXT: [[TMP14:%.*]] = add nsw <vscale x 4 x i64> [[DOTSPLAT]], [[TMP13]]
307307
; IC2-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32()
308308
; IC2-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 4
309309
; IC2-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1

llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55

66
; CHECK-LABEL: foo
77
; CHECK: LV: IC is 2
8-
; CHECK: %{{.*}} = add <8 x i32> %{{.*}}, splat (i32 8)
98
; CHECK: %{{.*}} = add {{.*}}, 16
9+
; CHECK: %{{.*}} = add <8 x i32> %{{.*}}, splat (i32 8)
1010

1111
; Function Attrs: nofree norecurse nosync nounwind writeonly
1212
define void @foo(i32 signext %n, ptr nocapture %A) #0 {

llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ define void @single_constant_stride_int_iv(ptr %p) {
156156
; CHECK-UF2: vector.body:
157157
; CHECK-UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
158158
; CHECK-UF2-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[TMP8]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
159-
; CHECK-UF2-NEXT: [[STEP_ADD:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], [[TMP6]]
159+
; CHECK-UF2-NEXT: [[STEP_ADD:%.*]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[TMP6]]
160160
; CHECK-UF2-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[VEC_IND]]
161161
; CHECK-UF2-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[P]], <vscale x 4 x i64> [[STEP_ADD]]
162162
; CHECK-UF2-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 [[TMP9]], <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison)

0 commit comments

Comments
 (0)