Skip to content

Commit b0b433c

Browse files
authored
[VPlan] Intersect IR flags across interleave members when narrowing. (#201682)
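Update narrowInterleaveGroupOp to intersect the IR flags of all wide members when they are combined into a single narrow recipe, so that the narrowed recipe only carries the flags common to every combined member. For example, when only some members carry `disjoint` on an `or`, the narrowed `or` no longer keeps the `disjoint` flag.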
1 parent f76f26d commit b0b433c

2 files changed

Lines changed: 17 additions & 10 deletions

File tree

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 16 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5625,10 +5625,13 @@ static bool isAlreadyNarrow(VPValue *VPV) {
56255625
return RepR && RepR->isSingleScalar();
56265626
}
56275627

5628-
// Convert a wide recipe defining a VPValue \p V feeding an interleave group to
5629-
// a narrow variant.
5628+
// Convert the wide recipes defining the VPValues in \p Members feeding an
5629+
// interleave group to a single narrow variant. The first member is reused as
5630+
// the narrowed recipe.
56305631
static VPValue *
5631-
narrowInterleaveGroupOp(VPValue *V, SmallPtrSetImpl<VPValue *> &NarrowedOps) {
5632+
narrowInterleaveGroupOp(ArrayRef<VPValue *> Members,
5633+
SmallPtrSetImpl<VPValue *> &NarrowedOps) {
5634+
VPValue *V = Members.front();
56325635
auto *R = V->getDefiningRecipe();
56335636
if (!R || NarrowedOps.contains(V))
56345637
return V;
@@ -5637,11 +5640,15 @@ narrowInterleaveGroupOp(VPValue *V, SmallPtrSetImpl<VPValue *> &NarrowedOps) {
56375640
return V;
56385641

56395642
if (isa<VPWidenRecipe, VPWidenCastRecipe>(R)) {
5640-
auto *WideMember0 = cast<VPSingleDefRecipe>(R);
5641-
for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx)
5642-
WideMember0->setOperand(
5643-
Idx,
5644-
narrowInterleaveGroupOp(WideMember0->getOperand(Idx), NarrowedOps));
5643+
auto *WideMember0 = cast<VPRecipeWithIRFlags>(R);
5644+
for (VPValue *Member : Members.drop_front())
5645+
WideMember0->intersectFlags(*cast<VPRecipeWithIRFlags>(Member));
5646+
for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx) {
5647+
SmallVector<VPValue *> OpsI;
5648+
for (VPValue *Member : Members)
5649+
OpsI.push_back(Member->getDefiningRecipe()->getOperand(Idx));
5650+
WideMember0->setOperand(Idx, narrowInterleaveGroupOp(OpsI, NarrowedOps));
5651+
}
56455652
return V;
56465653
}
56475654

@@ -5808,7 +5815,7 @@ VPlanTransforms::narrowInterleaveGroups(VPlan &Plan,
58085815
// Narrow operation tree rooted at store groups.
58095816
for (auto *StoreGroup : StoreGroups) {
58105817
VPValue *Res =
5811-
narrowInterleaveGroupOp(StoreGroup->getStoredValues()[0], NarrowedOps);
5818+
narrowInterleaveGroupOp(StoreGroup->getStoredValues(), NarrowedOps);
58125819
auto *SI =
58135820
cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos());
58145821
auto *S = new VPWidenStoreRecipe(*SI, StoreGroup->getAddr(), Res, nullptr,

llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1283,7 +1283,7 @@ define void @flag_mismatch_disjoint(ptr noalias %dst, ptr noalias %src) {
12831283
; VF2-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
12841284
; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP0]]
12851285
; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8
1286-
; VF2-NEXT: [[TMP2:%.*]] = or disjoint <2 x i64> [[WIDE_LOAD]], splat (i64 1)
1286+
; VF2-NEXT: [[TMP2:%.*]] = or <2 x i64> [[WIDE_LOAD]], splat (i64 1)
12871287
; VF2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]]
12881288
; VF2-NEXT: store <2 x i64> [[TMP2]], ptr [[TMP3]], align 8
12891289
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 1

0 commit comments

Comments
 (0)