Skip to content

Commit f072e3c

Browse files
committed
Update comments, restore vplan-sink-scalars-and-merge.ll so we're still merging replicate regions
1 parent 88748ae commit f072e3c

File tree

3 files changed

+50
-86
lines changed

3 files changed

+50
-86
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1243,7 +1243,10 @@ class LoopVectorizationCostModel {
12431243
getPredBlockCostDivisor(TargetTransformInfo::TargetCostKind CostKind,
12441244
BasicBlock *BB) const {
12451245
// If a block wasn't originally predicated but was predicated due to
1246-
// e.g. tail folding, don't divide the cost.
1246+
// e.g. tail folding, don't divide the cost. Tail-folded loops may still be
1247+
// predicated in the final vector loop iteration, but for most loops that
1248+
// don't have low trip counts we can expect the probability of the block
1249+
// being masked off to be close to zero.
12471250
if (!Legal->blockNeedsPredication(BB))
12481251
return 1;
12491252
return CostKind == TTI::TCK_CodeSize ? 1 : 2;

llvm/lib/Transforms/Vectorize/VPlanHelpers.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,7 @@ struct VPCostContext {
352352
bool skipCostComputation(Instruction *UI, bool IsVector) const;
353353

354354
/// \returns how much the cost of a predicated block should be divided by.
355+
/// Forwards to LoopVectorizationCostModel::getPredBlockCostDivisor.
355356
unsigned getPredBlockCostDivisor(TargetTransformInfo::TargetCostKind CostKind,
356357
BasicBlock *BB) const;
357358

llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll

Lines changed: 45 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; REQUIRES: asserts
22

3-
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -debug -disable-output %s 2>&1 | FileCheck %s
3+
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -force-widen-divrem-via-safe-divisor=0 -debug -disable-output %s 2>&1 | FileCheck %s
44

55
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
66

@@ -28,45 +28,26 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
2828
; CHECK-NEXT: vector.body:
2929
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
3030
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
31-
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
3231
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>
33-
; CHECK-NEXT: Successor(s): pred.load
34-
; CHECK-EMPTY:
35-
; CHECK-NEXT: <xVFxUF> pred.load: {
36-
; CHECK-NEXT: pred.load.entry:
37-
; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]>
38-
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
39-
; CHECK-EMPTY:
40-
; CHECK-NEXT: pred.load.if:
41-
; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr inbounds ir<@b>, ir<0>, vp<[[STEPS]]>
42-
; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b> (S->V)
43-
; CHECK-NEXT: Successor(s): pred.load.continue
44-
; CHECK-EMPTY:
45-
; CHECK-NEXT: pred.load.continue:
46-
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%8> = ir<%lv.b>
47-
; CHECK-NEXT: No successors
48-
; CHECK-NEXT: }
49-
; CHECK-NEXT: Successor(s): loop.0
50-
; CHECK-EMPTY:
51-
; CHECK-NEXT: loop.0:
52-
; CHECK-NEXT: WIDEN ir<%add> = add vp<%8>, ir<10>
53-
; CHECK-NEXT: WIDEN ir<%mul> = mul ir<2>, ir<%add>
54-
; CHECK-NEXT: Successor(s): pred.store
55-
; CHECK-EMPTY:
56-
; CHECK-NEXT: <xVFxUF> pred.store: {
57-
; CHECK-NEXT: pred.store.entry:
58-
; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]>
59-
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
60-
; CHECK-EMPTY:
61-
; CHECK-NEXT: pred.store.if:
62-
; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr inbounds ir<@a>, ir<0>, vp<[[STEPS]]>
63-
; CHECK-NEXT: REPLICATE store ir<%mul>, ir<%gep.a>
64-
; CHECK-NEXT: Successor(s): pred.store.continue
65-
; CHECK-EMPTY:
66-
; CHECK-NEXT: pred.store.continue:
67-
; CHECK-NEXT: No successors
68-
; CHECK-NEXT: }
69-
; CHECK-NEXT: Successor(s): loop.1
32+
; CHECK-NEXT: Successor(s): pred.store
33+
34+
; CHECK: <xVFxUF> pred.store: {
35+
; CHECK-NEXT: pred.store.entry:
36+
; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]>
37+
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
38+
39+
; CHECK: pred.store.if:
40+
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
41+
; CHECK-NEXT: REPLICATE ir<%gep.b> = getelementptr inbounds ir<@b>, ir<0>, vp<[[STEPS]]>
42+
; CHECK-NEXT: REPLICATE ir<%lv.b> = load ir<%gep.b>
43+
; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr inbounds ir<@a>, ir<0>, vp<[[STEPS]]>
44+
; CHECK-NEXT: REPLICATE store ir<%lv.b>, ir<%gep.a>
45+
; CHECK-NEXT: Successor(s): pred.store.continue
46+
47+
; CHECK: pred.store.continue:
48+
; CHECK-NEXT: No successors
49+
; CHECK-NEXT: }
50+
7051
; CHECK: loop.1:
7152
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
7253
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
@@ -81,10 +62,8 @@ loop:
8162
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
8263
%gep.b = getelementptr inbounds [2048 x i32], ptr @b, i32 0, i32 %iv
8364
%lv.b = load i32, ptr %gep.b, align 4
84-
%add = add i32 %lv.b, 10
85-
%mul = mul i32 2, %add
8665
%gep.a = getelementptr inbounds [2048 x i32], ptr @a, i32 0, i32 %iv
87-
store i32 %mul, ptr %gep.a, align 4
66+
store i32 %lv.b, ptr %gep.a, align 4
8867
%iv.next = add i32 %iv, 1
8968
%large = icmp sge i32 %iv, 8
9069
%exitcond = icmp eq i32 %iv, %k
@@ -777,46 +756,28 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) {
777756
; CHECK-NEXT: vector.body:
778757
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
779758
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, ir<1>, vp<[[VF]]>
780-
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
781759
; CHECK-NEXT: EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv>, vp<[[BTC]]>
782-
; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr inbounds ir<@a>, ir<0>, vp<[[STEPS]]>
783-
; CHECK-NEXT: Successor(s): pred.load
784-
; CHECK-EMPTY:
785-
; CHECK-NEXT: <xVFxUF> pred.load: {
786-
; CHECK-NEXT: pred.load.entry:
787-
; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]>
788-
; CHECK-NEXT: Successor(s): pred.load.if, pred.load.continue
789-
; CHECK-EMPTY:
790-
; CHECK-NEXT: pred.load.if:
791-
; CHECK-NEXT: REPLICATE ir<%lv.a> = load ir<%gep.a> (S->V)
792-
; CHECK-NEXT: Successor(s): pred.load.continue
793-
; CHECK-EMPTY:
794-
; CHECK-NEXT: pred.load.continue:
795-
; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<[[PRED:%.+]]> = ir<%lv.a>
796-
; CHECK-NEXT: No successors
797-
; CHECK-NEXT: }
798-
; CHECK-NEXT: Successor(s): loop.0
799-
; CHECK-EMPTY:
800-
; CHECK-NEXT: loop.0:
801-
; CHECK-NEXT: EMIT vp<[[SELECT:%.+]]> = select vp<[[MASK]]>, vp<[[PRED]]>, ir<1>
802-
; CHECK-NEXT: WIDEN ir<%div> = sdiv vp<[[PRED]]>, vp<[[SELECT]]>
803-
; CHECK-NEXT: Successor(s): pred.store
804-
; CHECK-EMPTY:
805-
; CHECK-NEXT: <xVFxUF> pred.store: {
806-
; CHECK-NEXT: pred.store.entry:
807-
; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]>
808-
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
809-
; CHECK-EMPTY:
810-
; CHECK-NEXT: pred.store.if:
811-
; CHECK-NEXT: REPLICATE store ir<%div>, ir<%gep.a>
812-
; CHECK-NEXT: Successor(s): pred.store.continue
813-
; CHECK-EMPTY:
814-
; CHECK-NEXT: pred.store.continue:
815-
; CHECK-NEXT: No successors
816-
; CHECK-NEXT: }
817-
; CHECK-NEXT: Successor(s): loop.1
818-
; CHECK-EMPTY:
819-
; CHECK-NEXT: loop.1:
760+
; CHECK-NEXT: Successor(s): pred.store
761+
; CHECK-EMPTY:
762+
; CHECK-NEXT: <xVFxUF> pred.store: {
763+
; CHECK-NEXT: pred.store.entry:
764+
; CHECK-NEXT: BRANCH-ON-MASK vp<[[MASK]]>
765+
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
766+
; CHECK-EMPTY:
767+
; CHECK-NEXT: pred.store.if:
768+
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
769+
; CHECK-NEXT: REPLICATE ir<%gep.a> = getelementptr inbounds ir<@a>, ir<0>, vp<[[STEPS]]>
770+
; CHECK-NEXT: REPLICATE ir<%lv.a> = load ir<%gep.a>
771+
; CHECK-NEXT: REPLICATE ir<%div> = sdiv ir<%lv.a>, ir<%lv.a>
772+
; CHECK-NEXT: REPLICATE store ir<%div>, ir<%gep.a>
773+
; CHECK-NEXT: Successor(s): pred.store.continue
774+
; CHECK-EMPTY:
775+
; CHECK-NEXT: pred.store.continue:
776+
; CHECK-NEXT: No successors
777+
; CHECK-NEXT: }
778+
; CHECK-NEXT: Successor(s): loop.2
779+
; CHECK-EMPTY:
780+
; CHECK-NEXT: loop.2:
820781
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
821782
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
822783
; CHECK-NEXT: No successors
@@ -884,8 +845,6 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) {
884845
; CHECK-EMPTY:
885846
; CHECK-NEXT: loop.0:
886847
; CHECK-NEXT: EMIT vp<[[SPLICE:%.+]]> = first-order splice ir<%for>, vp<[[PRED]]>
887-
; CHECK-NEXT: EMIT vp<[[SELECT:%.+]]> = select vp<[[MASK]]>, vp<[[PRED]]>, ir<1>
888-
; CHECK-NEXT: WIDEN ir<%div> = sdiv vp<[[SPLICE]]>, vp<[[SELECT]]>
889848
; CHECK-NEXT: Successor(s): pred.store
890849
; CHECK-EMPTY:
891850
; CHECK-NEXT: <xVFxUF> pred.store: {
@@ -894,15 +853,16 @@ define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) {
894853
; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue
895854
; CHECK-EMPTY:
896855
; CHECK-NEXT: pred.store.if:
856+
; CHECK-NEXT: REPLICATE ir<%div> = sdiv vp<[[SPLICE]]>, vp<[[PRED]]>
897857
; CHECK-NEXT: REPLICATE store ir<%div>, ir<%gep.a>
898858
; CHECK-NEXT: Successor(s): pred.store.continue
899859
; CHECK-EMPTY:
900860
; CHECK-NEXT: pred.store.continue:
901861
; CHECK-NEXT: No successors
902862
; CHECK-NEXT: }
903-
; CHECK-NEXT: Successor(s): loop.1
863+
; CHECK-NEXT: Successor(s): loop.2
904864
; CHECK-EMPTY:
905-
; CHECK-NEXT: loop.1:
865+
; CHECK-NEXT: loop.2:
906866
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
907867
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
908868
; CHECK-NEXT: No successors

0 commit comments

Comments
 (0)