@@ -58,30 +58,30 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur
5858; CHECK-NEXT: [[N_VEC5:%.*]] = and i64 [[TMP0]], 4294967288
5959; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
6060; CHECK: vec.epilog.vector.body:
61- ; CHECK-NEXT: [[INDEX7 :%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11 :%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
62- ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[INDEX7 ]]
63- ; CHECK-NEXT: [[WIDE_LOAD8 :%.*]] = load <8 x i8>, ptr [[TMP14]], align 1
64- ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX7 ]]
65- ; CHECK-NEXT: [[WIDE_LOAD9 :%.*]] = load <8 x i8>, ptr [[TMP15]], align 1
66- ; CHECK-NEXT: [[TMP16:%.*]] = zext <8 x i8> [[WIDE_LOAD9 ]] to <8 x i16>
67- ; CHECK-NEXT: [[TMP17:%.*]] = zext <8 x i8> [[WIDE_LOAD8 ]] to <8 x i16>
61+ ; CHECK-NEXT: [[INDEX6 :%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10 :%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
62+ ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[C]], i64 [[INDEX6 ]]
63+ ; CHECK-NEXT: [[WIDE_LOAD7 :%.*]] = load <8 x i8>, ptr [[TMP14]], align 1
64+ ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX6 ]]
65+ ; CHECK-NEXT: [[WIDE_LOAD8 :%.*]] = load <8 x i8>, ptr [[TMP15]], align 1
66+ ; CHECK-NEXT: [[TMP16:%.*]] = zext <8 x i8> [[WIDE_LOAD8 ]] to <8 x i16>
67+ ; CHECK-NEXT: [[TMP17:%.*]] = zext <8 x i8> [[WIDE_LOAD7 ]] to <8 x i16>
6868; CHECK-NEXT: [[TMP18:%.*]] = mul nuw <8 x i16> [[TMP16]], [[TMP17]]
6969; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7070; CHECK-NEXT: [[TMP20:%.*]] = trunc nuw <8 x i16> [[TMP19]] to <8 x i8>
7171; CHECK-NEXT: store <8 x i8> [[TMP20]], ptr [[TMP15]], align 1
72- ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX7 ]]
73- ; CHECK-NEXT: [[WIDE_LOAD10 :%.*]] = load <8 x i8>, ptr [[TMP21]], align 1
74- ; CHECK-NEXT: [[TMP22:%.*]] = zext <8 x i8> [[WIDE_LOAD10 ]] to <8 x i16>
72+ ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX6 ]]
73+ ; CHECK-NEXT: [[WIDE_LOAD9 :%.*]] = load <8 x i8>, ptr [[TMP21]], align 1
74+ ; CHECK-NEXT: [[TMP22:%.*]] = zext <8 x i8> [[WIDE_LOAD9 ]] to <8 x i16>
7575; CHECK-NEXT: [[TMP23:%.*]] = mul nuw <8 x i16> [[TMP22]], [[TMP17]]
7676; CHECK-NEXT: [[TMP24:%.*]] = lshr <8 x i16> [[TMP23]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
7777; CHECK-NEXT: [[TMP25:%.*]] = trunc nuw <8 x i16> [[TMP24]] to <8 x i8>
7878; CHECK-NEXT: store <8 x i8> [[TMP25]], ptr [[TMP21]], align 1
79- ; CHECK-NEXT: [[INDEX_NEXT11 ]] = add nuw i64 [[INDEX7 ]], 8
80- ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT11 ]], [[N_VEC5]]
79+ ; CHECK-NEXT: [[INDEX_NEXT10 ]] = add nuw i64 [[INDEX6 ]], 8
80+ ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT10 ]], [[N_VEC5]]
8181; CHECK-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
8282; CHECK: vec.epilog.middle.block:
83- ; CHECK-NEXT: [[CMP_N6 :%.*]] = icmp eq i64 [[N_VEC5]], [[TMP0]]
84- ; CHECK-NEXT: br i1 [[CMP_N6 ]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
83+ ; CHECK-NEXT: [[CMP_N11 :%.*]] = icmp eq i64 [[N_VEC5]], [[TMP0]]
84+ ; CHECK-NEXT: br i1 [[CMP_N11 ]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
8585; CHECK: vec.epilog.scalar.ph:
8686; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
8787; CHECK-NEXT: br label [[FOR_BODY:%.*]]
@@ -165,15 +165,15 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i
165165; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> poison, <16 x i32> zeroinitializer
166166; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i16> poison, i16 [[B]], i64 0
167167; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> zeroinitializer
168- ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
169- ; CHECK: vector.body:
170- ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
171168; CHECK-NEXT: [[TMP5:%.*]] = mul <16 x i16> [[TMP2]], [[TMP4]]
172169; CHECK-NEXT: [[TMP6:%.*]] = mul <16 x i16> [[TMP2]], [[TMP4]]
173170; CHECK-NEXT: [[TMP7:%.*]] = lshr <16 x i16> [[TMP5]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
174171; CHECK-NEXT: [[TMP8:%.*]] = lshr <16 x i16> [[TMP6]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
175172; CHECK-NEXT: [[TMP9:%.*]] = trunc nuw <16 x i16> [[TMP7]] to <16 x i8>
176173; CHECK-NEXT: [[TMP10:%.*]] = trunc nuw <16 x i16> [[TMP8]] to <16 x i8>
174+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
175+ ; CHECK: vector.body:
176+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
177177; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[INDEX]] to i64
178178; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP11]]
179179; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 16
@@ -190,13 +190,13 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i
190190; CHECK-NEXT: [[TMP15:%.*]] = trunc i32 [[A]] to i16
191191; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0
192192; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x i16> poison, i16 [[B]], i64 0
193- ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
194- ; CHECK: vec.epilog.vector.body:
195- ; CHECK-NEXT: [[INDEX7:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
196193; CHECK-NEXT: [[TMP18:%.*]] = mul <8 x i16> [[TMP16]], [[TMP17]]
197194; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], <i16 8, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
198195; CHECK-NEXT: [[TMP20:%.*]] = trunc <8 x i16> [[TMP19]] to <8 x i8>
199196; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> poison, <8 x i32> zeroinitializer
197+ ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
198+ ; CHECK: vec.epilog.vector.body:
199+ ; CHECK-NEXT: [[INDEX7:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
200200; CHECK-NEXT: [[TMP22:%.*]] = sext i32 [[INDEX7]] to i64
201201; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP22]]
202202; CHECK-NEXT: store <8 x i8> [[TMP21]], ptr [[TMP23]], align 1
@@ -244,41 +244,47 @@ define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 %
244244; CHECK: vector.ph:
245245; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[A]] to i16
246246; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
247+ ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> poison, <16 x i32> zeroinitializer
248+ ; CHECK-NEXT: [[TMP3:%.*]] = mul <16 x i16> [[TMP2]], <i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99>
249+ ; CHECK-NEXT: [[TMP4:%.*]] = mul <16 x i16> [[TMP2]], <i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99, i16 99>
250+ ; CHECK-NEXT: [[TMP5:%.*]] = lshr <16 x i16> [[TMP3]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
251+ ; CHECK-NEXT: [[TMP6:%.*]] = lshr <16 x i16> [[TMP4]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
252+ ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[C]], <16 x i16> [[TMP5]], <16 x i16> [[TMP3]]
253+ ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[C]], <16 x i16> [[TMP6]], <16 x i16> [[TMP4]]
254+ ; CHECK-NEXT: [[TMP9:%.*]] = trunc <16 x i16> [[TMP7]] to <16 x i8>
255+ ; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i16> [[TMP8]] to <16 x i8>
247256; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
248257; CHECK: vector.body:
249258; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
250- ; CHECK-NEXT: [[TMP2:%.*]] = mul <16 x i16> [[TMP1]], <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
251- ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> poison, <16 x i32> zeroinitializer
252- ; CHECK-NEXT: [[TMP4:%.*]] = lshr <16 x i16> [[TMP3]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
253- ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[C]], <16 x i16> [[TMP4]], <16 x i16> [[TMP3]]
254- ; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i16> [[TMP5]] to <16 x i8>
255- ; CHECK-NEXT: [[TMP7:%.*]] = sext i32 [[INDEX]] to i64
256- ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP7]]
257- ; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1
258- ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
259- ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
260- ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
259+ ; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[INDEX]] to i64
260+ ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP11]]
261+ ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 16
262+ ; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP12]], align 1
263+ ; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP13]], align 1
264+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
265+ ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
266+ ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
261267; CHECK: middle.block:
262268; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
263269; CHECK: vec.epilog.iter.check:
264270; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
265271; CHECK: vec.epilog.ph:
266- ; CHECK-NEXT: [[TMP10:%.*]] = trunc i32 [[A]] to i16
267- ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i16> poison, i16 [[TMP10]], i64 0
272+ ; CHECK-NEXT: [[TMP15:%.*]] = trunc i32 [[A]] to i16
273+ ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0
274+ ; CHECK-NEXT: [[TMP17:%.*]] = mul <8 x i16> [[TMP16]], <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
275+ ; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <8 x i16> [[TMP17]], <8 x i16> poison, <8 x i32> zeroinitializer
276+ ; CHECK-NEXT: [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
277+ ; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[C]], <8 x i16> [[TMP19]], <8 x i16> [[TMP18]]
278+ ; CHECK-NEXT: [[TMP21:%.*]] = trunc <8 x i16> [[TMP20]] to <8 x i8>
268279; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
269280; CHECK: vec.epilog.vector.body:
270281; CHECK-NEXT: [[INDEX3:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
271- ; CHECK-NEXT: [[TMP12:%.*]] = mul <8 x i16> [[TMP11]], <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
272- ; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x i16> [[TMP12]], <8 x i16> poison, <8 x i32> zeroinitializer
273- ; CHECK-NEXT: [[TMP14:%.*]] = lshr <8 x i16> [[TMP13]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
274- ; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[C]], <8 x i16> [[TMP14]], <8 x i16> [[TMP13]]
275- ; CHECK-NEXT: [[TMP16:%.*]] = trunc <8 x i16> [[TMP15]] to <8 x i8>
276- ; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[INDEX3]] to i64
277- ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP17]]
278- ; CHECK-NEXT: store <8 x i8> [[TMP16]], ptr [[TMP18]], align 1
282+ ; CHECK-NEXT: [[TMP22:%.*]] = sext i32 [[INDEX3]] to i64
283+ ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP22]]
284+ ; CHECK-NEXT: store <8 x i8> [[TMP21]], ptr [[TMP23]], align 1
279285; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i32 [[INDEX3]], 8
280- ; CHECK-NEXT: [[TMP19 :%.*]] = icmp eq i32 [[INDEX_NEXT4]], 1000
281- ; CHECK-NEXT: br i1 [[TMP19 ]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
286+ ; CHECK-NEXT: [[TMP24 :%.*]] = icmp eq i32 [[INDEX_NEXT4]], 1000
287+ ; CHECK-NEXT: br i1 [[TMP24 ]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
282288; CHECK: vec.epilog.middle.block:
283289; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
284290; CHECK: vec.epilog.scalar.ph:
0 commit comments