@@ -24,10 +24,9 @@ define <4 x i32> @smmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) sa
2424; CHECK-NEXT: [[TMP6:%.*]] = zext <16 x i1> [[TMP5]] to <16 x i8>
2525; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32> zeroinitializer, <16 x i8> [[TMP4]], <16 x i8> [[TMP6]])
2626; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <4 x i32> [[TMP7]], splat (i32 8)
27- ; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i1> [[TMP8]] to <4 x i32>
2827; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <4 x i32> [[TMP0]], zeroinitializer
29- ; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
30- ; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i32> [[TMP9]], [[TMP11]]
28+ ; CHECK-NEXT: [[TMP11:%.*]] = or <4 x i1> [[TMP8]], [[TMP10]]
29+ ; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i1> [[TMP11]] to <4 x i32>
3130; CHECK-NEXT: [[VMMLA1_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.smmla.v4i32.v16i8(<4 x i32> [[R]], <16 x i8> [[A]], <16 x i8> [[B]])
3231; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr @__msan_retval_tls, align 8
3332; CHECK-NEXT: ret <4 x i32> [[VMMLA1_I]]
@@ -51,10 +50,9 @@ define <4 x i32> @ummla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) sa
5150; CHECK-NEXT: [[TMP6:%.*]] = zext <16 x i1> [[TMP5]] to <16 x i8>
5251; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32> zeroinitializer, <16 x i8> [[TMP4]], <16 x i8> [[TMP6]])
5352; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <4 x i32> [[TMP7]], splat (i32 8)
54- ; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i1> [[TMP8]] to <4 x i32>
5553; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <4 x i32> [[TMP0]], zeroinitializer
56- ; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
57- ; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i32> [[TMP9]], [[TMP11]]
54+ ; CHECK-NEXT: [[TMP11:%.*]] = or <4 x i1> [[TMP8]], [[TMP10]]
55+ ; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i1> [[TMP11]] to <4 x i32>
5856; CHECK-NEXT: [[VMMLA1_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32> [[R]], <16 x i8> [[A]], <16 x i8> [[B]])
5957; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr @__msan_retval_tls, align 8
6058; CHECK-NEXT: ret <4 x i32> [[VMMLA1_I]]
@@ -78,10 +76,9 @@ define <4 x i32> @usmmla.v4i32.v16i8(<4 x i32> %r, <16 x i8> %a, <16 x i8> %b) s
7876; CHECK-NEXT: [[TMP6:%.*]] = zext <16 x i1> [[TMP5]] to <16 x i8>
7977; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32> zeroinitializer, <16 x i8> [[TMP4]], <16 x i8> [[TMP6]])
8078; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <4 x i32> [[TMP7]], splat (i32 8)
81- ; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i1> [[TMP8]] to <4 x i32>
8279; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <4 x i32> [[TMP0]], zeroinitializer
83- ; CHECK-NEXT: [[TMP11:%.*]] = sext <4 x i1> [[TMP10]] to <4 x i32>
84- ; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i32> [[TMP9]], [[TMP11]]
80+ ; CHECK-NEXT: [[TMP11:%.*]] = or <4 x i1> [[TMP8]], [[TMP10]]
81+ ; CHECK-NEXT: [[TMP12:%.*]] = sext <4 x i1> [[TMP11]] to <4 x i32>
8582; CHECK-NEXT: [[VUSMMLA1_I:%.*]] = tail call <4 x i32> @llvm.aarch64.neon.usmmla.v4i32.v16i8(<4 x i32> [[R]], <16 x i8> [[A]], <16 x i8> [[B]])
8683; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr @__msan_retval_tls, align 8
8784; CHECK-NEXT: ret <4 x i32> [[VUSMMLA1_I]]
0 commit comments