Skip to content

Commit 7e439d5

Browse files
authored
[VectorCombine] Skip foldShuffleOfIntrinsics when operand types differ (#201241)
Example:

```llvm
define <4 x i32> @t(<2 x float> %a, <2 x double> %b) {
  %fa = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %a)
  %fb = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %b)
  %s = shufflevector <2 x i32> %fa, <2 x i32> %fb, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %s
}
```

For this input, `foldShuffleOfIntrinsics` folds `shuffle(intrinsic(x), intrinsic(y))` into `intrinsic(shuffle(x, y))`, but it only checks that the two calls have the same result type, not that their operand types match. Since `fptosi.sat` is overloaded on its operand type, the two calls share a `<2 x i32>` result type but have different operand types (`<2 x float>` vs `<2 x double>`), so the new `shufflevector` is created with mismatched operand types and trips `isValidOperands`.

Fix: bail out when the intrinsics' shuffled operands have different types.
1 parent 517308a commit 7e439d5

2 files changed

Lines changed: 35 additions & 3 deletions

File tree

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3291,10 +3291,21 @@ bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
32913291
if (!isTriviallyVectorizable(IID))
32923292
return false;
32933293

3294-
for (unsigned I = 0, E = II0->arg_size(); I != E; ++I)
3295-
if (isVectorIntrinsicWithScalarOpAtArg(IID, I, &TTI) &&
3296-
II0->getArgOperand(I) != II1->getArgOperand(I))
3294+
for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) {
3295+
Value *Arg0 = II0->getArgOperand(I);
3296+
Value *Arg1 = II1->getArgOperand(I);
3297+
if (isVectorIntrinsicWithScalarOpAtArg(IID, I, &TTI)) {
3298+
// Scalar operands must be identical.
3299+
if (Arg0 != Arg1)
3300+
return false;
3301+
} else if (Arg0->getType() != Arg1->getType()) {
3302+
// The corresponding vector operands are shuffled together, so they must
3303+
// share the same type. For intrinsics overloaded on their operand type
3304+
// (e.g. llvm.fptosi.sat), two calls can produce the same result type
3305+
// from different operand types; shuffling those would be invalid.
32973306
return false;
3307+
}
3308+
}
32983309

32993310
InstructionCost OldCost =
33003311
CostII0 + CostII1 +

llvm/test/Transforms/VectorCombine/X86/shuffle-of-intrinsics.ll

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,3 +233,24 @@ entry:
233233
%r = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
234234
ret <8 x i32> %r
235235
}
236+
237+
; Intrinsics overloaded on their operand type (e.g. llvm.fptosi.sat) can share
238+
; a result type while having different operand types. The corresponding
239+
; operands cannot be shuffled together, so the fold must not fire.
240+
define <4 x i32> @test_mismatched_operand_types(<2 x float> %0, <2 x double> %1) {
241+
; CHECK-LABEL: @test_mismatched_operand_types(
242+
; CHECK-NEXT: entry:
243+
; CHECK-NEXT: [[A:%.*]] = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> [[TMP0:%.*]])
244+
; CHECK-NEXT: [[B:%.*]] = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> [[TMP1:%.*]])
245+
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
246+
; CHECK-NEXT: ret <4 x i32> [[R]]
247+
;
248+
entry:
249+
%a = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> %0)
250+
%b = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %1)
251+
%r = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
252+
ret <4 x i32> %r
253+
}
254+
255+
declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float>)
256+
declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double>)

0 commit comments

Comments
 (0)