[SDAGBuilder] Fix incorrect fcmp+select to minnum/maxnum transform#184590
[SDAGBuilder] Fix incorrect fcmp+select to minnum/maxnum transform#184590
Conversation
|
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-nvptx Author: Nikita Popov (nikic) Changes: minnum/maxnum don't have the correct sNaN semantics, we must convert to minimumnum/maximumnum instead. To avoid an NVPTX regression, make it handle fmaximumnum in one TableGen pattern. This is intended as a targeted fix for the miscompile, as the complete removal of this transform (#93575) appears to be blocked. Fixes #176624. Patch is 37.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/184590.diff 11 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3e6fdd7bbf9fe..6ed4cd04d06f1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3878,7 +3878,10 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
+ case SPNB_RETURNS_OTHER:
+ Opc = ISD::FMINIMUMNUM;
+ Flags.setNoSignedZeros(true);
+ break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ||
(UseScalarMinMax &&
@@ -3891,7 +3894,10 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
+ case SPNB_RETURNS_OTHER:
+ Opc = ISD::FMAXIMUMNUM;
+ Flags.setNoSignedZeros(true);
+ break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ||
(UseScalarMinMax &&
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 096c5e470ed02..5019d76039a4d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2553,10 +2553,10 @@ def NVPTX_fma_oneuse_and_nnan : PatFrag<(ops node:$a, node:$b, node:$c),
return N->hasOneUse() &&
(N->getFlags().hasNoNaNs() || TM.Options.NoNaNsFPMath);
}]>;
-// fmaxnum will differentiate between signed and unsigned zeros soon, so this
-// PatFrag is for a fmaxnum node with nsz
-def NVPTX_fmaxnum_nsz : PatFrag<(ops node:$a, node:$b),
- (fmaxnum node:$a, node:$b), [{
+// fmaxnum/fmaximumnum differentiate between signed and unsigned zeros, so this
+// PatFrag is for a fmaxnum/fmaximumnum node with nsz
+def NVPTX_fmaxnum_or_fmaximumnum_nsz : PatFrag<(ops node:$a, node:$b),
+ (fmaxnum_or_fmaximumnum node:$a, node:$b), [{
return N->getFlags().hasNoSignedZeros();
}]>;
@@ -2564,7 +2564,7 @@ class FMARELUInst<RegTyInfo t, bit allow_ftz, PatFrag zero_pat>
: BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b, t.RC:$c),
!if(allow_ftz, (ins FTZFlag:$ftz), (ins)),
"fma.rn" # !if(allow_ftz, "$ftz", "") # ".relu." # t.PtxType,
- [(set t.Ty:$dst, (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>;
+ [(set t.Ty:$dst, (NVPTX_fmaxnum_or_fmaximumnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>;
let Predicates = [useFP16Math, hasPTX<70>, hasSM<80>] in {
def FMARELU_F16 : FMARELUInst<F16RT, true, fpimm_0>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
index 159075db0f7bc..4e3fac3650e98 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
@@ -31,12 +31,14 @@ define double @test_cross(float %in) {
}
; Same as previous, but with ordered comparison;
-; must become fminnm, not fmin.
+; Should not become either fminnm or fmin, because neither have the correct
+; behavior for sNaN.
define double @test_cross_fail_nan(float %in) {
; CHECK-LABEL: test_cross_fail_nan:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fminnm s0, s0, s1
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: fcsel s0, s0, s1, mi
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp olt float %in, 0.000000e+00
diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
index 2185bd8a2a138..9e4685f2081df 100644
--- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
@@ -201,7 +201,9 @@ define half @fp16_vminnm_NNNo(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI12_0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -229,7 +231,9 @@ define half @fp16_vminnm_NNNo_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI13_1
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -251,11 +255,13 @@ define half @fp16_vminnm_NNNu(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI14_0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI14_0
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -275,11 +281,13 @@ define half @fp16_vminnm_NNNule(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI15_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI15_1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI15_1
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -306,7 +314,9 @@ define half @fp16_vminnm_NNNu_rev(half %b) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI16_1
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -330,7 +340,9 @@ define half @fp16_vmaxnm_NNNo(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI17_0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -354,7 +366,9 @@ define half @fp16_vmaxnm_NNNoge(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI18_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI18_1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -384,7 +398,9 @@ define half @fp16_vmaxnm_NNNo_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI19_1
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -410,7 +426,9 @@ define half @fp16_vmaxnm_NNNole_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI20_1
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -432,11 +450,13 @@ define half @fp16_vmaxnm_NNNu(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI21_0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI21_0
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -456,11 +476,13 @@ define half @fp16_vmaxnm_NNNuge(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI22_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI22_1
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI22_1
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -480,12 +502,14 @@ entry:
define half @fp16_vminmaxnm_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_neg0:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldr.16 s0, .LCPI23_0
-; CHECK-NEXT: vmov.f16 s2, r0
-; CHECK-NEXT: vminnm.f16 s2, s2, s0
+; CHECK-NEXT: vldr.16 s2, .LCPI23_0
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s2, s0
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -508,7 +532,9 @@ define half @fp16_vminmaxnm_e_0(half %a) {
; CHECK-NEXT: vcmp.f16 s0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -526,12 +552,14 @@ entry:
define half @fp16_vminmaxnm_e_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_e_neg0:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldr.16 s0, .LCPI25_0
-; CHECK-NEXT: vmov.f16 s2, r0
-; CHECK-NEXT: vminnm.f16 s2, s2, s0
-; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI25_0
+; CHECK-NEXT: vmov.f16 s0, r0
+; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
index 8564d7d9996d3..01cb3ba404816 100644
--- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
@@ -1300,9 +1300,9 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
; ARMV8-LABEL: pr65820:
; ARMV8: @ %bb.0: @ %entry
; ARMV8-NEXT: vmov d16, r2, r3
-; ARMV8-NEXT: vmov.i32 q9, #0x0
; ARMV8-NEXT: vdup.32 q8, d16[0]
-; ARMV8-NEXT: vmaxnm.f32 q8, q8, q9
+; ARMV8-NEXT: vcgt.f32 q9, q8, #0
+; ARMV8-NEXT: vand q8, q9, q8
; ARMV8-NEXT: vst1.32 {d16, d17}, [r0]
; ARMV8-NEXT: bx lr
;
@@ -1312,7 +1312,8 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
; ARMV8M-NEXT: vmov r1, s0
; ARMV8M-NEXT: vmov.i32 q0, #0x0
; ARMV8M-NEXT: vdup.32 q1, r1
-; ARMV8M-NEXT: vmaxnm.f32 q0, q1, q0
+; ARMV8M-NEXT: vcmp.f32 gt, q1, zr
+; ARMV8M-NEXT: vdupt.32 q0, r1
; ARMV8M-NEXT: vstrw.32 q0, [r0]
; ARMV8M-NEXT: bx lr
entry:
diff --git a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
index 9d0fef6452a38..04c11c35f99e5 100644
--- a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
@@ -309,12 +309,14 @@ define float @fp_armv8_vminnm_NNNo(float %a) {
; CHECK-LABEL: fp_armv8_vminnm_NNNo:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI20_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
+; CHECK-NEXT: vldr s4, .LCPI20_0
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -331,13 +333,15 @@ define float @fp_armv8_vminnm_NNNo(float %a) {
define double @fp_armv8_vminnm_NNNole(double %a) {
; CHECK-LABEL: fp_armv8_vminnm_NNNole:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr d16, .LCPI21_0
-; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vldr d17, .LCPI21_1
-; CHECK-NEXT: vminnm.f64 d16, d18, d16
-; CHECK-NEXT: vcmp.f64 d16, d17
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vldr d17, .LCPI21_0
+; CHECK-NEXT: vldr d18, .LCPI21_1
+; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f64 d16, d17, d16
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vcmp.f64 d16, d18
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d18, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -364,7 +368,9 @@ define float @fp_armv8_vminnm_NNNo_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s0, s2
-; CHECK-NEXT: vminnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -389,7 +395,9 @@ define double @fp_armv8_vminnm_NNNoge_rev(double %a) {
; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f64 d16, d16, d17
-; CHECK-NEXT: vminnm.f64 d16, d16, d18
+; CHECK-NEXT: vcmp.f64 d18, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -411,12 +419,14 @@ define float @fp_armv8_vminnm_NNNu(float %b) {
; CHECK-LABEL: fp_armv8_vminnm_NNNu:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI24_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vldr s4, .LCPI24_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -434,12 +444,14 @@ define float @fp_armv8_vminnm_NNNule(float %b) {
; CHECK-LABEL: fp_armv8_vminnm_NNNule:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI25_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vldr s2, .LCPI25_1
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI25_1
+; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f32 s0, s0, s2
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -464,7 +476,9 @@ define float @fp_armv8_vminnm_NNNu_rev(float %b) {
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s0, s2
-; CHECK-NEXT: vminnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -489,7 +503,9 @@ define double @fp_armv8_vminnm_NNNuge_rev(double %b) {
; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f64 d16, d16, d17
-; CHECK-NEXT: vminnm.f64 d16, d16, d18
+; CHECK-NEXT: vcmp.f64 d18, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d16, d16, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -511,12 +527,14 @@ define float @fp_armv8_vmaxnm_NNNo(float %a) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNo:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI28_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
+; CHECK-NEXT: vldr s4, .LCPI28_0
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -534,12 +552,14 @@ define float @fp_armv8_vmaxnm_NNNoge(float %a) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNoge:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI29_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vldr s2, .LCPI29_1
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI29_1
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -564,7 +584,9 @@ define float @fp_armv8_vmaxnm_NNNo_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s0, s2
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -589,7 +611,9 @@ define float @fp_armv8_vmaxnm_NNNole_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s0, s2
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -609,12 +633,14 @@ define float @fp_armv8_vmaxnm_NNNu(float %b) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNu:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI32_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vldr s4, .LCPI32_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -632,12 +658,14 @@ define float...
[truncated]
|
|
@llvm/pr-subscribers-backend-arm Author: Nikita Popov (nikic) Changes: minnum/maxnum don't have the correct sNaN semantics, we must convert to minimumnum/maximumnum instead. To avoid an NVPTX regression, make it handle fmaximumnum in one TableGen pattern. This is intended as a targeted fix for the miscompile, as the complete removal of this transform (#93575) appears to be blocked. Fixes #176624. Patch is 37.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/184590.diff 11 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3e6fdd7bbf9fe..6ed4cd04d06f1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3878,7 +3878,10 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
+ case SPNB_RETURNS_OTHER:
+ Opc = ISD::FMINIMUMNUM;
+ Flags.setNoSignedZeros(true);
+ break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ||
(UseScalarMinMax &&
@@ -3891,7 +3894,10 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
+ case SPNB_RETURNS_OTHER:
+ Opc = ISD::FMAXIMUMNUM;
+ Flags.setNoSignedZeros(true);
+ break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ||
(UseScalarMinMax &&
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 096c5e470ed02..5019d76039a4d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2553,10 +2553,10 @@ def NVPTX_fma_oneuse_and_nnan : PatFrag<(ops node:$a, node:$b, node:$c),
return N->hasOneUse() &&
(N->getFlags().hasNoNaNs() || TM.Options.NoNaNsFPMath);
}]>;
-// fmaxnum will differentiate between signed and unsigned zeros soon, so this
-// PatFrag is for a fmaxnum node with nsz
-def NVPTX_fmaxnum_nsz : PatFrag<(ops node:$a, node:$b),
- (fmaxnum node:$a, node:$b), [{
+// fmaxnum/fmaximumnum differentiate between signed and unsigned zeros, so this
+// PatFrag is for a fmaxnum/fmaximumnum node with nsz
+def NVPTX_fmaxnum_or_fmaximumnum_nsz : PatFrag<(ops node:$a, node:$b),
+ (fmaxnum_or_fmaximumnum node:$a, node:$b), [{
return N->getFlags().hasNoSignedZeros();
}]>;
@@ -2564,7 +2564,7 @@ class FMARELUInst<RegTyInfo t, bit allow_ftz, PatFrag zero_pat>
: BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b, t.RC:$c),
!if(allow_ftz, (ins FTZFlag:$ftz), (ins)),
"fma.rn" # !if(allow_ftz, "$ftz", "") # ".relu." # t.PtxType,
- [(set t.Ty:$dst, (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>;
+ [(set t.Ty:$dst, (NVPTX_fmaxnum_or_fmaximumnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>;
let Predicates = [useFP16Math, hasPTX<70>, hasSM<80>] in {
def FMARELU_F16 : FMARELUInst<F16RT, true, fpimm_0>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
index 159075db0f7bc..4e3fac3650e98 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
@@ -31,12 +31,14 @@ define double @test_cross(float %in) {
}
; Same as previous, but with ordered comparison;
-; must become fminnm, not fmin.
+; Should not become either fminnm or fmin, because neither have the correct
+; behavior for sNaN.
define double @test_cross_fail_nan(float %in) {
; CHECK-LABEL: test_cross_fail_nan:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fminnm s0, s0, s1
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: fcsel s0, s0, s1, mi
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp olt float %in, 0.000000e+00
diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
index 2185bd8a2a138..9e4685f2081df 100644
--- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
@@ -201,7 +201,9 @@ define half @fp16_vminnm_NNNo(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI12_0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -229,7 +231,9 @@ define half @fp16_vminnm_NNNo_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI13_1
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -251,11 +255,13 @@ define half @fp16_vminnm_NNNu(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI14_0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI14_0
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -275,11 +281,13 @@ define half @fp16_vminnm_NNNule(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI15_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI15_1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI15_1
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -306,7 +314,9 @@ define half @fp16_vminnm_NNNu_rev(half %b) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI16_1
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -330,7 +340,9 @@ define half @fp16_vmaxnm_NNNo(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI17_0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -354,7 +366,9 @@ define half @fp16_vmaxnm_NNNoge(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI18_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI18_1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -384,7 +398,9 @@ define half @fp16_vmaxnm_NNNo_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI19_1
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -410,7 +426,9 @@ define half @fp16_vmaxnm_NNNole_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI20_1
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -432,11 +450,13 @@ define half @fp16_vmaxnm_NNNu(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI21_0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI21_0
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -456,11 +476,13 @@ define half @fp16_vmaxnm_NNNuge(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI22_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI22_1
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI22_1
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -480,12 +502,14 @@ entry:
define half @fp16_vminmaxnm_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_neg0:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldr.16 s0, .LCPI23_0
-; CHECK-NEXT: vmov.f16 s2, r0
-; CHECK-NEXT: vminnm.f16 s2, s2, s0
+; CHECK-NEXT: vldr.16 s2, .LCPI23_0
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s2, s0
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -508,7 +532,9 @@ define half @fp16_vminmaxnm_e_0(half %a) {
; CHECK-NEXT: vcmp.f16 s0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -526,12 +552,14 @@ entry:
define half @fp16_vminmaxnm_e_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_e_neg0:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldr.16 s0, .LCPI25_0
-; CHECK-NEXT: vmov.f16 s2, r0
-; CHECK-NEXT: vminnm.f16 s2, s2, s0
-; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI25_0
+; CHECK-NEXT: vmov.f16 s0, r0
+; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
index 8564d7d9996d3..01cb3ba404816 100644
--- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
@@ -1300,9 +1300,9 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
; ARMV8-LABEL: pr65820:
; ARMV8: @ %bb.0: @ %entry
; ARMV8-NEXT: vmov d16, r2, r3
-; ARMV8-NEXT: vmov.i32 q9, #0x0
; ARMV8-NEXT: vdup.32 q8, d16[0]
-; ARMV8-NEXT: vmaxnm.f32 q8, q8, q9
+; ARMV8-NEXT: vcgt.f32 q9, q8, #0
+; ARMV8-NEXT: vand q8, q9, q8
; ARMV8-NEXT: vst1.32 {d16, d17}, [r0]
; ARMV8-NEXT: bx lr
;
@@ -1312,7 +1312,8 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
; ARMV8M-NEXT: vmov r1, s0
; ARMV8M-NEXT: vmov.i32 q0, #0x0
; ARMV8M-NEXT: vdup.32 q1, r1
-; ARMV8M-NEXT: vmaxnm.f32 q0, q1, q0
+; ARMV8M-NEXT: vcmp.f32 gt, q1, zr
+; ARMV8M-NEXT: vdupt.32 q0, r1
; ARMV8M-NEXT: vstrw.32 q0, [r0]
; ARMV8M-NEXT: bx lr
entry:
diff --git a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
index 9d0fef6452a38..04c11c35f99e5 100644
--- a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
@@ -309,12 +309,14 @@ define float @fp_armv8_vminnm_NNNo(float %a) {
; CHECK-LABEL: fp_armv8_vminnm_NNNo:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI20_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
+; CHECK-NEXT: vldr s4, .LCPI20_0
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -331,13 +333,15 @@ define float @fp_armv8_vminnm_NNNo(float %a) {
define double @fp_armv8_vminnm_NNNole(double %a) {
; CHECK-LABEL: fp_armv8_vminnm_NNNole:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr d16, .LCPI21_0
-; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vldr d17, .LCPI21_1
-; CHECK-NEXT: vminnm.f64 d16, d18, d16
-; CHECK-NEXT: vcmp.f64 d16, d17
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vldr d17, .LCPI21_0
+; CHECK-NEXT: vldr d18, .LCPI21_1
+; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f64 d16, d17, d16
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vcmp.f64 d16, d18
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d18, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -364,7 +368,9 @@ define float @fp_armv8_vminnm_NNNo_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s0, s2
-; CHECK-NEXT: vminnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -389,7 +395,9 @@ define double @fp_armv8_vminnm_NNNoge_rev(double %a) {
; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f64 d16, d16, d17
-; CHECK-NEXT: vminnm.f64 d16, d16, d18
+; CHECK-NEXT: vcmp.f64 d18, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -411,12 +419,14 @@ define float @fp_armv8_vminnm_NNNu(float %b) {
; CHECK-LABEL: fp_armv8_vminnm_NNNu:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI24_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vldr s4, .LCPI24_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -434,12 +444,14 @@ define float @fp_armv8_vminnm_NNNule(float %b) {
; CHECK-LABEL: fp_armv8_vminnm_NNNule:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI25_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vldr s2, .LCPI25_1
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI25_1
+; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f32 s0, s0, s2
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -464,7 +476,9 @@ define float @fp_armv8_vminnm_NNNu_rev(float %b) {
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s0, s2
-; CHECK-NEXT: vminnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -489,7 +503,9 @@ define double @fp_armv8_vminnm_NNNuge_rev(double %b) {
; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f64 d16, d16, d17
-; CHECK-NEXT: vminnm.f64 d16, d16, d18
+; CHECK-NEXT: vcmp.f64 d18, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d16, d16, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -511,12 +527,14 @@ define float @fp_armv8_vmaxnm_NNNo(float %a) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNo:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI28_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
+; CHECK-NEXT: vldr s4, .LCPI28_0
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -534,12 +552,14 @@ define float @fp_armv8_vmaxnm_NNNoge(float %a) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNoge:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI29_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vldr s2, .LCPI29_1
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI29_1
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -564,7 +584,9 @@ define float @fp_armv8_vmaxnm_NNNo_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s0, s2
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -589,7 +611,9 @@ define float @fp_armv8_vmaxnm_NNNole_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s0, s2
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -609,12 +633,14 @@ define float @fp_armv8_vmaxnm_NNNu(float %b) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNu:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI32_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vldr s4, .LCPI32_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -632,12 +658,14 @@ define float...
[truncated]
|
|
@llvm/pr-subscribers-backend-aarch64 Author: Nikita Popov (nikic) Changesminnum/maxnum don't have the correct sNaN semantics, we must convert to minimumnum/maximumnum instead. To avoid an NVPTX regression, make it handle fmaximmumnum in one TableGen pattern. This is intended as a targeted fix for the miscompile, as the complete removal of this transform (#93575) appears to be blocked. Fixes #176624. Patch is 37.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/184590.diff 11 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3e6fdd7bbf9fe..6ed4cd04d06f1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3878,7 +3878,10 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
+ case SPNB_RETURNS_OTHER:
+ Opc = ISD::FMINIMUMNUM;
+ Flags.setNoSignedZeros(true);
+ break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ||
(UseScalarMinMax &&
@@ -3891,7 +3894,10 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
+ case SPNB_RETURNS_OTHER:
+ Opc = ISD::FMAXIMUMNUM;
+ Flags.setNoSignedZeros(true);
+ break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ||
(UseScalarMinMax &&
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 096c5e470ed02..5019d76039a4d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2553,10 +2553,10 @@ def NVPTX_fma_oneuse_and_nnan : PatFrag<(ops node:$a, node:$b, node:$c),
return N->hasOneUse() &&
(N->getFlags().hasNoNaNs() || TM.Options.NoNaNsFPMath);
}]>;
-// fmaxnum will differentiate between signed and unsigned zeros soon, so this
-// PatFrag is for a fmaxnum node with nsz
-def NVPTX_fmaxnum_nsz : PatFrag<(ops node:$a, node:$b),
- (fmaxnum node:$a, node:$b), [{
+// fmaxnum/fmaximumnum differentiate between signed and unsigned zeros, so this
+// PatFrag is for a fmaxnum/fmaximumnum node with nsz
+def NVPTX_fmaxnum_or_fmaximumnum_nsz : PatFrag<(ops node:$a, node:$b),
+ (fmaxnum_or_fmaximumnum node:$a, node:$b), [{
return N->getFlags().hasNoSignedZeros();
}]>;
@@ -2564,7 +2564,7 @@ class FMARELUInst<RegTyInfo t, bit allow_ftz, PatFrag zero_pat>
: BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b, t.RC:$c),
!if(allow_ftz, (ins FTZFlag:$ftz), (ins)),
"fma.rn" # !if(allow_ftz, "$ftz", "") # ".relu." # t.PtxType,
- [(set t.Ty:$dst, (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>;
+ [(set t.Ty:$dst, (NVPTX_fmaxnum_or_fmaximumnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>;
let Predicates = [useFP16Math, hasPTX<70>, hasSM<80>] in {
def FMARELU_F16 : FMARELUInst<F16RT, true, fpimm_0>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
index 159075db0f7bc..4e3fac3650e98 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
@@ -31,12 +31,14 @@ define double @test_cross(float %in) {
}
; Same as previous, but with ordered comparison;
-; must become fminnm, not fmin.
+; Should not become either fminnm or fmin, because neither have the correct
+; behavior for sNaN.
define double @test_cross_fail_nan(float %in) {
; CHECK-LABEL: test_cross_fail_nan:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fminnm s0, s0, s1
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: fcsel s0, s0, s1, mi
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp olt float %in, 0.000000e+00
diff --git a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
index 2185bd8a2a138..9e4685f2081df 100644
--- a/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll
@@ -201,7 +201,9 @@ define half @fp16_vminnm_NNNo(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI12_0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -229,7 +231,9 @@ define half @fp16_vminnm_NNNo_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI13_1
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -251,11 +255,13 @@ define half @fp16_vminnm_NNNu(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI14_0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI14_0
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -275,11 +281,13 @@ define half @fp16_vminnm_NNNule(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI15_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI15_1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI15_1
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -306,7 +314,9 @@ define half @fp16_vminnm_NNNu_rev(half %b) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI16_1
-; CHECK-NEXT: vminnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -330,7 +340,9 @@ define half @fp16_vmaxnm_NNNo(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI17_0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -354,7 +366,9 @@ define half @fp16_vmaxnm_NNNoge(half %a) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI18_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vldr.16 s2, .LCPI18_1
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -384,7 +398,9 @@ define half @fp16_vmaxnm_NNNo_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI19_1
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -410,7 +426,9 @@ define half @fp16_vmaxnm_NNNole_rev(half %a) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vldr.16 s2, .LCPI20_1
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -432,11 +450,13 @@ define half @fp16_vmaxnm_NNNu(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vmov.f16 s2, #1.200000e+01
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI21_0
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI21_0
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -456,11 +476,13 @@ define half @fp16_vmaxnm_NNNuge(half %b) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s2, .LCPI22_0
; CHECK-NEXT: vmov.f16 s0, r0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
-; CHECK-NEXT: vldr.16 s2, .LCPI22_1
; CHECK-NEXT: vcmp.f16 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI22_1
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -480,12 +502,14 @@ entry:
define half @fp16_vminmaxnm_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_neg0:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldr.16 s0, .LCPI23_0
-; CHECK-NEXT: vmov.f16 s2, r0
-; CHECK-NEXT: vminnm.f16 s2, s2, s0
+; CHECK-NEXT: vldr.16 s2, .LCPI23_0
+; CHECK-NEXT: vmov.f16 s0, r0
; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f16 s0, s2, s0
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -508,7 +532,9 @@ define half @fp16_vminmaxnm_e_0(half %a) {
; CHECK-NEXT: vcmp.f16 s0, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f16 s0, s2, s0
-; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s0, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
@@ -526,12 +552,14 @@ entry:
define half @fp16_vminmaxnm_e_neg0(half %a) {
; CHECK-LABEL: fp16_vminmaxnm_e_neg0:
; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vldr.16 s0, .LCPI25_0
-; CHECK-NEXT: vmov.f16 s2, r0
-; CHECK-NEXT: vminnm.f16 s2, s2, s0
-; CHECK-NEXT: vcmp.f16 s0, s2
+; CHECK-NEXT: vldr.16 s2, .LCPI25_0
+; CHECK-NEXT: vmov.f16 s0, r0
+; CHECK-NEXT: vcmp.f16 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f16 s0, s0, s2
+; CHECK-NEXT: vselgt.f16 s0, s0, s2
+; CHECK-NEXT: vcmp.f16 s2, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f16 s0, s2, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 1
diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
index 8564d7d9996d3..01cb3ba404816 100644
--- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll
@@ -1300,9 +1300,9 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
; ARMV8-LABEL: pr65820:
; ARMV8: @ %bb.0: @ %entry
; ARMV8-NEXT: vmov d16, r2, r3
-; ARMV8-NEXT: vmov.i32 q9, #0x0
; ARMV8-NEXT: vdup.32 q8, d16[0]
-; ARMV8-NEXT: vmaxnm.f32 q8, q8, q9
+; ARMV8-NEXT: vcgt.f32 q9, q8, #0
+; ARMV8-NEXT: vand q8, q9, q8
; ARMV8-NEXT: vst1.32 {d16, d17}, [r0]
; ARMV8-NEXT: bx lr
;
@@ -1312,7 +1312,8 @@ define void @pr65820(ptr %y, <4 x float> %splat) {
; ARMV8M-NEXT: vmov r1, s0
; ARMV8M-NEXT: vmov.i32 q0, #0x0
; ARMV8M-NEXT: vdup.32 q1, r1
-; ARMV8M-NEXT: vmaxnm.f32 q0, q1, q0
+; ARMV8M-NEXT: vcmp.f32 gt, q1, zr
+; ARMV8M-NEXT: vdupt.32 q0, r1
; ARMV8M-NEXT: vstrw.32 q0, [r0]
; ARMV8M-NEXT: bx lr
entry:
diff --git a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
index 9d0fef6452a38..04c11c35f99e5 100644
--- a/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
+++ b/llvm/test/CodeGen/ARM/vminmaxnm-safe.ll
@@ -309,12 +309,14 @@ define float @fp_armv8_vminnm_NNNo(float %a) {
; CHECK-LABEL: fp_armv8_vminnm_NNNo:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI20_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
+; CHECK-NEXT: vldr s4, .LCPI20_0
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -331,13 +333,15 @@ define float @fp_armv8_vminnm_NNNo(float %a) {
define double @fp_armv8_vminnm_NNNole(double %a) {
; CHECK-LABEL: fp_armv8_vminnm_NNNole:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr d16, .LCPI21_0
-; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vldr d17, .LCPI21_1
-; CHECK-NEXT: vminnm.f64 d16, d18, d16
-; CHECK-NEXT: vcmp.f64 d16, d17
+; CHECK-NEXT: vmov d16, r0, r1
+; CHECK-NEXT: vldr d17, .LCPI21_0
+; CHECK-NEXT: vldr d18, .LCPI21_1
+; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f64 d16, d17, d16
+; CHECK-NEXT: vselge.f64 d16, d16, d17
+; CHECK-NEXT: vcmp.f64 d16, d18
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d18, d16
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -364,7 +368,9 @@ define float @fp_armv8_vminnm_NNNo_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s0, s2
-; CHECK-NEXT: vminnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -389,7 +395,9 @@ define double @fp_armv8_vminnm_NNNoge_rev(double %a) {
; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f64 d16, d16, d17
-; CHECK-NEXT: vminnm.f64 d16, d16, d18
+; CHECK-NEXT: vcmp.f64 d18, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f64 d16, d16, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -411,12 +419,14 @@ define float @fp_armv8_vminnm_NNNu(float %b) {
; CHECK-LABEL: fp_armv8_vminnm_NNNu:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI24_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vldr s4, .LCPI24_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -434,12 +444,14 @@ define float @fp_armv8_vminnm_NNNule(float %b) {
; CHECK-LABEL: fp_armv8_vminnm_NNNule:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI25_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vldr s2, .LCPI25_1
-; CHECK-NEXT: vminnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s2, s0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI25_1
+; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselge.f32 s0, s0, s2
+; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -464,7 +476,9 @@ define float @fp_armv8_vminnm_NNNu_rev(float %b) {
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s0, s2
-; CHECK-NEXT: vminnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -489,7 +503,9 @@ define double @fp_armv8_vminnm_NNNuge_rev(double %b) {
; CHECK-NEXT: vcmp.f64 d17, d16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f64 d16, d16, d17
-; CHECK-NEXT: vminnm.f64 d16, d16, d18
+; CHECK-NEXT: vcmp.f64 d18, d16
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f64 d16, d16, d18
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 3
@@ -511,12 +527,14 @@ define float @fp_armv8_vmaxnm_NNNo(float %a) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNo:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI28_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
+; CHECK-NEXT: vldr s4, .LCPI28_0
+; CHECK-NEXT: vmov s2, r0
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -534,12 +552,14 @@ define float @fp_armv8_vmaxnm_NNNoge(float %a) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNoge:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr s0, .LCPI29_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vldr s2, .LCPI29_1
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vldr s4, .LCPI29_1
; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -564,7 +584,9 @@ define float @fp_armv8_vmaxnm_NNNo_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselgt.f32 s0, s0, s2
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselgt.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -589,7 +611,9 @@ define float @fp_armv8_vmaxnm_NNNole_rev(float %a) {
; CHECK-NEXT: vcmp.f32 s0, s2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vselge.f32 s0, s0, s2
-; CHECK-NEXT: vmaxnm.f32 s0, s0, s4
+; CHECK-NEXT: vcmp.f32 s0, s4
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s0, s4
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -609,12 +633,14 @@ define float @fp_armv8_vmaxnm_NNNu(float %b) {
; CHECK-LABEL: fp_armv8_vmaxnm_NNNu:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s0, #1.200000e+01
-; CHECK-NEXT: vldr s2, .LCPI32_0
-; CHECK-NEXT: vmov s4, r0
-; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
-; CHECK-NEXT: vcmp.f32 s0, s2
+; CHECK-NEXT: vldr s4, .LCPI32_0
+; CHECK-NEXT: vmov s2, r0
+; CHECK-NEXT: vcmp.f32 s2, s0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vselgt.f32 s0, s0, s2
+; CHECK-NEXT: vselge.f32 s0, s2, s0
+; CHECK-NEXT: vcmp.f32 s4, s0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vselge.f32 s0, s4, s0
; CHECK-NEXT: vmov r0, s0
; CHECK-NEXT: bx lr
; CHECK-NEXT: .p2align 2
@@ -632,12 +658,14 @@ define float...
[truncated]
|
minnum/maxnum don't have the correct sNaN semantics, we must convert to minimumnum/maximumnum instead. To avoid an NVPTX regression, make it handle fmaximumnum in one TableGen pattern.
645bb38 to
3360021
Compare
Even that can still alter NaN payloads as per the usual NaN rules, whereas select exactly returns one of the operands, doesn't it? So this still seems like an incorrect transform? |
Yes, that's correct. The goal should still be to remove this transform entirely (#93575), but it will need more work. |
minnum/maxnum don't have the correct sNaN semantics, we must convert to minimumnum/maximumnum instead.
To avoid an NVPTX regression, make it handle fmaximumnum in one TableGen pattern.
This is intended as a targeted fix for the miscompile, as the complete removal of this transform (#93575) appears to be blocked.
Fixes #176624.