[SDAG] Drop select -> fmax/min folding in SelectionDAGBuilder#93575
[SDAG] Drop select -> fmax/min folding in SelectionDAGBuilder#93575
Conversation
|
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-llvm-selectiondag Author: Yingwei Zheng (dtcxzyw) ChangesAs we already handle this pattern in InstCombine/DAGCombiner, I think it is ok to drop this folding in SelectionDAGBuilder to handle signed zero correctly. Fixes #93414. 6 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ca352da5d36eb..799f748fb786e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3725,32 +3725,8 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
case SPF_UMAX: Opc = ISD::UMAX; break;
case SPF_UMIN: Opc = ISD::UMIN; break;
case SPF_SMAX: Opc = ISD::SMAX; break;
- case SPF_SMIN: Opc = ISD::SMIN; break;
- case SPF_FMINNUM:
- switch (SPR.NaNBehavior) {
- case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
- case SPNB_RETURNS_ANY:
- if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ||
- (UseScalarMinMax &&
- TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType())))
- Opc = ISD::FMINNUM;
- break;
- }
- break;
- case SPF_FMAXNUM:
- switch (SPR.NaNBehavior) {
- case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: break;
- case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
- case SPNB_RETURNS_ANY:
- if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ||
- (UseScalarMinMax &&
- TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType())))
- Opc = ISD::FMAXNUM;
- break;
- }
+ case SPF_SMIN:
+ Opc = ISD::SMIN;
break;
case SPF_NABS:
Negate = true;
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
index 550e89f4a27f9..b96c3ffbd52a8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll
@@ -23,7 +23,7 @@ define double @test_cross(float %in) {
}
; Same as previous, but with ordered comparison;
-; must become fminnm, not fmin.
+; must become fcmp + fcsel, not fmin/fminnm.
define double @test_cross_fail_nan(float %in) {
; CHECK-LABEL: test_cross_fail_nan:
%cmp = fcmp olt float %in, 0.000000e+00
@@ -31,7 +31,8 @@ define double @test_cross_fail_nan(float %in) {
%longer = fpext float %val to double
ret double %longer
-; CHECK: fminnm s
+; CHECK: fcmp
+; CHECK: fcsel
}
; This isn't a min or a max, but passes the first condition for swapping the
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax.ll b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
index d7d54a6e48a92..b2fd4821cc6eb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmax.ll
@@ -5,7 +5,8 @@ define double @test_direct(float %in) {
; CHECK-LABEL: test_direct:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fmaxnm s0, s0, s1
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: fcsel s0, s1, s0, lt
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp nnan olt float %in, 0.000000e+00
@@ -18,7 +19,8 @@ define double @test_cross(float %in) {
; CHECK-LABEL: test_cross:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fminnm s0, s0, s1
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: fcsel s0, s0, s1, lt
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp nnan ult float %in, 0.000000e+00
@@ -33,7 +35,8 @@ define double @test_cross_fail_nan(float %in) {
; CHECK-LABEL: test_cross_fail_nan:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: fminnm s0, s0, s1
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: fcsel s0, s0, s1, lt
; CHECK-NEXT: fcvt d0, s0
; CHECK-NEXT: ret
%cmp = fcmp nnan olt float %in, 0.000000e+00
diff --git a/llvm/test/CodeGen/AArch64/select_fmf.ll b/llvm/test/CodeGen/AArch64/select_fmf.ll
index 92d8676ca04be..938217180676f 100644
--- a/llvm/test/CodeGen/AArch64/select_fmf.ll
+++ b/llvm/test/CodeGen/AArch64/select_fmf.ll
@@ -7,13 +7,14 @@
define float @select_select_fold_select_and(float %w, float %x, float %y, float %z) {
; CHECK-LABEL: select_select_fold_select_and:
; CHECK: // %bb.0:
-; CHECK-NEXT: fminnm s4, s1, s2
+; CHECK-NEXT: fcmp s0, s3
+; CHECK-NEXT: fcsel s4, s0, s3, gt
; CHECK-NEXT: fcmp s1, s2
-; CHECK-NEXT: fmaxnm s2, s0, s3
-; CHECK-NEXT: fmov s1, #0.50000000
-; CHECK-NEXT: fccmp s4, s0, #4, lt
-; CHECK-NEXT: fadd s1, s0, s1
-; CHECK-NEXT: fcsel s2, s2, s0, gt
+; CHECK-NEXT: fcsel s1, s1, s2, lt
+; CHECK-NEXT: fmov s2, #0.50000000
+; CHECK-NEXT: fccmp s1, s0, #4, lt
+; CHECK-NEXT: fadd s1, s0, s2
+; CHECK-NEXT: fcsel s2, s4, s0, gt
; CHECK-NEXT: fadd s4, s1, s2
; CHECK-NEXT: fcmp s4, s1
; CHECK-NEXT: b.le .LBB0_2
@@ -65,13 +66,13 @@ exit: ; preds = %if.end.i159.i.i, %if.then.i
define float @select_select_fold_select_or(float %w, float %x, float %y, float %z) {
; CHECK-LABEL: select_select_fold_select_or:
; CHECK: // %bb.0:
-; CHECK-NEXT: fminnm s4, s1, s2
; CHECK-NEXT: fcmp s1, s2
-; CHECK-NEXT: fmaxnm s2, s0, s3
-; CHECK-NEXT: fmov s1, #0.50000000
-; CHECK-NEXT: fccmp s4, s0, #0, ge
-; CHECK-NEXT: fadd s1, s0, s1
-; CHECK-NEXT: fcsel s2, s0, s2, gt
+; CHECK-NEXT: fcsel s1, s1, s2, lt
+; CHECK-NEXT: fccmp s0, s3, #0, ge
+; CHECK-NEXT: fmov s2, #0.50000000
+; CHECK-NEXT: fccmp s1, s0, #0, le
+; CHECK-NEXT: fadd s1, s0, s2
+; CHECK-NEXT: fcsel s2, s0, s3, gt
; CHECK-NEXT: fadd s4, s1, s2
; CHECK-NEXT: fcmp s4, s1
; CHECK-NEXT: b.le .LBB1_2
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
index 8438e9d88f5de..6c3ff79f911bc 100644
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
@@ -659,9 +659,10 @@ define <vscale x 4 x float> @fcmp_fast_olt_v4f32(<vscale x 4 x float> %z, <vscal
; CHECK-LABEL: fcmp_fast_olt_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p1/m, z1.s
+; CHECK-NEXT: fcmgt p1.s, p0/z, z2.s, z1.s
+; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: sel z1.s, p1, z1.s, z2.s
+; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer
@@ -675,9 +676,10 @@ define <vscale x 8 x half> @fcmp_fast_olt_v8f16(<vscale x 8 x half> %z, <vscale
; CHECK-LABEL: fcmp_fast_olt_v8f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0
-; CHECK-NEXT: fminnm z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p1/m, z1.h
+; CHECK-NEXT: fcmgt p1.h, p0/z, z2.h, z1.h
+; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
+; CHECK-NEXT: sel z1.h, p1, z1.h, z2.h
+; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer
@@ -691,9 +693,10 @@ define <vscale x 4 x float> @fcmp_fast_ogt_v4f32(<vscale x 4 x float> %z, <vscal
; CHECK-LABEL: fcmp_fast_ogt_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: mov z0.s, p1/m, z1.s
+; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: sel z1.s, p1, z1.s, z2.s
+; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer
@@ -707,9 +710,10 @@ define <vscale x 8 x half> @fcmp_fast_ogt_v8f16(<vscale x 8 x half> %z, <vscale
; CHECK-LABEL: fcmp_fast_ogt_v8f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0
-; CHECK-NEXT: fmaxnm z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: mov z0.h, p1/m, z1.h
+; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
+; CHECK-NEXT: sel z1.h, p1, z1.h, z2.h
+; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
index a2ff0d33e2d31..9e8ffb0104340 100644
--- a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll
@@ -451,3 +451,29 @@ define signext i32 @select_fcmp_uge_1_2(float %a, float %b) nounwind {
%2 = select i1 %1, i32 1, i32 2
ret i32 %2
}
+
+; Test from PR93414
+; Make sure that we don't use fmin.s here to handle signed zero correctly.
+define float @select_fcmp_olt_pos_zero(float %x) {
+; CHECK-LABEL: select_fcmp_olt_pos_zero:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.w.x fa5, zero
+; CHECK-NEXT: flt.s a0, fa0, fa5
+; CHECK-NEXT: bnez a0, .LBB20_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: fmv.s fa0, fa5
+; CHECK-NEXT: .LBB20_2:
+; CHECK-NEXT: ret
+;
+; CHECKZFINX-LABEL: select_fcmp_olt_pos_zero:
+; CHECKZFINX: # %bb.0:
+; CHECKZFINX-NEXT: flt.s a1, a0, zero
+; CHECKZFINX-NEXT: bnez a1, .LBB20_2
+; CHECKZFINX-NEXT: # %bb.1:
+; CHECKZFINX-NEXT: li a0, 0
+; CHECKZFINX-NEXT: .LBB20_2:
+; CHECKZFINX-NEXT: ret
+ %cmp = fcmp olt float %x, 0.000000
+ %sel = select i1 %cmp, float %x, float 0.000000
+ ret float %sel
+}
|
| if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) || | ||
| (UseScalarMinMax && | ||
| TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()))) | ||
| Opc = ISD::FMINNUM; |
There was a problem hiding this comment.
I just noticed we don't actually have the DAG combine to form nnan select -> minimum/maximum. Ideally we would implement that before dropping this
There was a problem hiding this comment.
I just noticed we don't actually have the DAG combine to form nnan select -> minimum/maximum. Ideally we would implement that before dropping this
Are you working on this?
There was a problem hiding this comment.
Not yet, I was considering it
There was a problem hiding this comment.
Are you going to work on this? If not I can probably pick this up
|
|
@arsenm Have all (potential) regressions been fixed? |
No. We at least should have the nnan cases to minimum/maximum combine handled |
These are only theoretical regressions and I don't think should hold this up. It would only be helpful for targets that 1. only have legal fminimum/fmaximum and no legal fmin/fmax, and also those patterns appear during legalization. I believe wasm is the only relevant target for 1 |
|
@arsenm All assertion failures are fixed. Can I merge this PR? |
| @@ -1051,7 +1054,10 @@ define amdgpu_kernel void @v_test_legacy_fmed3_r_i_i_f32(ptr addrspace(1) %out, | |||
| ; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3] | |||
| ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) | |||
| ; GFX9-SDAG-NEXT: v_add_f32_e32 v1, 1.0, v1 | |||
| ; GFX9-SDAG-NEXT: v_med3_f32 v1, v1, 2.0, 4.0 | |||
| ; GFX9-SDAG-NEXT: v_cmp_lt_f32_e32 vcc, 2.0, v1 | |||
| ; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v1, 2.0, v1, vcc | |||
There was a problem hiding this comment.
This regression actually doesn't look good. I need to look at this, but running instcombine on the test first doesn't recover the v_med3_f32
You can test this locally with the following command:git-clang-format --diff origin/main HEAD --extensions cpp -- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --diff_from_common_commit
View the diff from clang-format here.diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 93c46b822..aebc2b8db 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3863,7 +3863,9 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
case SPF_UMAX: Opc = ISD::UMAX; break;
case SPF_UMIN: Opc = ISD::UMIN; break;
case SPF_SMAX: Opc = ISD::SMAX; break;
- case SPF_SMIN: Opc = ISD::SMIN; break;
+ case SPF_SMIN:
+ Opc = ISD::SMIN;
+ break;
case SPF_NABS:
Negate = true;
[[fallthrough]];
|
🪟 Windows x64 Test Results
Failed Tests(click on a test name to see its output) LLVMLLVM.CodeGen/NVPTX/fma-relu-contract.llLLVM.CodeGen/NVPTX/fma-relu-fma-intrinsic.llLLVM.CodeGen/NVPTX/fma-relu-instruction-flag.llLLVM.CodeGen/NVPTX/masked-load-3xhalf.llIf these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
🐧 Linux x64 Test Results
Failed Tests(click on a test name to see its output) LLVMLLVM.CodeGen/NVPTX/fma-relu-contract.llLLVM.CodeGen/NVPTX/fma-relu-fma-intrinsic.llLLVM.CodeGen/NVPTX/fma-relu-instruction-flag.llLLVM.CodeGen/NVPTX/masked-load-3xhalf.llIf these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
arsenm
left a comment
There was a problem hiding this comment.
I'm not sure the combiner work ever happened?
| ; CHECK-LABEL: minnum_select_nsz: | ||
| %cmp = fcmp nnan ole float %x, %y | ||
| %sel = select nsz i1 %cmp, float %x, float %y | ||
| %sel = select nnan nsz i1 %cmp, float %x, float %y |
There was a problem hiding this comment.
Why does this need to gain nnan? It should be inferrable from the compare?
There was a problem hiding this comment.
…184590) minnum/maxnum don't have the correct sNaN semantics, we must convert to minimumnum/maximumnum instead. To avoid an NVPTX regression, make it handle fmaximmumnum in one TableGen pattern. This is intended as a targeted fix for the miscompile, as the complete removal of this transform (#93575) appears to be blocked. Fixes #176624.
As we already handle this pattern in InstCombine/DAGCombiner, I think it is ok to drop this folding in SelectionDAGBuilder to handle signed zero correctly.
Note: GlobalISel still suffers from this issue.
Fixes #93414.