
[X86][SelectionDAG] - Add support for llvm.canonicalize intrinsic #106370


Merged: 19 commits, Sep 23, 2024

Conversation

pawan-nirpal-031 (Contributor)

Enable support for fcanonicalize intrinsic lowering.


Thank you for submitting a Pull Request (PR) to the LLVM Project!

This PR will be automatically labeled and the relevant teams will be notified.

If you wish to, you can add reviewers by using the "Reviewers" section on this page.

If this is not working for you, it is probably because you do not have write permissions for the repository; in that case, you can instead tag reviewers by name in a comment by using @ followed by their GitHub username.

If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers.

If you have further questions, they may be answered by the LLVM GitHub User Guide.

You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums.

@llvmbot added the backend:X86 and llvm:SelectionDAG labels on Aug 28, 2024
@llvmbot (Member) commented Aug 28, 2024

@llvm/pr-subscribers-llvm-selectiondag

@llvm/pr-subscribers-backend-x86

Author: Pawan Nirpal (pawan-nirpal-031)

Changes

Enable support for fcanonicalize intrinsic lowering.


Patch is 30.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/106370.diff

5 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (+50)
  • (modified) llvm/lib/Target/X86/X86ISelDAGToDAG.cpp (+46)
  • (added) llvm/test/CodeGen/X86/canonicalize-constants.ll (+210)
  • (added) llvm/test/CodeGen/X86/canonicalize-subnormals.ll (+287)
  • (added) llvm/test/CodeGen/X86/canonicalize-vars.ll (+193)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 74e3a898569bea..c1679b1002df5e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1275,6 +1275,56 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
       }
     }
     break;
+    case ISD::FCANONICALIZE: {
+      const Triple &TT = DAG.getTarget().getTargetTriple();
+      if (TT.getArch() == Triple::x86 || TT.getArch() == Triple::x86_64) {
+        SDValue Operand = Node->getOperand(0);
+        SDLoc dl(Node);
+        EVT VT = Operand.getValueType();
+
+        if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Operand)) {
+          const APFloat &C = CFP->getValueAPF();
+          if (C.isDenormal()) {
+            DenormalMode Mode =
+                DAG.getMachineFunction().getDenormalMode(C.getSemantics());
+            assert((Mode != DenormalMode::getPositiveZero()) &&
+                   "Positive denormal mode is not valid for X86 target.");
+            if (Mode == DenormalMode::getPreserveSign()) {
+              SDValue SDZero =
+                  DAG.getConstantFP((C.isNegative() ? -0.0 : 0.0), dl, VT);
+              ConstantFPSDNode *ZeroConstFP = cast<ConstantFPSDNode>(SDZero);
+              SDValue CanonZeroFPLoad = ExpandConstantFP(ZeroConstFP, true);
+              DAG.ReplaceAllUsesWith(Node, CanonZeroFPLoad.getNode());
+              LLVM_DEBUG(dbgs()
+                         << "Legalized Denormal under mode PreserveSign\n");
+              return;
+            } else if (Mode == DenormalMode::getIEEE()) {
+              DAG.ReplaceAllUsesWith(Node, Operand.getNode());
+              LLVM_DEBUG(dbgs() << "Legalized Denormal under mode IEEE\n");
+              return;
+            }
+          } else if (C.isNaN() && C.isSignaling()) {
+            APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics());
+            SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT);
+            ConstantFPSDNode *QNaNConstFP = cast<ConstantFPSDNode>(QuitNaN);
+            SDValue QNanLoad = ExpandConstantFP(QNaNConstFP, true);
+            DAG.ReplaceAllUsesWith(Node, QNanLoad.getNode());
+            LLVM_DEBUG(dbgs() << "Legalized Signaling NaN to Quiet NaN\n");
+            return;
+          }
+        } else if (Operand.isUndef()) {
+          APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
+          SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT);
+          ConstantFPSDNode *QNaNConstFP = cast<ConstantFPSDNode>(QuitNaN);
+          SDValue QNanLoad = ExpandConstantFP(QNaNConstFP, true);
+          DAG.ReplaceAllUsesWith(Node, QNanLoad.getNode());
+          LLVM_DEBUG(dbgs() << "Legalized Undef to Quiet NaN\n");
+          return;
+        }
+        break;
+      }
+      break;
+    }
     case ISD::FSHL:
     case ISD::FSHR:
     case ISD::SRL_PARTS:
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index d0a54ab8993c26..4bb8c9afd23edc 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5271,6 +5271,52 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     }
     break;
   }
+  case ISD::FCANONICALIZE: {
+    SDValue Operand = Node->getOperand(0);
+    EVT VT = Node->getValueType(0);
+
+    // Perform canonicalization for constants. Replace the operand by a load
+    // from the constant pool for this constant. At this point subnormal values
+    // like denormals and sNaNs have been canonicalized, so there is no need to
+    // deal with those cases.
+    if (LoadSDNode *Load = dyn_cast<LoadSDNode>(Operand)) {
+      const X86TargetLowering *X86Lowering =
+          static_cast<const X86TargetLowering *>(TLI);
+      if (const Constant *CV = X86Lowering->getTargetConstantFromLoad(Load)) {
+        const ConstantFP *CFP = dyn_cast<ConstantFP>(CV);
+        if (CFP) {
+          ReplaceNode(Node, Load);
+          return;
+        }
+      }
+    }
+
+    // Canonicalize normal non-constant/non-undef FP Nodes.
+    SDValue MulNode;
+    SDValue One;
+    if (VT == MVT::f32 || VT == MVT::f64) {
+      One = CurDAG->getConstantFP(1.0f, dl, VT);
+    } else if (VT == MVT::f80) {
+      APFloat Val = APFloat::getOne(APFloat::x87DoubleExtended());
+      One = CurDAG->getConstantFP(Val, dl, VT);
+    } else if (VT == MVT::f16) {
+      APFloat Val(APFloat::IEEEhalf(), "1.0");
+      One = CurDAG->getConstantFP(Val, dl, VT);
+    } else if (VT == MVT::bf16) {
+      APFloat Val(APFloat::BFloat(), "1.0");
+      One = CurDAG->getConstantFP(Val, dl, VT);
+    } else {
+      // Is it better to assert when we encounter an unknown FP type than to
+      // just replace with the operand? This might be our last attempt at
+      // legalization.
+      ReplaceNode(Node, Operand.getNode());
+      return;
+    }
+    // TODO : Follow-up with tablegen pattern to generate mul * 1.0.
+    MulNode = CurDAG->getNode(ISD::FMUL, dl, VT, Operand, One);
+    ReplaceNode(Node, MulNode.getNode());
+    return;
+  }
   case ISD::BRIND:
   case X86ISD::NT_BRIND: {
     if (Subtarget->isTargetNaCl())
diff --git a/llvm/test/CodeGen/X86/canonicalize-constants.ll b/llvm/test/CodeGen/X86/canonicalize-constants.ll
new file mode 100644
index 00000000000000..b71c74bcd4472b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/canonicalize-constants.ll
@@ -0,0 +1,210 @@
+; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 < %s | FileCheck %s
+
+define float @canon_fp32() {
+  ; CHECK-LABEL: .LCPI0_0:
+  ; CHECK: .long	0x40400000                      # float 3
+  ; CHECK-LABEL: canon_fp32
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: vmovss	.LCPI0_0(%rip), %xmm0           # xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
+  ; CHECK-NEXT: retq
+  %canonicalized = call float @llvm.canonicalize.f32(float 3.0)
+  ret float %canonicalized
+}
+
+define half @canon_fp16() {
+  ; CHECK-LABEL: .LCPI1_0:
+  ; CHECK: .short	0x4200                          # half 3
+  ; CHECK-LABEL: canon_fp16
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: vmovsh	.LCPI1_0(%rip), %xmm0
+  ; CHECK-NEXT: retq
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xH4200) ; half 3.0
+  ret half %canonicalized
+}
+
+define double @canon_fp64() {
+  ; CHECK-LABEL: .LCPI2_0:
+  ; CHECK: .quad	0x4008000000000000              # double 3
+  ; CHECK-LABEL: canon_fp64
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: vmovsd	.LCPI2_0(%rip), %xmm0
+  ; CHECK-NEXT: retq
+  %canonicalized = call double @llvm.canonicalize.f64(double 3.0)
+  ret double %canonicalized
+}
+
+define x86_fp80 @canon_fp80() {
+  ; CHECK-LABEL: .LCPI3_0:
+  ; CHECK: .long	0x42b40000                      # float 90
+  ; CHECK-LABEL: canon_fp80
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: flds	.LCPI3_0(%rip)
+  ; CHECK-NEXT: retq
+
+  %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000) ; 90.0
+  ret x86_fp80 %canonicalized
+}
+
+
+define x86_fp80 @complex_canonicalize_x86_fp80(x86_fp80 %a, x86_fp80 %b) {
+entry:
+  ; CHECK-LABEL: .LCPI4_0:
+  ; CHECK: .long	0x42b40000                      # float 90
+  ; CHECK-LABEL: complex_canonicalize_x86_fp80
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: 	fldt	24(%rsp)
+  ; CHECK-NEXT: 	flds	.LCPI4_0(%rip)
+  ; CHECK-NEXT: 	fsubp	%st, %st(1)
+  ; CHECK-NEXT: 	retq
+
+  %mul1 = fsub x86_fp80 %a, %b
+  %add = fadd x86_fp80 %mul1, %b
+  %mul2 = fsub x86_fp80 %add, %mul1
+  %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000)
+  %result = fsub x86_fp80 %canonicalized, %b
+  ret x86_fp80 %result
+}
+
+define double @complex_canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
+start:
+  ; CHECK-LABEL: .LCPI5_0:
+  ; CHECK: .quad	0x4008000000000000              # double 3
+  ; CHECK-LABEL: complex_canonicalize_fp64
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: vmovsd	.LCPI5_0(%rip), %xmm0
+  ; CHECK-NEXT: retq
+
+  %c = fcmp olt double %a, %b
+  %d = fcmp uno double %a, 0.000000e+00
+  %or.cond.i.i = or i1 %d, %c
+  %e = select i1 %or.cond.i.i, double %b, double %a
+  %f = tail call double @llvm.canonicalize.f64(double 3.0) #2
+  ret double %f
+}
+
+define void @test_fold_canonicalize_p0_f32(float addrspace(1)* %out) #1 {
+  ; CHECK-LABEL: test_fold_canonicalize_p0_f32
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: vxorps	%xmm0, %xmm0, %xmm0
+  ; CHECK-NEXT: vmovss	%xmm0, (%rdi)
+  ; CHECK-NEXT: retq
+  %canonicalized = call float @llvm.canonicalize.f32(float 0.0)
+  store float %canonicalized, float addrspace(1)* %out
+  ret void
+}
+
+define void @test_fold_canonicalize_n0_f32(float addrspace(1)* %out) #1 {
+  ; CHECK-LABEL: .LCPI7_0:
+  ; CHECK: .long	0x80000000                      # float -0
+  ; CHECK-LABEL: test_fold_canonicalize_n0_f32
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: vmovss	.LCPI7_0(%rip), %xmm0
+  ; CHECK-NEXT: vmovss	%xmm0, (%rdi)
+  ; CHECK-NEXT: retq
+  %canonicalized = call float @llvm.canonicalize.f32(float -0.0)
+  store float %canonicalized, float addrspace(1)* %out
+  ret void
+}
+
+
+define void @v_test_canonicalize_p90_x86_fp80(x86_fp80 addrspace(1)* %out) #1 {
+  ; CHECK-LABEL: .LCPI8_0:
+  ; CHECK: .long	0x42b40000                      # float 90
+  ; CHECK-LABEL: v_test_canonicalize_p90_x86_fp80
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: flds	.LCPI8_0(%rip)
+  ; CHECK-NEXT: fstpt	(%rdi)
+  ; CHECK-NEXT: retq
+  %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xK4005B400000000000000)
+  store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
+  ret void
+}
+
+define void @v_test_canonicalize_p3__half(half addrspace(1)* %out) {
+  ; CHECK-LABEL: .LCPI9_0:
+  ; CHECK: .short	0x4200                          # half 3
+  ; CHECK-LABEL: v_test_canonicalize_p3__half:
+  ; CHECK:       # %bb.0: # %entry
+  ; CHECK-NEXT:   vmovsh	.LCPI9_0(%rip), %xmm0
+  ; CHECK-NEXT: 	vmovsh	%xmm0, (%rdi)
+  ; CHECK-NEXT: 	retq
+
+entry:
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xH4200)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
+
+define void @v_test_canonicalize_p3_f64(double addrspace(1)* %out) #1 {
+  ; CHECK-LABEL: .LCPI10_0:
+  ; CHECK: .quad	0x4008000000000000              # double 3
+  ; CHECK-LABEL: v_test_canonicalize_p3_f64
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT:       vmovsd	.LCPI10_0(%rip), %xmm0
+  ; CHECK-NEXT: 	vmovsd	%xmm0, (%rdi)
+  ; CHECK-NEXT: 	retq
+entry:
+  %canonicalized = call double @llvm.canonicalize.f64(double 3.0)
+  store double %canonicalized, double addrspace(1)* %out
+  ret void
+}
+
+define void @v_test_canonicalize_p3__bfloat(bfloat addrspace(1)* %out) {
+  ; CHECK-LABEL: .LCPI11_0:
+  ; CHECK: .long	0x40400000                      # float 3
+  ; CHECK-LABEL: v_test_canonicalize_p3__bfloat:
+  ; CHECK:       # %bb.0: # %entry
+  ; CHECK-NEXT: 	vmovss	.LCPI11_0(%rip), %xmm0          # xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0]
+  ; CHECK-NEXT: 	vcvtneps2bf16	%xmm0, %xmm0
+  ; CHECK-NEXT: 	vpextrw	$0, %xmm0, (%rdi)
+  ; CHECK-NEXT: 	retq
+
+entry:
+  %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat 3.0)
+  store bfloat %canonicalized, bfloat addrspace(1)* %out
+  ret void
+}
+
+define void @v_test_canonicalize_n3__bfloat(bfloat addrspace(1)* %out) {
+  ; CHECK-LABEL: .LCPI12_0:
+  ; CHECK: .long	0xc0400000                      # float -3
+  ; CHECK-LABEL: v_test_canonicalize_n3__bfloat:
+  ; CHECK:       # %bb.0: # %entry
+  ; CHECK-NEXT: 	vmovss	.LCPI12_0(%rip), %xmm0          # xmm0 = [-3.0E+0,0.0E+0,0.0E+0,0.0E+0]
+  ; CHECK-NEXT: 	vcvtneps2bf16	%xmm0, %xmm0
+  ; CHECK-NEXT: 	vpextrw	$0, %xmm0, (%rdi)
+  ; CHECK-NEXT: 	retq
+
+entry:
+  %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat -3.0)
+  store bfloat %canonicalized, bfloat addrspace(1)* %out
+  ret void
+}
+
+define void @v_test_canonicalize_n90_x86_fp80(x86_fp80 addrspace(1)* %out) #1 {
+  ; CHECK-LABEL: .LCPI13_0:
+  ; CHECK: .long	0xc2b40000                      # float -90
+  ; CHECK-LABEL: v_test_canonicalize_n90_x86_fp80
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: flds	.LCPI13_0(%rip)
+  ; CHECK-NEXT: fstpt	(%rdi)
+  ; CHECK-NEXT: retq
+  %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 0xKC005B400000000000000)
+  store x86_fp80 %canonicalized, x86_fp80 addrspace(1)* %out
+  ret void
+}
+
+define void @v_test_canonicalize_n3__half(half addrspace(1)* %out) {
+  ; CHECK-LABEL: .LCPI14_0:
+  ; CHECK: .short	0xc200                          # half -3
+  ; CHECK-LABEL: v_test_canonicalize_n3__half:
+  ; CHECK:       # %bb.0: # %entry
+  ; CHECK-NEXT:   vmovsh	.LCPI14_0(%rip), %xmm0
+  ; CHECK-NEXT: 	vmovsh	%xmm0, (%rdi)
+  ; CHECK-NEXT: 	retq
+
+entry:
+  %canonicalized = call half @llvm.canonicalize.f16(half 0xHC200)
+  store half %canonicalized, half addrspace(1)* %out
+  ret void
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/canonicalize-subnormals.ll b/llvm/test/CodeGen/X86/canonicalize-subnormals.ll
new file mode 100644
index 00000000000000..8e7e04c2a67dc8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/canonicalize-subnormals.ll
@@ -0,0 +1,287 @@
+; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 -denormal-fp-math=preserve-sign < %s | FileCheck %s
+; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 -denormal-fp-math=ieee < %s | FileCheck -check-prefix=IEEE-DENORMAL %s 
+; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 -denormal-fp-math=dynamic < %s | FileCheck -check-prefix=DYN-DENORMAL %s
+
+define void @canonicalize_denormal1_f32_pre_sign(float addrspace(1)* %out) {
+  ; CHECK-LABEL: .LCPI0_0:
+  ; CHECK: 	.long	0x80000000                      # float -0
+  ; CHECK-LABEL: canonicalize_denormal1_f32_pre_sign:
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: 	vmovss	.LCPI0_0(%rip), %xmm0
+  ; CHECK-NEXT: 	vmovss	%xmm0, (%rdi)
+  ; CHECK-NEXT: 	retq
+
+  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
+  store float %canonicalized, float addrspace(1)* %out
+  ret void
+}
+
+define void @canonicalize_denormal1_f64_pre_sign(double addrspace(1)* %out) {
+  ; CHECK-LABEL: .LCPI1_0:
+  ; CHECK: 	.quad	0x8000000000000000              # double -0
+  ; CHECK-LABEL: canonicalize_denormal1_f64_pre_sign:
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: 	vmovsd	.LCPI1_0(%rip), %xmm0
+  ; CHECK-NEXT: 	vmovsd	%xmm0, (%rdi)
+  ; CHECK-NEXT: 	retq
+
+  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
+  store double %canonicalized, double addrspace(1)* %out
+  ret void
+}
+
+
+define void @canonicalize_qnan_f64(double addrspace(1)* %out) {
+  ;cCHECK-LABEL: .LCPI2_0:
+  ;cCHECK: 	.quad	0x7ff8000000000000              # double NaN
+  ; CHECK-LABEL: canonicalize_qnan_f64:
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: 	vmovsd	.LCPI2_0(%rip), %xmm0
+  ; CHECK-NEXT: 	vmovsd	%xmm0, (%rdi)
+  ; CHECK-NEXT: 	retq
+
+  %canonicalized = call double @llvm.canonicalize.f64(double 0x7FF8000000000000)
+  store double %canonicalized, double addrspace(1)* %out
+  ret void
+}
+
+define void @canonicalize_qnan_value_neg1_f64(double addrspace(1)* %out) {
+  ;cCHECK-LABEL: .LCPI3_0:
+  ;cCHECK: 	.quad	0xffffffffffffffff              # double NaN
+  ; CHECK-LABEL: canonicalize_qnan_value_neg1_f64:
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: 	vmovsd	.LCPI3_0(%rip), %xmm0
+  ; CHECK-NEXT: 	vmovsd	%xmm0, (%rdi)
+  ; CHECK-NEXT: 	retq
+
+  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -1 to double))
+  store double %canonicalized, double addrspace(1)* %out
+  ret void
+}
+
+define void @canonicalize_qnan_value_neg2_f64(double addrspace(1)* %out) {
+  ; CHECK-LABEL: .LCPI4_0:
+  ; CHECK: 	.quad	0xfffffffffffffffe              # double NaN
+  ; CHECK-LABEL: canonicalize_qnan_value_neg2_f64:
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: 	vmovsd	.LCPI4_0(%rip), %xmm0
+  ; CHECK-NEXT: 	vmovsd	%xmm0, (%rdi)
+  ; CHECK-NEXT: 	retq
+
+  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 -2 to double))
+  store double %canonicalized, double addrspace(1)* %out
+  ret void
+}
+
+define void @canonicalize_snan0_value_f64(double addrspace(1)* %out) {
+  ; CHECK-LABEL: .LCPI5_0:
+  ; CHECK: 	.quad	0x7ff8000000000000              # double NaN
+  ; CHECK-LABEL: canonicalize_snan0_value_f64:
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: 	vmovsd	.LCPI5_0(%rip), %xmm0
+  ; CHECK-NEXT: 	vmovsd	%xmm0, (%rdi)
+  ; CHECK-NEXT: 	retq
+
+  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9218868437227405313 to double))
+  store double %canonicalized, double addrspace(1)* %out
+  ret void
+}
+
+define void @canonicalize_undef(double addrspace(1)* %out) {
+  ; CHECK-LABEL: .LCPI6_0:
+  ; CHECK: 	.quad	0x7ff8000000000000              # double NaN
+  ; CHECK-LABEL: canonicalize_undef:
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: 	vmovsd	.LCPI6_0(%rip), %xmm0
+  ; CHECK-NEXT: 	vmovsd	%xmm0, (%rdi)
+  ; CHECK-NEXT: 	retq
+
+  %canonicalized = call double @llvm.canonicalize.f64(double undef)
+  store double %canonicalized, double addrspace(1)* %out
+  ret void
+}
+
+define void @canonicalize_denormal1_f32_ieee(float addrspace(1)* %out) {
+  ; IEEE-DENORMAL-LABEL: .LCPI7_0:
+  ; IEEE-DENORMAL: 	.long	0x807fffff                      # float -1.17549421E-38
+  ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_f32_ieee:
+  ; IEEE-DENORMAL: # %bb.0:
+  ; IEEE-DENORMAL-NEXT: 	vmovss	.LCPI7_0(%rip), %xmm0
+  ; IEEE-DENORMAL-NEXT: 	vmovss	%xmm0, (%rdi)
+  ; IEEE-DENORMAL-NEXT: 	retq
+
+  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
+  store float %canonicalized, float addrspace(1)* %out
+  ret void
+}
+
+define void @canonicalize_denormal1_f64_ieee(double addrspace(1)* %out) {
+  ; IEEE-DENORMAL-LABEL: .LCPI8_0:
+  ; IEEE-DENORMAL: 	.quad	0x800fffffffffffff              # double -2.2250738585072009E-308
+  ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_f64_ieee:
+  ; IEEE-DENORMAL: # %bb.0:
+  ; IEEE-DENORMAL-NEXT: 	vmovsd	.LCPI8_0(%rip), %xmm0
+  ; IEEE-DENORMAL-NEXT: 	vmovsd	%xmm0, (%rdi)
+  ; IEEE-DENORMAL-NEXT: 	retq
+
+  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
+  store double %canonicalized, double addrspace(1)* %out
+  ret void
+}
+
+define void @canonicalize_denormal1_f32_dynamic(float addrspace(1)* %out) {
+  ; DYN-DENORMAL-LABEL: .LCPI9_0:
+  ; DYN-DENORMAL: 	.long	0x807fffff                      # float -1.17549421E-38
+  ; DYN-DENORMAL-LABEL: canonicalize_denormal1_f32_dynamic:
+  ; DYN-DENORMAL: # %bb.0:
+  ; DYN-DENORMAL-NEXT: 	vmovss	.LCPI9_0(%rip), %xmm0
+  ; DYN-DENORMAL-NEXT: 	vmovss	%xmm0, (%rdi)
+  ; DYN-DENORMAL-NEXT: 	retq
+
+  %canonicalized = call float @llvm.canonicalize.f32(float bitcast (i32 2155872255 to float))
+  store float %canonicalized, float addrspace(1)* %out
+  ret void
+}
+
+define void @canonicalize_denormal1_f64_dynamic(double addrspace(1)* %out) {
+  ; DYN-DENORMAL-LABEL: .LCPI10_0:
+  ; DYN-DENORMAL: 	.quad	0x800fffffffffffff              # double -2.2250738585072009E-308
+  ; DYN-DENORMAL-LABEL: canonicalize_denormal1_f64_dynamic:
+  ; DYN-DENORMAL: # %bb.0:
+  ; DYN-DENORMAL-NEXT: 	vmovsd	.LCPI10_0(%rip), %xmm0
+  ; DYN-DENORMAL-NEXT: 	vmovsd	%xmm0, (%rdi)
+  ; DYN-DENORMAL-NEXT: 	retq
+
+  %canonicalized = call double @llvm.canonicalize.f64(double bitcast (i64 9227875636482146303 to double))
+  store double %canonicalized, double addrspace(1)* %out
+  ret void
+}
+
+define void @canonicalize_denormal1_bfloat_pre_sign(bfloat addrspace(1)* %out) {
+  ; CHECK-LABEL: .LCPI11_0:
+  ; CHECK: 	.long	0x80000000                      # float -0
+  ; CHECK-LABEL: canonicalize_denormal1_bfloat_pre_sign:
+  ; CHECK: # %bb.0:
+  ; CHECK-NEXT: 	vmovss	.LCPI11_0(%rip), %xmm0
+  ; CHECK-NEXT: 	vcvtneps2bf16	%xmm0, %xmm0
+  ; CHECK-NEXT: 	vpextrw	$0, %xmm0, (%rdi)
+  ; CHECK-NEXT: 	retq
+
+  %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to bfloat))
+  store bfloat %canonicalized, bfloat addrspace(1)* %out
+  ret void
+}
+
+
+define void @canonicalize_denormal1_bfloat_ieee(bfloat addrspace(1)* %out) {
+  ; IEEE-DENORMAL-LABEL: .LCPI12_0:
+  ; IEEE-DENORMAL: 	.long	0x80000000                      # float -0
+  ; IEEE-DENORMAL-LABEL: canonicalize_denormal1_bfloat_ieee:
+  ; IEEE-DENORMAL: # %bb.0:
+  ; IEEE-DENORMAL-NEXT: 	vmovss	.LCPI12_0(%rip), %xmm0
+  ; IEEE-DENORMAL-NEXT: 	vcvtneps2bf16	%xmm0, %xmm0
+  ; IEEE-DENORMAL-NEXT: 	vpextrw	$0, %xmm0, (%rdi)
+  ; IEEE-DENORMAL-NEXT: 	retq
+
+  %canonicalized = call bfloat @llvm.canonicalize.bf16(bfloat bitcast (i16 32768 to...
[truncated]

@pawan-nirpal-031 (Contributor Author)

@KanRobert @phoebewang @andykaylor @MalaySanghi Please review.


⚠️ We detected that you are using a GitHub private e-mail address to contribute to the repo.
Please turn off Keep my email addresses private setting in your account.
See LLVM Discourse for more information.

@RKSimon (Collaborator) left a comment:

vector support?

@@ -0,0 +1,210 @@
; RUN: llc --mcpu=sapphirerapids -mtriple=x86_64 < %s | FileCheck %s
Collaborator

Don't use a specific CPU for generic tests - do multiple RUNs with either -mattr SSE/AVX1/AVX2/AVX512 style or -mcpu x86-64/x86-64-v2/x86-64-v3/x86-64-v4

Contributor Author

Noted. Will make changes.

SDLoc dl(Node);
EVT VT = Operand.getValueType();

if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Operand)) {
Collaborator

auto *CFP

Contributor Author

Noted.

static_cast<const X86TargetLowering *>(TLI);
if (const Constant *CV = X86Lowering->getTargetConstantFromLoad(Load)) {
const ConstantFP *CFP = dyn_cast<ConstantFP>(CV);
if (CFP) {
Collaborator

if (auto *CFP = dyn_cast<ConstantFP>(CV))

Contributor Author

Noted.

%canonicalized = call half @llvm.canonicalize.f16(half 0xHC200)
store half %canonicalized, half addrspace(1)* %out
ret void
}
Collaborator

Can we test using some badly formed hexfloats to check they get canonicalised?

Contributor Author

Sure will try such a case.

Contributor Author

You mean something like a NaN?


define void @canonicalize_qnan_f64(double addrspace(1)* %out) {
;cCHECK-LABEL: .LCPI2_0:
;cCHECK: .quad 0x7ff8000000000000 # double NaN
Collaborator

cCHECK ?

Contributor Author

I'm sorry, I don't know how that slipped past. It's a bit strange that lit did not complain; shouldn't it? I will correct this.

LLVM_DEBUG(dbgs()
<< "Legalized Denormal under mode PreserveSign\n");
return;
} else if (Mode == DenormalMode::getIEEE()) {
Contributor

No else after return

SDValue Operand = Node->getOperand(0);
SDLoc dl(Node);
EVT VT = Operand.getValueType();

Contributor

Constant folding should be handled in another patch, if it's even necessary. This ideally would be factored into a separate function operating on the APFloat like other constant foldings.

This also does not belong in the legalization, it's a constant folding combine

SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT);
ConstantFPSDNode *QNaNConstFP = cast<ConstantFPSDNode>(QuitNaN);
SDValue QNanLoad = ExpandConstantFP(QNaNConstFP, true);
DAG.ReplaceAllUsesWith(Node, QNanLoad.getNode());
Contributor

Shouldn't need to do any manual replacement?

SDValue Operand = Node->getOperand(0);
EVT VT = Node->getValueType(0);

// Perform canonicalization for constants. Replace the operand by a load
Contributor

The constant case shouldn't really need special casing in selection, especially if you are constant folding it elsewhere

return;
}
// TODO : Follow-up with tablegen pattern to generate mul * 1.0.
MulNode = CurDAG->getNode(ISD::FMUL, dl, VT, Operand, One);
Contributor

This is still potentially dangerous to introduce an intermediate FMUL. If you want to implement this in terms of generic operations, you must use strict_fmul (in which case this wouldn't need to go in x86 code)

%canonicalized = call half @llvm.canonicalize.f16(half 0xHC200)
store half %canonicalized, half addrspace(1)* %out
ret void
}
Contributor

Missing end of line

Also test vectors

@RKSimon (Collaborator) left a comment:

Move constant folding into SelectionDAG::FoldConstantArithmetic?

Comment on lines 12 to 33
; SSE: # %bb.0:
; SSE-NEXT: retq
;
; SSE2-LABEL: canon_fp32_varargsf32:
; SSE2: # %bb.0:
; SSE2-NEXT: retq
;
; AVX-LABEL: canon_fp32_varargsf32:
; AVX: # %bb.0:
; AVX-NEXT: retq
;
; AVX2-LABEL: canon_fp32_varargsf32:
; AVX2: # %bb.0:
; AVX2-NEXT: retq
;
; AVX512F-LABEL: canon_fp32_varargsf32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: canon_fp32_varargsf32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: retq
Contributor

You can merge the check by adding a shared prefix, e.g.:
; RUN: llc -mattr=sse -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=CHECK,SSE

Comment on lines 58051 to 58055
if (Operand.isUndef()) {
APFloat CanonicalQNaN = APFloat::getQNaN(VT.getFltSemantics());
SDValue QuitNaN = DAG.getConstantFP(CanonicalQNaN, dl, VT);
return QuitNaN;
}
Contributor

This case should definitely be handled by the generic combiner

@pawan-nirpal-031 (Contributor Author), Sep 9, 2024

Comment on lines 58048 to 58049
if (auto *CFP = dyn_cast<ConstantFPSDNode>(Operand))
return combineConstantCanonicalize(Node, DAG);
Contributor

Constant folding is a separate patch

@pawan-nirpal-031 (Contributor Author), Sep 9, 2024

Sure, will make that a follow-up change. Thanks

// leads to a crash SoftPromoteHalfResult #0: t11: bf16,ch = strict_fmul t0,
// ConstantFP:bf16<APFloat(16256)>, t5 LLVM ERROR: Do not know how to soft
// promote this operator's result!
if (isUsedByNonCanonicalizingOp(Node) && VT != MVT::bf16) {
Contributor

Should not need to scan over users. Also don't add type hacks for this, just leave the unsupported cases as they were

// ConstantFP:bf16<APFloat(16256)>, t5 LLVM ERROR: Do not know how to soft
// promote this operator's result!
if (isUsedByNonCanonicalizingOp(Node) && VT != MVT::bf16) {
SDValue Chain = findLastStrictOpChain(Node, DAG);
Contributor

This doesn't make sense. You shouldn't be looking for other strictfp operations.

return DAG.getEntryNode();
}

bool isNonCanonicalizingOperation(SDNode *N) {
Contributor

Leave optimizations for a separate patch. It's likely unsafe to do mid lowering

@@ -57976,6 +57977,124 @@ static SDValue combineINTRINSIC_VOID(SDNode *N, SelectionDAG &DAG,
return SDValue();
}

SDValue combineConstantCanonicalize(SDNode *Node, SelectionDAG &DAG) {
Contributor

This would best be an APFloat->APFloat function free of the DAG machinery
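For reference, here is a bit-level Python sketch of what such an APFloat-style constant canonicalization might do — this models only the semantics (quiet a signaling NaN by setting the quiet bit, pass everything else through) and is not LLVM's actual APFloat API:

```python
QUIET_BIT = 1 << 51  # most significant bit of the f64 significand

def canonicalize_f64_bits(bits: int) -> int:
    """Model of canonicalizing a constant f64, given its raw bits:
    a signaling NaN becomes the corresponding quiet NaN; all other
    values (normals, denormals, zeros, infinities) are unchanged."""
    exp = (bits >> 52) & 0x7FF
    frac = bits & ((1 << 52) - 1)
    if exp == 0x7FF and frac != 0:   # any NaN
        return bits | QUIET_BIT      # ensure the quiet bit is set
    return bits

# A signaling NaN (quiet bit clear) canonicalizes to a quiet NaN.
assert canonicalize_f64_bits(0x7FF0000000000001) == 0x7FF8000000000001
# 1.0 is already canonical and is returned unchanged.
assert canonicalize_f64_bits(0x3FF0000000000000) == 0x3FF0000000000000
```

This is the kind of DAG-free helper the comment above is asking for: pure value-to-value, usable from any constant-folding path.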

return false;
}

SDValue combineCanonicalize(SDNode *Node, SelectionDAG &DAG) {
Contributor

This is primarily lowering. This should be a lowering only patch with no optimizations

if (isUsedByNonCanonicalizingOp(Node) && VT != MVT::bf16) {
SDValue Chain = findLastStrictOpChain(Node, DAG);
// TODO : Follow-up with tablegen pattern to generate mul * 1.0.
SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
Contributor

Lowering to strict_fmul can be done in the generic legalizer.

Contributor Author

I'm trying to avoid any changes in common infra, so as not to interfere with how the rest of the targets want to handle it. This can be moved over to custom selection too!
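The multiply-by-1.0 lowering being discussed works because an IEEE-754 multiplication is a real arithmetic operation: given a signaling NaN operand it returns a quiet NaN. A quick Python check on hardware doubles (assuming standard IEEE-754 behavior of the host FPU, as on x86-64 and AArch64):

```python
import math
import struct

def f64_bits(x: float) -> int:
    return struct.unpack("<Q", struct.pack("<d", x))[0]

# Build a signaling NaN double from its raw bit pattern
# (quiet bit, bit 51 of the significand, is clear).
snan = struct.unpack("<d", struct.pack("<Q", 0x7FF0000000000001))[0]
assert math.isnan(snan)

# Multiplying by 1.0 goes through the FPU, which quiets the NaN.
canonicalized = snan * 1.0
assert math.isnan(canonicalized)
assert f64_bits(canonicalized) & (1 << 51)  # quiet bit is now set
```

This is why a single `x * 1.0` (here emitted as a strict_fmul so it cannot be folded away) is a valid implementation of the canonicalize semantics.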

@@ -0,0 +1,853 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5
; RUN: llc -mattr=sse -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE
; RUN: llc -mattr=sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=SSE2
Contributor

Can we use the same -check-prefix=SSE here?

Contributor Author

Oh yes, sorry, that slipped; will address it. Thanks for reminding me ;)

Comment on lines 5 to 7
; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX2
; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512F
; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefix=AVX512BW
Contributor

Can we use the same -check-prefix=AVX for them?

Collaborator
@RKSimon RKSimon Sep 11, 2024

Use common prefixes, and correctly disable sse2 on x64 targets

; RUN: llc < %s -mtriple=x86_64- -mattr=-sse2 | FileCheck %s -check-prefixes=SSE,SSE1
; RUN: llc < %s -mtriple=x86_64- -mattr=+sse2 | FileCheck %s -check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64- -mattr=+avx  | FileCheck %s -check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64- -mattr=+avx2 | FileCheck %s -check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64- -mattr=+avx512f | FileCheck %s -check-prefixes=AVX,AVX512F
; RUN: llc < %s -mtriple=x86_64- -mattr=+avx512bw | FileCheck %s -check-prefixes=AVX,AVX512BW

Contributor Author

; RUN: llc < %s -mtriple=x86_64- -mattr=-sse2 | FileCheck %s -check-prefixes=SSE,SSE1 is causing the same crash as bf16, but for the half data type. I take it -mattr=-sse2 means SSE is enabled but SSE2 is not, and perhaps the half data type is not supported in SSE-only mode.

Contributor

We should not test half/bf16 against -sse2. You may put these tests in a separate file without -sse2.

; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512F
; RUN: llc -mattr=+avx512bw -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512BW

define void @v_test_canonicalize__half(half addrspace(1)* %out) {
Contributor

Add nounwind to remove those .cfi_ directives.

Comment on lines 297 to 299
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; AVX: {{.*}}
; SSE2: {{.*}}
Contributor

Remove these unused prefixes

Comment on lines 389 to 390
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; SSE: {{.*}}
Contributor

ditto.

@@ -58159,6 +58160,25 @@ static SDValue combineINTRINSIC_VOID(SDNode *N, SelectionDAG &DAG,
return SDValue();
}

SDValue combineCanonicalize(SDNode *Node, SelectionDAG &DAG) {
Contributor

Add static

Contributor

We prefer to use N

Contributor
@phoebewang phoebewang left a comment

LGTM with some nits.


// Canonicalize scalar variable FP Nodes.
SDValue One =
DAG.getNode(ISD::SINT_TO_FP, dl, VT, DAG.getConstant(1, dl, MVT::i32));
Collaborator

If you change MVT::i32 to VT.changeTypeToInteger() I think this should work for vectors as well.

Contributor Author

Should I handle it in a follow-up PR, or do you recommend I do it now?

Collaborator

Doing it in this patch might be simpler, thanks.

Contributor Author

Okay, will do it here. Thanks for the suggestion.

Contributor Author
@pawan-nirpal-031 pawan-nirpal-031 Sep 17, 2024

I tried this suggestion, but I'm running into a crash for f80 scalar input. What I realized while debugging, though, is that changeTypeToInteger may not be required. I made the following change, and vector inputs are handled pretty seamlessly.

Change

-  // Canonicalize scalar variable FP Nodes.
-  SDValue One =
-      DAG.getNode(ISD::SINT_TO_FP, dl, VT, DAG.getConstant(1, dl, MVT::i32));
+  SDValue One = DAG.getConstantFP(1.0, dl, VT);
+

Running under gdb, I see a BUILD_VECTOR like this:

t11: v4f32 = BUILD_VECTOR ConstantFP:f32<1.000000e+00>, ConstantFP:f32<1.000000e+00>, ConstantFP:f32<1.000000e+00>, ConstantFP:f32<1.000000e+00>

input

define <4 x float> @canon_fp32_varargsv4f32(<4 x float> %a) {
  %canonicalized = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %a)
  ret <4 x float> %canonicalized
}

result

.LCPI9_0:
	.long	0x3f800000                      # float 1
	.long	0x3f800000                      # float 1
	.long	0x3f800000                      # float 1
	.long	0x3f800000                      # float 1
	.text
	.globl	canon_fp32_varargsv4f32
	.p2align	4, 0x90
	.type	canon_fp32_varargsv4f32,@function
canon_fp32_varargsv4f32:                # @canon_fp32_varargsv4f32
	.cfi_startproc
# %bb.0:
	mulps	.LCPI9_0(%rip), %xmm0

input

define <4 x double> @canon_fp64_varargsv4f64(<4 x double> %a) {
  %canonicalized = call <4 x double> @llvm.canonicalize.v4f64(<4 x double> %a)
  ret <4 x double> %canonicalized
}

result

.LCPI10_0:
	.quad	0x3ff0000000000000              # double 1
	.quad	0x3ff0000000000000              # double 1
	.text
	.globl	canon_fp64_varargsv4f64
	.p2align	4, 0x90
	.type	canon_fp64_varargsv4f64,@function
canon_fp64_varargsv4f64:                # @canon_fp64_varargsv4f64
	.cfi_startproc
# %bb.0:
	movapd	.LCPI10_0(%rip), %xmm2          # xmm2 = [1.0E+0,1.0E+0]
	mulpd	%xmm2, %xmm0
	mulpd	%xmm2, %xmm1
	retq

input

define void @vec_canonicalize_x86_fp80(<4 x x86_fp80> addrspace(1)* %out) #1 {
  %val = load <4 x x86_fp80>, <4 x x86_fp80> addrspace(1)* %out
  %canonicalized = call <4 x x86_fp80> @llvm.canonicalize.v4f80(<4 x x86_fp80> %val)
  store <4 x x86_fp80> %canonicalized, <4 x x86_fp80> addrspace(1)* %out
  ret void
}

result

# %bb.0:
	fldt	30(%rdi)
	fldt	20(%rdi)
	fldt	10(%rdi)
	fldt	(%rdi)
	fld1
	fmul	%st, %st(1)
	fmul	%st, %st(2)
	fmul	%st, %st(3)
	fmulp	%st, %st(4)
	fxch	%st(3)
	fstpt	30(%rdi)
	fxch	%st(1)
	fstpt	20(%rdi)
	fstpt	10(%rdi)
	fstpt	(%rdi)
	retq
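The vector results above show why a single `getConstantFP(1.0, VT)` suffices: for vector types it becomes a splat of 1.0 and the multiply (mulps/mulpd/fmul) canonicalizes each lane independently. A hedged Python sketch of the lane-wise semantics:

```python
import math
import struct

def canonicalize_lane(x: float) -> float:
    # Model of the lowering: a real FP multiply by 1.0 quiets any
    # signaling NaN lane and leaves every other value unchanged.
    return x * 1.0

# A signaling NaN double, built from its raw bit pattern.
snan = struct.unpack("<d", struct.pack("<Q", 0x7FF0000000000001))[0]

vec = [1.5, -0.0, snan, float("inf")]
out = [canonicalize_lane(x) for x in vec]  # like mulpd with a 1.0 splat

assert out[0] == 1.5                                          # normals untouched
assert out[1] == 0.0 and math.copysign(1.0, out[1]) == -1.0   # -0.0 preserved
assert math.isnan(out[2])                                     # NaN lane stays NaN (quieted)
assert out[3] == math.inf                                     # infinities untouched
```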

Comment on lines 58169 to 58170
SDValue One =
DAG.getNode(ISD::SINT_TO_FP, dl, VT, DAG.getConstant(1, dl, MVT::i32));
Contributor

Why not just emit a regular getConstantFP instead of emitting this as an integer cast? This can also just go as the generic lowering implementation

Contributor

This also is lowering, it is not combine. It should not be invoked through PerformDAGCombine

Contributor Author
@pawan-nirpal-031 pawan-nirpal-031 Sep 17, 2024

Is this the correct place (the conditions under which setOperationAction is placed) and method (Legal, Custom, or Promote?) for handling these data types?

@@ -331,9 +331,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
       setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
     }
+    setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
     if (Subtarget.is64Bit()) {
       setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
       setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
+      setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
     }
   }
 
@@ -708,6 +710,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Promote);
     setOperationAction(ISD::STRICT_FTRUNC, MVT::f16, Promote);
     setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
+    setOperationAction(ISD::FCANONICALIZE, MVT::f16, Custom);
     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
     setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
 
@@ -924,6 +927,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     if (isTypeLegal(MVT::f80)) {
       setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
+      setOperationAction(ISD::FCANONICALIZE, MVT::f80, Custom);
     }
 
     setOperationAction(ISD::SETCC, MVT::f128, Custom);
@@ -1042,6 +1046,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     // No operations on x86mmx supported, everything uses intrinsics.
   }
 

   if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
     addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
                                                     : &X86::VR128RegClass);
@@ -1057,9 +1066,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::VSELECT,            MVT::v4f32, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
     setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
+    setOperationAction(ISD::FCANONICALIZE,      MVT::v4f32, Custom);
 
     setOperationAction(ISD::LOAD,               MVT::v2f32, Custom);
     setOperationAction(ISD::STORE,              MVT::v2f32, Custom);
+    setOperationAction(ISD::FCANONICALIZE,      MVT::v2f32, Custom);
 
     setOperationAction(ISD::STRICT_FADD,        MVT::v4f32, Legal);
     setOperationAction(ISD::STRICT_FSUB,        MVT::v4f32, Legal);
@@ -1120,6 +1131,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::UMULO,              MVT::v2i32, Custom);
 
     setOperationAction(ISD::FNEG,               MVT::v2f64, Custom);
+    setOperationAction(ISD::FCANONICALIZE,      MVT::v2f64, Custom);
     setOperationAction(ISD::FABS,               MVT::v2f64, Custom);
     setOperationAction(ISD::FCOPYSIGN,          MVT::v2f64, Custom);
 
@@ -1452,6 +1464,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
 
       setOperationAction(ISD::FMAXIMUM,          VT, Custom);
       setOperationAction(ISD::FMINIMUM,          VT, Custom);
+      setOperationAction(ISD::FCANONICALIZE,     VT, Custom);
     }
 
     setOperationAction(ISD::LRINT, MVT::v8f32, Custom);
@@ -1796,6 +1809,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setOperationAction(ISD::FMA,   VT, Legal);
       setOperationAction(ISD::STRICT_FMA, VT, Legal);
       setOperationAction(ISD::FCOPYSIGN, VT, Custom);
+      setOperationAction(ISD::FCANONICALIZE, VT, Custom);
     }
     setOperationAction(ISD::LRINT, MVT::v16f32,
                        Subtarget.hasDQI() ? Legal : Custom);

Contributor
@arsenm arsenm left a comment

This is not an optimization combine. This is lowering and should go through lowering interfaces

Comment on lines 58169 to 58170
SDValue One =
DAG.getNode(ISD::SINT_TO_FP, dl, VT, DAG.getConstant(1, dl, MVT::i32));
Contributor

This also is lowering, it is not combine. It should not be invoked through PerformDAGCombine

// promote this operator's result!
SDValue Chain = DAG.getEntryNode();
SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
{Chain, One, Operand});
Contributor

Constant operands canonically should be the RHS

Contributor Author

@arsenm @RKSimon friendly ping for re review ;)

@pawan-nirpal-031
Contributor Author

Hi @arsenm @RKSimon friendly ping for review.

Collaborator
@RKSimon RKSimon left a comment

LGTM

@wzssyqa
Contributor

wzssyqa commented Sep 23, 2024

Ohh. Thanks so much. With this patch, I can continue to work on fminimum_num/fmaximum_num support of Clang.

@pawan-nirpal-031
Contributor Author

Ohh. Thanks so much. With this patch, I can continue to work on fminimum_num/fmaximum_num support of Clang.

Awesome ;)

@RKSimon RKSimon merged commit 26f272e into llvm:main Sep 23, 2024
8 checks passed

@pawan-nirpal-031 Congratulations on having your first Pull Request (PR) merged into the LLVM Project!

Your changes will be combined with recent changes from other authors, then tested by our build bots. If there is a problem with a build, you may receive a report in an email or a comment on this PR.

Please check whether problems have been caused by your change specifically, as the builds can include changes from many authors. It is not uncommon for your change to be included in a build that fails due to someone else's changes, or infrastructure issues.

How to do this, and the rest of the post-merge process, is covered in detail here.

If your change does cause a problem, it may be reverted, or you can revert it yourself. This is a normal part of LLVM development. You can fix your changes and open a new PR to merge them again.

If you don't get any reports, no action is required from you. Your changes are working as expected, well done!

// ConstantFP:bf16<APFloat(16256)>, t5 LLVM ERROR: Do not know how to soft
// promote this operator's result!
SDValue Chain = DAG.getEntryNode();
SDValue StrictFmul = DAG.getNode(ISD::STRICT_FMUL, dl, {VT, MVT::Other},
Contributor

This should move to generic code as the default expansion

Collaborator

@pawan-nirpal-031 Are you happy to work on a follow up patch?

Contributor Author
@pawan-nirpal-031 pawan-nirpal-031 Sep 23, 2024

Yes I will address this in a follow up patch Simon.

@arsenm won't it interfere with how the other targets want to handle it? That is why I was reluctant to place it in any common infra in the first place. If it is feasible, I will move it over there.

Contributor

No, it will only provide a reasonable default for other targets. Targets are still free to make it legal or custom lower as they choose

Contributor Author

Okay, sure then. I will move it in the following PR. Should I also handle constants in the next one, or keep that for a later PR?

Contributor

PRs are ideally always as minimal as possible, so keep it separate

Contributor Author

Alright then, thanks for the suggestion. I will start creating the next PR.

Contributor Author

Hi @arsenm, should I do it in visitFCANONICALIZE in the generic combiner? I am not aware of any generic lowering!

Contributor

No. Usual place would be in LegalizeDAG, or a helper in TargetLowering used by LegalizeDAG

Labels
backend:X86 llvm:SelectionDAG SelectionDAGISel as well
7 participants