[LoopUnroll] Support parallel reductions for minmax by madhur13490 · Pull Request #182473 · llvm/llvm-project

madhur13490 · 2026-02-20T11:01:02Z

This patch

Supports parallel reductions for min/max operations in LoopUnroller.
Adds relevant test (including intrinsics).
Renames flag -unroll-add-parallel-reduction to -unroll-parallel-reduction.
Relaxes check in IVDescriptors.cpp (getMinMaxRecurrence) to handle out-of-loop uses.

Planning to take support for vector types in the next patch.

llvmbot · 2026-02-20T11:01:36Z

@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: Madhur Amilkanthwar (madhur13490)

Changes

This patch

Supports parallel reductions for min/max operations in LoopUnroller.
Adds relevant test (including intrinsics) and their 8x unroll version.
Renames flag -unroll-add-parallel-reduction to -unroll-parallel-reduction.

Planning to take support for vector types in the next patch.

Patch is 79.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/182473.diff

3 Files Affected:

(modified) llvm/lib/Transforms/Utils/LoopUnroll.cpp (+14-13)
(modified) llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll (+1-1)
(modified) llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll (+1153-11)

diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index d9422afe5e82a..315f919060bea 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -108,9 +108,9 @@ UnrollVerifyLoopInfo("unroll-verify-loopinfo", cl::Hidden,
 #endif
                     );
 
-static cl::opt<bool> UnrollAddParallelReductions(
-    "unroll-add-parallel-reductions", cl::init(false), cl::Hidden,
-    cl::desc("Allow unrolling to add parallel reduction phis."));
+static cl::opt<bool> UnrollParallelReductions(
+    "unroll-parallel-reductions", cl::init(false), cl::Hidden,
+    cl::desc("Allow unrolling to parallelize reductions."));
 
 /// Check if unrolling created a situation where we need to insert phi nodes to
 /// preserve LCSSA form.
@@ -670,8 +670,8 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
   // to not exit.
   DenseMap<PHINode *, RecurrenceDescriptor> Reductions;
   bool CanAddAdditionalAccumulators =
-      (UnrollAddParallelReductions.getNumOccurrences() > 0
-           ? UnrollAddParallelReductions
+      (UnrollParallelReductions.getNumOccurrences() > 0
+           ? UnrollParallelReductions
            : ULO.AddAdditionalAccumulators) &&
       !CompletelyUnroll && L->getNumBlocks() == 1 &&
       (ULO.Runtime ||
@@ -1102,9 +1102,12 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
         Builder.setFastMathFlags(Reductions.begin()->second.getFastMathFlags());
         RecurKind RK = Reductions.begin()->second.getRecurrenceKind();
         for (Instruction *RdxPart : drop_begin(PartialReductions)) {
-          RdxResult = Builder.CreateBinOp(
-              (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(RK),
-              RdxPart, RdxResult, "bin.rdx");
+          if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))
+            RdxResult = createMinMaxOp(Builder, RK, RdxResult, RdxPart);
+          else
+            RdxResult = Builder.CreateBinOp(
+                (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(RK),
+                RdxPart, RdxResult, "bin.rdx");
         }
         NeedToFixLCSSA = true;
         for (Instruction *RdxPart : PartialReductions)
@@ -1263,12 +1266,10 @@ llvm::canParallelizeReductionWhenUnrolling(PHINode &Phi, Loop *L,
   if (RdxDesc.hasUsesOutsideReductionChain())
     return std::nullopt;
   RecurKind RK = RdxDesc.getRecurrenceKind();
-  // Skip unsupported reductions.
-  // TODO: Handle additional reductions, including FP and min-max
-  // reductions.
+  // Skip unsupported reductions. Min/max (integer and FP) are supported.
+  // TODO: Handle additional reductions.
   if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
-      RecurrenceDescriptor::isFindRecurrenceKind(RK) ||
-      RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))
+      RecurrenceDescriptor::isFindRecurrenceKind(RK))
     return std::nullopt;
 
   if (RdxDesc.hasExactFPMath())
diff --git a/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll b/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll
index e5e969d638acd..5aaabb7b73675 100644
--- a/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll
+++ b/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -p loop-unroll -unroll-add-parallel-reductions -unroll-allow-partial -unroll-max-count=4 -S %s | FileCheck %s
+; RUN: opt -p loop-unroll -unroll-parallel-reductions -unroll-allow-partial -unroll-max-count=4 -S %s | FileCheck %s
 
 define i32 @test_add(ptr %src, i64 %n, i32 %start) {
 ; CHECK-LABEL: define i32 @test_add(
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll
index 840cc6c507c3d..f7cab64ef568e 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -p loop-unroll -unroll-add-parallel-reductions -S %s | FileCheck %s
+; RUN: opt -p loop-unroll -unroll-parallel-reductions -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -S %s | FileCheck %s
 
 define i32 @test_add_reduction(ptr %a, i64 %n) {
 ; CHECK-LABEL: define i32 @test_add_reduction(
-; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
+; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
@@ -70,7 +70,7 @@ exit:
 
 define i32 @test_add_reduction_constant_op(ptr %a, i64 %n) {
 ; CHECK-LABEL: define i32 @test_add_reduction_constant_op(
-; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
+; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
@@ -123,7 +123,7 @@ exit:
 
 define i32 @test_add_reduction_8x_unroll(ptr %a, i64 %n) {
 ; CHECK-LABEL: define i32 @test_add_reduction_8x_unroll(
-; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
+; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 7
@@ -222,7 +222,7 @@ exit:
 
 define <4 x i32> @test_vector_add_reduction(ptr %a, i64 %n) {
 ; CHECK-LABEL: define <4 x i32> @test_vector_add_reduction(
-; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
+; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
@@ -287,9 +287,77 @@ exit:
   ret <4 x i32> %res
 }
 
+; TODO: Add parallel reduction support for vector reduction phis (e.g. <4 x float>
+; fmin via llvm.minnum.v4f32). Currently they are unrolled with a chained
+; reduction only.
+define <4 x float> @test_vector_fmin_reduction_intrinsic(ptr %a, i64 %n) {
+; CHECK-LABEL: define <4 x float> @test_vector_fmin_reduction_intrinsic(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
+; CHECK:       [[ENTRY_NEW]]:
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[RDX:%.*]] = phi <4 x float> [ splat (float 0x7FF0000000000000), %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[GEP_A:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[GEP_A]], align 16
+; CHECK-NEXT:    [[RDX_NEXT:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[RDX]], <4 x float> [[TMP2]])
+; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[GEP_A_1:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[A]], i64 [[IV_NEXT]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[GEP_A_1]], align 16
+; CHECK-NEXT:    [[RDX_NEXT_1]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[RDX_NEXT]], <4 x float> [[TMP3]])
+; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
+; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
+; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       [[EXIT_UNR_LCSSA]]:
+; CHECK-NEXT:    [[RES_PH:%.*]] = phi <4 x float> [ [[RDX_NEXT_1]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
+; CHECK-NEXT:    [[RDX_UNR:%.*]] = phi <4 x float> [ [[RDX_NEXT_1]], %[[LOOP]] ]
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
+; CHECK:       [[LOOP_EPIL_PREHEADER]]:
+; CHECK-NEXT:    [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[RDX_EPIL_INIT:%.*]] = phi <4 x float> [ splat (float 0x7FF0000000000000), %[[ENTRY]] ], [ [[RDX_UNR]], %[[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD2]])
+; CHECK-NEXT:    br label %[[LOOP_EPIL:.*]]
+; CHECK:       [[LOOP_EPIL]]:
+; CHECK-NEXT:    [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw <4 x float>, ptr [[A]], i64 [[IV_EPIL_INIT]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr [[GEP_A_EPIL]], align 16
+; CHECK-NEXT:    [[RDX_NEXT_EPIL:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[RDX_EPIL_INIT]], <4 x float> [[TMP4]])
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RES:%.*]] = phi <4 x float> [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
+; CHECK-NEXT:    ret <4 x float> [[RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %rdx = phi <4 x float> [ <float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000, float 0x7FF0000000000000>, %entry ], [ %rdx.next, %loop ]
+  %gep.a = getelementptr inbounds nuw <4 x float>, ptr %a, i64 %iv
+  %1 = load <4 x float>, ptr %gep.a, align 16
+  %rdx.next = call <4 x float> @llvm.minnum.v4f32(<4 x float> %rdx, <4 x float> %1)
+  %iv.next = add nuw nsw i64 %iv, 1
+  %ec = icmp eq i64 %iv.next, %n
+  br i1 %ec, label %exit, label %loop, !llvm.loop !0
+
+exit:
+  %res = phi <4 x float> [ %rdx.next, %loop ]
+  ret <4 x float> %res
+}
+
 define float @test_fadd_reduction(ptr %a, i64 %n) {
 ; CHECK-LABEL: define float @test_fadd_reduction(
-; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
+; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
@@ -313,7 +381,7 @@ define float @test_fadd_reduction(ptr %a, i64 %n) {
 ; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK:       [[EXIT_UNR_LCSSA]]:
 ; CHECK-NEXT:    [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
@@ -356,7 +424,7 @@ exit:
 
 define float @test_fadd_no_reassoc(ptr %a, i64 %n) {
 ; CHECK-LABEL: define float @test_fadd_no_reassoc(
-; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
+; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
@@ -379,7 +447,7 @@ define float @test_fadd_no_reassoc(ptr %a, i64 %n) {
 ; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       [[EXIT_UNR_LCSSA]]:
 ; CHECK-NEXT:    [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
@@ -421,7 +489,7 @@ exit:
 
 define float @test_fadd_other_fastmath(ptr %a, i64 %n) {
 ; CHECK-LABEL: define float @test_fadd_other_fastmath(
-; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
+; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
@@ -444,7 +512,7 @@ define float @test_fadd_other_fastmath(ptr %a, i64 %n) {
 ; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK:       [[EXIT_UNR_LCSSA]]:
 ; CHECK-NEXT:    [[RES_PH:%.*]] = phi float [ [[RDX_NEXT_1]], %[[LOOP]] ]
 ; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
@@ -484,6 +552,1061 @@ exit:
   ret float %res
 }
 
+define i32 @test_smin_reduction(ptr %a, i64 %n) {
+; CHECK-LABEL: define i32 @test_smin_reduction(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
+; CHECK:       [[ENTRY_NEW]]:
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[MIN_1:%.*]] = phi i32 [ 2147483647, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[MIN:%.*]] = phi i32 [ 2147483647, %[[ENTRY_NEW]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[GEP_A]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[MIN]], [[TMP2]]
+; CHECK-NEXT:    [[RDX_NEXT]] = select i1 [[CMP]], i32 [[MIN]], i32 [[TMP2]]
+; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV_NEXT]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[GEP_A_1]], align 4
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp slt i32 [[MIN_1]], [[TMP3]]
+; CHECK-NEXT:    [[RDX_NEXT_1]] = select i1 [[CMP_1]], i32 [[MIN_1]], i32 [[TMP3]]
+; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
+; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
+; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK:       [[EXIT_UNR_LCSSA]]:
+; CHECK-NEXT:    [[RES_PH:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
+; CHECK-NEXT:    [[MIN_UNR:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
+; CHECK-NEXT:    [[RDX_MINMAX:%.*]] = call i32 @llvm.smin.i32(i32 [[RDX_NEXT]], i32 [[RDX_NEXT_1]])
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
+; CHECK:       [[LOOP_EPIL_PREHEADER]]:
+; CHECK-NEXT:    [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[MIN_EPIL_INIT:%.*]] = phi i32 [ 2147483647, %[[ENTRY]] ], [ [[RDX_MINMAX]], %[[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD2]])
+; CHECK-NEXT:    br label %[[LOOP_EPIL:.*]]
+; CHECK:       [[LOOP_EPIL]]:
+; CHECK-NEXT:    [[GEP_A_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV_EPIL_INIT]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[GEP_A_EPIL]], align 4
+; CHECK-NEXT:    [[CMP_EPIL:%.*]] = icmp slt i32 [[MIN_EPIL_INIT]], [[TMP4]]
+; CHECK-NEXT:    [[RDX_NEXT_EPIL:%.*]] = select i1 [[CMP_EPIL]], i32 [[MIN_EPIL_INIT]], i32 [[TMP4]]
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[RDX_MINMAX]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %min = phi i32 [ 2147483647, %entry ], [ %rdx.next, %loop ]
+  %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
+  %1 = load i32, ptr %gep.a, align 4
+  %cmp = icmp slt i32 %min, %1
+  %rdx.next = select i1 %cmp, i32 %min, i32 %1
+  %iv.next = add nuw nsw i64 %iv, 1
+  %ec = icmp eq i64 %iv.next, %n
+  br i1 %ec, label %exit, label %loop, !llvm.loop !0
+
+exit:
+  %res = phi i32 [ %rdx.next, %loop ]
+  ret i32 %res
+}
+
+define i32 @test_smax_reduction(ptr %a, i64 %n) {
+; CHECK-LABEL: define i32 @test_smax_reduction(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
+; CHECK-NEXT:    [[XTRAITER:%.*]] = and i64 [[N]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
+; CHECK:       [[ENTRY_NEW]]:
+; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[MAX_1:%.*]] = phi i32 [ -2147483648, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[MAX:%.*]] = phi i32 [ -2147483648, %[[ENTRY_NEW]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[GEP_A]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[MAX]], [[TMP2]]
+; CHECK-NEXT:    [[RDX_NEXT]] = select i1 [[CMP]], i32 [[MAX]], i32 [[TMP2]]
+; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV_NEXT]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[GEP_A_1]], align 4
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[MAX_1]], [[TMP3]]
+; CHECK-NEXT:    [[RDX_NEXT_1]] = select i1 [[CMP_1]], i32 [[MAX_1]], i32 [[TMP3]]
+; CHECK-NEXT:    [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
+; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
+; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK:       [[EXIT_UNR_LCSSA]]:
+; CHECK-NEXT:    [[RES_PH:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
+; CHECK-NEXT:    [[MAX_UNR:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
+; CHECK-NEXT:    [[RDX_MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_NEXT]], i32 [[RDX_NEXT_1]])
+; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
+; CHECK:       [[LOOP_EPIL_PREHEADER]]:
+; CHECK-NEXT:    [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[MAX_EPIL_INIT:%.*]] = phi i32 [ -2147483648, %[[ENTRY]] ], [ [[RDX_MINMAX]], %[[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT:    [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD2]])
+; CHECK-NEXT:    br label %[[LOOP_EPIL:.*]]
+; CHECK...
[truncated]

fhahn · 2026-02-23T12:25:00Z

@@ -484,6 +552,1061 @@ exit:
  ret float %res
 }

+define i32 @test_smin_reduction(ptr %a, i64 %n) {


probably best to move the min/max reductiosn to a separate test file, as this one is growing quite big

fhahn · 2026-02-23T12:26:41Z

@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -p loop-unroll -unroll-add-parallel-reductions -S %s | FileCheck %s
+; RUN: opt -p loop-unroll -unroll-parallel-reductions -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -S %s | FileCheck %s


Why use -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math? Can we instead have tests with and without the needed fast-math flags, to make sure we handle them correctly?

Do you mean to use nnan ninf nsz on select and instrinsics?

Thanks. BTW, in case you didn't notice, tests with intrinsics are still chained. Either I can add TODO to handle them or try harder to fix in this patch. (I don't see a clear way in the code to fix this unless I am missing something already there)

Ping @fhahn! Are you okay to have FIXME/TODO comments for min/max intrinsic tests in this patch and let me handle them in a separate patch?

It's better to have the necessary tests in place here, in this patch.

yes, please update with the split tests

sjoerdmeijer · 2026-03-17T08:54:18Z

Taking a first look at this, catching up on this.... looks like there's one unresolved comment from Florian:

probably best to move the min/max reductiosn to a separate test file, as this one is growing quite big

madhur13490 · 2026-03-17T08:57:09Z

Taking a first look at this, catching up on this.... looks like there's one unresolved comment from Florian:

probably best to move the min/max reductiosn to a separate test file, as this one is growing quite big

Yes, I have this addressed locally; I don't have any objections. I am waiting for @fhahn to comment on the other comment. Based on the response, will update the PR.

sjoerdmeijer · 2026-03-17T09:09:05Z

Yes, I have this addressed locally; I don't have any objections. I am waiting for @fhahn to comment on the other comment. Based on the response, will update the PR.

Ok, I think it's better just to upload the latest revision, and I've left a remark above about the test.

github-actions · 2026-03-18T08:11:21Z

🐧 Linux x64 Test Results

196046 tests passed
5292 tests skipped

✅ The build succeeded and all tests passed.

github-actions · 2026-03-18T08:11:21Z

🪟 Windows x64 Test Results

135284 tests passed
3343 tests skipped

✅ The build succeeded and all tests passed.

madhur13490 · 2026-03-18T10:15:40Z

      auto *I = dyn_cast<Instruction>(U);
-      if (!I || (!TheLoop->contains(I) &&
-                 (V != BackedgeValue || ++OutOfLoopUses > 1)))
+      if (!I || (!TheLoop->contains(I) && V != BackedgeValue))


Relaxing the strict check to allow out-of-loop uses.

This is unrelated to the patch, right? If so, please split off with separate tests. It will also need LoopVectorize test to make sure this is handled correctly there.

Unfortunately not. Without this isReductionPHI() returns false as getMinMaxRecurrence() fails to return true.

Ping @fhahn. Can you please let me know what you think about the patch?

Right, this is needed because we query after we already (partial) transformed the input so there are extra uses? Can we check earlier?

If not, would be good to split this off + a loop-vectorize test

I prefer to split. Please have a look at #189906

For more context of this patch, please see llvm#182473. By relaxing, OutOfLoopUses check, we can recognize more case for minmax reduction and thus vectorize.

For more context of this patch, please see llvm#182473. By relaxing, `OutOfLoopUses` check, we can recognize more case for minmax reduction and thus vectorize.

For more context of this patch, please see #182473. By relaxing, `OutOfLoopUses` check, we can recognize more case for minmax reduction and thus vectorize.

For more context of this patch, please see llvm#182473. By relaxing, `OutOfLoopUses` check, we can recognize more case for minmax reduction and thus vectorize.

fhahn

Do this need a rebase after #189906 landed?

This patch * Supports parallel reductions for min/max operations in LoopUnroller. * Adds relevant test (including intrinsics) and their 8x unroll version. * Renames flag -unroll-add-parallel-reduction to -unroll-parallel-reduction. Planning to take support for vector types in the next patch.

madhur13490 · 2026-05-29T06:07:12Z

Do this need a rebase after #189906 landed?

Thanks for resurrecting this PR. It was buried in my list. I rebased and everything looks as expected.

fhahn · 2026-05-31T19:30:46Z

+static cl::opt<bool> UnrollParallelReductions(
+    "unroll-parallel-reductions", cl::init(false), cl::Hidden,
+    cl::desc("Allow unrolling to parallelize reductions."));


Renaming/changing option is unrelated to support for min/max, would be good to split off, although renaming the option may break some users.

Hmm..but we no longer support only add and that is the reason I changed the flag. I am keeping it unchanged but I don't mind if this is appropriate for a new patch.

fhahn · 2026-05-31T19:35:04Z

+  // Skip unsupported reductions. Min/max (integer and FP) are supported.
+  // TODO: Handle additional reductions.


Suggested change

// Skip unsupported reductions. Min/max (integer and FP) are supported.

// TODO: Handle additional reductions.

// Skip unsupported reductions.

// TODO: Handle any-of and find-last reductions.

We support all bin-op reductions and min/max (with the patch).

Remaining cases are any-of and findlast, not sure if there is anything that makes them difficult to support though

fhahn · 2026-05-31T19:38:33Z

+
+; -------------------------------------------------------------
+; floating-point min and max reductions without fast-math flags.
+; These are currently chained together.


I think without fast math flags we cannot parallelize them, better drop the currently to avoid potential confusion

Suggested change

; These are currently chained together.

; These are chained together sequentially.

fhahn · 2026-05-31T19:40:29Z

+  %min = phi float [ 0x7FF0000000000000, %entry ], [ %rdx.next, %loop ]
+  %gep.a = getelementptr inbounds float, ptr %a, i64 %iv
+  %1 = load float, ptr %gep.a, align 4
+  %rdx.next = call nnan ninf nsz float @llvm.minnum.f32(float %min, float %1)


for completeness, could you also add a test with umin/umax/smin intrinsics?

done. (We may need to revisit this file in future to have better logical groupings of the tests)

fhahn · 2026-05-31T19:40:49Z

+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]]}
+; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]]}
+;.


can generate checks with --check-globals=none to remove those

fhahn

LGTM, thanks

fhahn · 2026-06-01T13:26:58Z

+  %max = phi i32 [ 0, %entry ], [ %rdx.next, %loop ]
+  %gep.a = getelementptr inbounds i32, ptr %a, i64 %iv
+  %1 = load i32, ptr %gep.a, align 4
+  %rdx.next = call i32 @llvm.umax.i32(i32 %max, i32 %1)


hmm, surprised we do not support this form yet, as I thought the IVDescriptors logic would match it.

Yes, it does indeed but we need something like #200868. I will merge this one and rebase #200868.

…max.ll Co-authored-by: Florian Hahn <[email protected]>

llvm-ci · 2026-06-01T17:57:42Z

LLVM Buildbot has detected a new failure on builder profcheck running on profcheck-b2 while building llvm at step 5 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/223/builds/4214

Here is the relevant piece of the build log for the reference

Step 5 (annotate) failure: '../llvm-zorg/zorg/buildbot/builders/annotated/profcheck.sh --jobs=64' (failure)
...
UNSUPPORTED: LLVM :: MC/Disassembler/Xtensa/windowed.txt (44872 of 44882)
UNSUPPORTED: LLVM :: MC/M68k/Data/Classes/MxMoveCCR.s (44873 of 44882)
UNSUPPORTED: LLVM :: MC/M68k/Data/Classes/MxMove_RR.s (44874 of 44882)
UNSUPPORTED: LLVM :: MC/Xtensa/Core/branch.s (44875 of 44882)
UNSUPPORTED: LLVM :: tools/gold/X86/emit-llvm.ll (44876 of 44882)
UNSUPPORTED: LLVM :: CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor-Invalid-Flags-LargeNumber.ll (44877 of 44882)
UNSUPPORTED: LLVM :: CodeGen/DirectX/ContainerData/RootSignature-StaticSamplers_V3.ll (44878 of 44882)
UNSUPPORTED: LLVM :: CodeGen/DirectX/cosh_error.ll (44879 of 44882)
UNSUPPORTED: LLVM :: CodeGen/DirectX/forward_handle_on_alloca.ll (44880 of 44882)
UNSUPPORTED: LLVM :: CodeGen/SystemZ/Large/branch-range-06.py (44881 of 44882)
FAIL: LLVM :: Transforms/LoopUnroll/runtime-unroll-reductions-min-max.ll (44882 of 44882)
********************
Failed Tests (1):
  LLVM :: Transforms/LoopUnroll/runtime-unroll-reductions-min-max.ll


Testing Time: 204.94s

Total Discovered Tests: 54568
  Excluded   :   305 (0.56%)
  Skipped    :    19 (0.03%)
  Unsupported:  1134 (2.08%)
  Passed     : 53109 (97.33%)
  Failed     :     1 (0.00%)
FAILED: test/CMakeFiles/check-llvm /b/profcheck-build/build/test/CMakeFiles/check-llvm 
cd /b/profcheck-build/build/test && /usr/bin/python3 /b/profcheck-build/build/./bin/llvm-lit --exclude-xfail /b/profcheck-build/build/test
ninja: build stopped: subcommand failed.
Step 7 (Ninja) failure: Ninja (failure)
...
UNSUPPORTED: LLVM :: MC/Disassembler/Xtensa/windowed.txt (44872 of 44882)
UNSUPPORTED: LLVM :: MC/M68k/Data/Classes/MxMoveCCR.s (44873 of 44882)
UNSUPPORTED: LLVM :: MC/M68k/Data/Classes/MxMove_RR.s (44874 of 44882)
UNSUPPORTED: LLVM :: MC/Xtensa/Core/branch.s (44875 of 44882)
UNSUPPORTED: LLVM :: tools/gold/X86/emit-llvm.ll (44876 of 44882)
UNSUPPORTED: LLVM :: CodeGen/DirectX/ContainerData/RootSignature-RootDescriptor-Invalid-Flags-LargeNumber.ll (44877 of 44882)
UNSUPPORTED: LLVM :: CodeGen/DirectX/ContainerData/RootSignature-StaticSamplers_V3.ll (44878 of 44882)
UNSUPPORTED: LLVM :: CodeGen/DirectX/cosh_error.ll (44879 of 44882)
UNSUPPORTED: LLVM :: CodeGen/DirectX/forward_handle_on_alloca.ll (44880 of 44882)
UNSUPPORTED: LLVM :: CodeGen/SystemZ/Large/branch-range-06.py (44881 of 44882)
FAIL: LLVM :: Transforms/LoopUnroll/runtime-unroll-reductions-min-max.ll (44882 of 44882)
********************
Failed Tests (1):
  LLVM :: Transforms/LoopUnroll/runtime-unroll-reductions-min-max.ll


Testing Time: 204.94s

Total Discovered Tests: 54568
  Excluded   :   305 (0.56%)
  Skipped    :    19 (0.03%)
  Unsupported:  1134 (2.08%)
  Passed     : 53109 (97.33%)
  Failed     :     1 (0.00%)
FAILED: test/CMakeFiles/check-llvm /b/profcheck-build/build/test/CMakeFiles/check-llvm 
cd /b/profcheck-build/build/test && /usr/bin/python3 /b/profcheck-build/build/./bin/llvm-lit --exclude-xfail /b/profcheck-build/build/test
ninja: build stopped: subcommand failed.
program finished with exit code 1
elapsedTime=234.666619

#200892) This reverts commit 1e79ea1.

madhur13490 requested review from fhahn and huntergr-arm February 20, 2026 11:01

llvmbot added the llvm:transforms label Feb 20, 2026

fhahn reviewed Feb 23, 2026

View reviewed changes

sjoerdmeijer requested a review from davemgreen March 17, 2026 08:43

madhur13490 force-pushed the par-min-max branch from 01a956a to 6bcc8a0 Compare March 18, 2026 08:39

llvmbot added the llvm:analysis Includes value tracking, cost tables and constant folding label Mar 18, 2026

madhur13490 commented Mar 18, 2026

View reviewed changes

madhur13490 mentioned this pull request Apr 1, 2026

[LV] Relax OutOfLoopUses check in getMinMaxRecurrence() #189906

Merged

fhahn reviewed May 28, 2026

View reviewed changes

madhur13490 added 4 commits May 28, 2026 22:43

fixup! address review comments

3a4393f

fixup! fix test failure.

955e5f3

fixup! rebase after recent fix

4e9388a

madhur13490 force-pushed the par-min-max branch from 8819f12 to 4e9388a Compare May 29, 2026 06:05

fhahn reviewed May 31, 2026

View reviewed changes

fixup! address review comments

d39be66

madhur13490 requested a review from fhahn June 1, 2026 06:02

fhahn approved these changes Jun 1, 2026

View reviewed changes

Update llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions-min-…

35f6118

…max.ll Co-authored-by: Florian Hahn <[email protected]>

madhur13490 enabled auto-merge (squash) June 1, 2026 17:05

madhur13490 merged commit 1e79ea1 into llvm:main Jun 1, 2026
9 of 10 checks passed

madhur13490 added a commit that referenced this pull request Jun 1, 2026

Revert "[LoopUnroll] Support parallel reductions for minmax (#182473)" (

56ccbc2

#200892) This reverts commit 1e79ea1.

madhur13490 mentioned this pull request Jun 2, 2026

Reapply "[LoopUnroll] Support parallel reductions for minmax" #201010

Open

adams381 mentioned this pull request Jun 4, 2026

[CIR] Lower constant block addresses for goto #201644

Open

		// Skip unsupported reductions. Min/max (integer and FP) are supported.
		// TODO: Handle additional reductions.

	; These are currently chained together.
	; These are chained together sequentially.

Conversation

madhur13490 commented Feb 20, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Feb 20, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

sjoerdmeijer commented Mar 17, 2026

Uh oh!

madhur13490 commented Mar 17, 2026

Uh oh!

sjoerdmeijer commented Mar 17, 2026

Uh oh!

github-actions Bot commented Mar 18, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

🐧 Linux x64 Test Results

Uh oh!

github-actions Bot commented Mar 18, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

🪟 Windows x64 Test Results

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

fhahn left a comment

Choose a reason for hiding this comment

Uh oh!

madhur13490 commented May 29, 2026

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

fhahn left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

madhur13490 commented Feb 20, 2026 •

edited

Loading

llvmbot commented Feb 20, 2026 •

edited

Loading

github-actions Bot commented Mar 18, 2026 •

edited

Loading

github-actions Bot commented Mar 18, 2026 •

edited

Loading