2 changes: 1 addition & 1 deletion aten/src/ATen/native/BatchLinearAlgebra.cpp
@@ -685,7 +685,7 @@ TORCH_META_FUNC(linalg_cholesky_ex)(const Tensor& A,
auto ndim = A_shape.size();

// L
auto L_strides = at::native::batched_matrix_contiguous_strides(A_shape, /*f-contig*=*/true);
auto L_strides = at::native::batched_matrix_contiguous_strides(A_shape, /*f-contig*=*/A.device().type() != at::kMPS);

Collaborator:
Why is MPS different?

Isalia20 (Author), Feb 12, 2025:

The MPS kernel assumes a row-major layout for the matrix it decomposes.
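
For context, a rough sketch of what the f-contig flag means for the last two dimensions of a batched matrix; this is an illustrative re-implementation, not the actual at::native::batched_matrix_contiguous_strides helper:

import torch

# Illustrative sketch (not the ATen helper): contiguous strides for a batched
# matrix whose last two dims are either column-major (f_contig) or row-major.
def batched_matrix_strides_sketch(shape, f_contig):
    *batch, m, n = shape
    # Column-major (Fortran) matrix strides are (1, m); row-major are (n, 1).
    mat_strides = (1, m) if f_contig else (n, 1)
    batch_strides = []
    step = m * n
    for b in reversed(batch):
        batch_strides.append(step)
        step *= b
    return tuple(reversed(batch_strides)) + mat_strides

shape = (4, 3, 3)
print(batched_matrix_strides_sketch(shape, f_contig=True))   # (9, 1, 3): LAPACK-style, CPU/CUDA
print(batched_matrix_strides_sketch(shape, f_contig=False))  # (9, 3, 1): row-major, what the MPS kernel expects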

Collaborator:

Can the kernel be made row-major/col-major agnostic, so as to preserve consistency across backends?

Isalia20 (Author):

I'll take a look ~next week to see if I can make it work for col-major so we don't need to make it row-major for MPS only. But why do we want to preserve consistency across backends? A lot of ops on MPS use a row-major layout and require a contiguous() call on the input before passing it to the MPS kernel.

Collaborator:

In linalg, LAPACK seems to be the source of truth, and it is written in Fortran, where column-major is the standard layout :(
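
For what it's worth, that convention is visible from Python on the CPU backend, consistent with the meta function above requesting F-contiguous output:

import torch

# Tiny check: the CPU factor comes back with column-major (Fortran) strides.
L = torch.linalg.cholesky(torch.eye(4))
print(L.stride())  # (1, 4)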

nikitaved (Collaborator), Feb 12, 2025:

I believe we can re-use the kernel without that much code change (i.e. no need to make it stride-agnostic for now). In the meta function we request C-contiguous when upper=False and F-contiguous when upper=True for MPS. Then we only need to remove the line upper ? out.transpose_(...) : out and probably replace it with upper ? out.triu_() : out.tril_(), or something along these lines. That should resolve the issue with out for now, before the kernel is adapted for better memory accesses in column-major mode...
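
A small sketch of the layout identity this suggestion seems to rely on (illustrative only, not the proposed implementation): a row-major buffer holding the lower factor, re-interpreted with column-major strides, reads back as the upper factor, so only a triangle cleanup would remain.

import torch

# Illustration: row-major lower factor L, seen through F-contiguous strides, is L.T.
A = torch.randn(4, 4)
A = A @ A.T + 4 * torch.eye(4)          # positive-definite input

L = torch.linalg.cholesky(A)            # lower factor
buf = L.contiguous().reshape(-1)        # row-major buffer, as the kernel would write it

as_col_major = torch.as_strided(buf, size=(4, 4), stride=(1, 4))
torch.testing.assert_close(as_col_major, L.mT)   # same memory read column-major == upper factor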

Isalia20 (Author):

I've tried it, but I'm afraid it doesn't work. I'll address this in a follow-up PR with the kernel change for column-major mode, rather than going down the rabbit hole now for a temporary fix.

set_output_strided(0, A_shape, L_strides, A.options(), {});

// info
4 changes: 2 additions & 2 deletions aten/src/ATen/native/mps/kernels/LinearAlgebra.metal
@@ -83,7 +83,7 @@ inline float blockReduceSum(

kernel void factorDiagonalBlock(
device float* A [[buffer(0)]],
device int* success [[buffer(1)]],
device int* info [[buffer(1)]],
constant uint& N [[buffer(2)]],
constant uint& NB [[buffer(3)]],
constant uint& k [[buffer(4)]],
@@ -142,7 +142,7 @@ kernel void factorDiagonalBlock(
if (linear_tid == 0) {
float diagVal = tile[kk][kk] - diagElt;
if (diagVal <= 0.0f) {
success[bid.x] = 0;
info[bid.x] = kk + 1;
return;
}
tile[kk][kk] = sqrt(diagVal);
65 changes: 47 additions & 18 deletions aten/src/ATen/native/mps/operations/LinearAlgebra.mm
@@ -20,6 +20,7 @@
#include <ATen/ops/baddbmm_native.h>
#include <ATen/ops/bmm_native.h>
#include <ATen/ops/cholesky_native.h>
#include <ATen/ops/linalg_cholesky_ex_native.h>
#include <ATen/ops/linalg_cholesky_native.h>
#include <ATen/ops/linalg_lu_factor_ex_native.h>
#include <ATen/ops/linalg_lu_factor_native.h>
@@ -1051,7 +1052,11 @@ static void lu_unpack_mps_impl(const Tensor& LU_data,
}
}

static Tensor& linalg_cholesky_mps_impl(const Tensor& input, bool upper, Tensor& out) {
static void linalg_cholesky_mps_impl(const Tensor& input,
bool upper,
bool check_errors,
const Tensor& out,
const Tensor& info) {
using namespace mps;

TORCH_CHECK(out.is_mps());
@@ -1061,9 +1066,11 @@ static void lu_unpack_mps_impl(const Tensor& LU_data,

if (input.numel() == 0 || out.numel() == 0) {
out.zero_();
return out;
return;
}
resize_output(out, input.sizes());
auto input_sizes = input.sizes();
resize_output(out, input_sizes);
resize_output(info, {input_sizes.begin(), input_sizes.end() - 2});
out.copy_(input);

int64_t ndim = out.dim();
@@ -1083,14 +1090,16 @@ static void lu_unpack_mps_impl(const Tensor& LU_data,
int64_t NB = std::min<int64_t>(32, N);
int64_t numBlocks = (N + NB - 1) / NB;

Tensor success = at::empty({B}, input.options().dtype(kInt)).fill_(1);
auto info_ = info.dim() >= 2 ? info.view({B}) : info;
auto info_sizes = info.sizes();
info_.fill_(0);

MTLSize threadGroupSize = MTLSizeMake(32, 8, 1);

@autoreleasepool {
dispatch_sync_with_rethrow(stream->queue(), ^() {
auto computeEncoder = stream->commandEncoder();
mtl_setArgs(computeEncoder, out, success, N, NB);
mtl_setArgs(computeEncoder, out, info_, N, NB);
for (int64_t k = 0; k < numBlocks; k++) {
[computeEncoder setComputePipelineState:factorDiagonalPSO];
mtl_setBytes(computeEncoder, k, 4);
@@ -1118,10 +1127,32 @@ static void lu_unpack_mps_impl(const Tensor& LU_data,
}
});
}

TORCH_CHECK(success.all().item<bool>(), "linalg.cholesky: Input matrix is not positive definite");
out.tril_(); //
return upper ? out.transpose_(ndim - 2, ndim - 1) : out;
int status;
if (check_errors) {
if (info_.dim() > 0) {
// batch case
for (const auto i : c10::irange(B)) {
status = info_[i].item<int>();
TORCH_CHECK(
status == 0,
"linalg.cholesky(): (Batch element ",
i,
"): The factorization could not be completed because the input is not positive-definite (the leading minor of order ",
status,
" is not positive-definite).");
}
} else {
// single matrix case(no batch size)
status = info.item<int>();
TORCH_CHECK(
status == 0,
"linalg.cholesky(): The factorization could not be completed because the input is not positive-definite (the leading minor of order ",
status,
" is not positive-definite).");
}
}
out.tril_();
upper ? out.transpose_(ndim - 2, ndim - 1) : out;
Review thread on lines +1154 to +1155:

nikitaved (Collaborator), Feb 11, 2025:

This will silently alter the stride structure of out if upper == true. It would be better as upper ? out.triu_() : out.tril_().

Isalia20 (Author):

That's not the same. The kernel does the decomposition in the lower part of the matrix. If you do out.triu_() instead of out.tril_() -> transpose, you get the upper part of the matrix, which isn't really the correct output.
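
A small Python illustration of this point; the "kernel" here is emulated with a mask and is not the Metal code:

import torch

# The in-place kernel overwrites only the lower triangle of `out`, so the
# strictly-upper entries still hold the original input.
A = torch.randn(4, 4)
A = A @ A.T + 4 * torch.eye(4)

L = torch.linalg.cholesky(A)
out = A.clone()
mask = torch.tril(torch.ones(4, 4)).bool()
out[mask] = L[mask]                           # emulate: lower triangle <- factor, upper untouched

upper_ref = torch.linalg.cholesky(A, upper=True)

# Correct post-processing for upper=True: keep the lower triangle, then transpose.
torch.testing.assert_close(out.tril().mT, upper_ref)

# out.triu_() alone keeps leftover input entries above the diagonal instead:
print(torch.allclose(out.triu(), upper_ref))  # False in general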

nikitaved (Collaborator), Feb 12, 2025:

Do you have some stride assumptions in the kernel, or is it stride-agnostic? If it is stride-agnostic, then the kernel could be run on the transposed variant.

Isalia20 (Author):

It assumes that the input is row-major (contiguous).

nikitaved (Collaborator), Feb 12, 2025:

out can be provided externally as column-major. What would happen in this case?

Isalia20 (Author):

I printed the data pointer inside the MPS function and outside in Python:

import torch

out = torch.rand(3, 3, 3, device="mps").permute(2, 1, 0)

x = torch.rand(3, 3, 3, device="mps")
x = x.mT @ x

data_ptr = out.data_ptr()
print(f"0x{data_ptr:x}")  # lowercase hex
torch.linalg.cholesky(x, out=out)
print(f"0x{out.data_ptr():x}")

Yields:

0x10a4d68d0
0x10fb19150
0x10a4d68d0

The first one is printed from Python, the second from C++ before launching the kernel, and the third from Python again. So yeah, confirmed.

nikitaved (Collaborator), Feb 12, 2025:

As per https://github.com/pytorch/pytorch/pull/146799/files#r1952464144, this is expected. Sorry for the confusion. But it seems we should have issues when out is contiguous and upper=True.

Isalia20 (Author):

No issues from what I checked:

import torch

out = torch.rand(3, 3, 3, device="mps").permute(2, 1, 0)

x = torch.rand(3, 3, 3, device="mps")
x = x.mT @ x

data_ptr = out.data_ptr()
print(f"0x{data_ptr:x}")  # lowercase hex
print(out.stride())
print(out.is_contiguous())
res1 = torch.linalg.cholesky(x, out=out, upper=True)
res2 = torch.linalg.cholesky(x.cpu(), out=out.cpu(), upper=True)
print(f"0x{out.data_ptr():x}")
print(out.stride())
torch.testing.assert_close(res1.cpu(), res2)

Output:

0x113f70cc0
(1, 3, 9)
False
0x114f3a510
0x113f70cc0
(1, 3, 9)

nikitaved (Collaborator), Feb 12, 2025:

@Isalia20, could you remove the permute so that out is contiguous? In the meta function, as per your modification, out is re-used only if it is contiguous.

Isalia20 (Author):

Ah I see the issue now:

0x10bc7b840
(9, 3, 1)
True
0x10bc7b840
0x10bc7b840
(9, 1, 3)
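
Presumably the output above comes from the earlier repro with the permute removed, along these lines (a sketch, not the exact script):

import torch

# `out` starts out contiguous (row-major) here.
out = torch.rand(3, 3, 3, device="mps")

x = torch.rand(3, 3, 3, device="mps")
x = x.mT @ x

print(f"0x{out.data_ptr():x}")
print(out.stride())              # (9, 3, 1)
print(out.is_contiguous())       # True
res1 = torch.linalg.cholesky(x, out=out, upper=True)
print(f"0x{out.data_ptr():x}")   # same pointer: the contiguous out is re-used
print(out.stride())              # (9, 1, 3): the in-place transpose_ silently altered the strides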

}
} // namespace mps

@@ -1285,21 +1316,19 @@ Tensor addr_mps(const Tensor& self, const Tensor& vec1, const Tensor& vec2, cons

Tensor cholesky_mps(const Tensor& self, bool upper) {
auto out = at::empty_like(self, MemoryFormat::Contiguous);
mps::linalg_cholesky_mps_impl(self, upper, out);
cholesky_mps_out(self, upper, out);
return out;
}

Tensor& cholesky_mps_out(const Tensor& self, bool upper, Tensor& out) {
return mps::linalg_cholesky_mps_impl(self, upper, out);
}

Tensor& linalg_cholesky_out_mps(const Tensor& self, bool upper, Tensor& out) {
return mps::linalg_cholesky_mps_impl(self, upper, out);
auto info = at::empty({}, self.options().dtype(kInt));
mps::linalg_cholesky_mps_impl(self, upper, true, out, info);
return out;
}

Tensor linalg_cholesky_mps(const Tensor& self, bool upper) {
auto out = at::empty_like(self, MemoryFormat::Contiguous);
return mps::linalg_cholesky_mps_impl(self, upper, out);
TORCH_IMPL_FUNC(linalg_cholesky_ex_out_mps)
(const Tensor& self, bool upper, bool check_errors, const Tensor& L, const Tensor& info) {
mps::linalg_cholesky_mps_impl(self, upper, check_errors, L, info);
}

Tensor addbmm_mps(const Tensor& self,
7 changes: 1 addition & 6 deletions aten/src/ATen/native/native_functions.yaml
@@ -13901,18 +13901,13 @@
structured: True
dispatch:
CPU, CUDA: linalg_cholesky_ex_out
MPS: linalg_cholesky_ex_out_mps

- func: linalg_cholesky(Tensor self, *, bool upper=False) -> Tensor
python_module: linalg
dispatch:
CompositeImplicitAutograd: linalg_cholesky
MPS: linalg_cholesky_mps

- func: linalg_cholesky.out(Tensor self, *, bool upper=False, Tensor(a!) out) -> Tensor(a!)
python_module: linalg
dispatch:
CompositeImplicitAutograd: linalg_cholesky_out
MPS: linalg_cholesky_out_mps

- func: linalg_cross(Tensor self, Tensor other, *, int dim=-1) -> Tensor
python_module: linalg
30 changes: 25 additions & 5 deletions test/test_mps.py
@@ -699,7 +699,6 @@ def mps_ops_modifier(ops):
'index_reduceamin': None,
'kthvalue': None,
'lcm': None,
'linalg.cholesky_ex': None,
'linalg.cond': None,
'linalg.eigh': None,
'linalg.eigvalsh': None,
@@ -6525,14 +6524,23 @@ def test_sort(self):
atol=0, rtol=0
)

def test_cholesky(self):
def test_linalg_cholesky(self):
from torch.testing._internal.common_utils import random_hermitian_pd_matrix

def run_cholesky_test(size, *batch_dims, upper):
def run_cholesky_test(size, *batch_dims, upper=False, check_errors=False):
if check_errors:
# expect failure for non-positive definite matrix
input_mps = torch.eye(size, dtype=torch.float32, device="mps")
input_mps[0, 0] = -1
error_msg = r'The factorization could not be completed because the input is not positive-definite'
with self.assertRaisesRegex(RuntimeError, error_msg):
torch.linalg.cholesky_ex(input_mps, upper=upper, check_errors=check_errors)
return
# output checks for positive definite matrix
input_cpu = random_hermitian_pd_matrix(size, *batch_dims, dtype=torch.float32, device="cpu")
input_mps = input_cpu.to('mps')
output_cpu = torch.linalg.cholesky(input_cpu, upper=upper)
output_mps = torch.linalg.cholesky(input_mps, upper=upper)
output_cpu = torch.linalg.cholesky_ex(input_cpu, upper=upper)
output_mps = torch.linalg.cholesky_ex(input_mps, upper=upper)
Review thread on lines +6542 to +6543:

Collaborator:

Let us also check that info is the same since its behavior is altered?

Isalia20 (Author):

output_cpu and output_mps are tuples of L and info tensors, so assertEqual compares both of them. Do you mean adding a separate test where info might be > 1?
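
For reference, a minimal illustration (CPU) of the tuple being compared:

import torch

# cholesky_ex returns a named tuple (L, info), so comparing the whole return
# value covers both fields.
res = torch.linalg.cholesky_ex(torch.eye(3))
print(res.L)     # identity: the Cholesky factor of I
print(res.info)  # tensor(0, dtype=torch.int32): 0 means success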

Collaborator:

Yes, when erroring on non-psd inputs :)

Isalia20 (Author):

I'll do it a bit later today and also adapt the error message

Isalia20 (Author):

Added a better error message.

self.assertEqual(output_cpu, output_mps, atol=2e-5, rtol=1e-6)

# test with different even/odd matrix sizes
Expand All @@ -6548,6 +6556,18 @@ def run_cholesky_test(size, *batch_dims, upper):
# test >3D matrices
run_cholesky_test(128, 10, 10, upper=False)
run_cholesky_test(128, 2, 2, 2, 2, 10, 10, upper=True)
run_cholesky_test(32, 2, upper=False, check_errors=True)
run_cholesky_test(32, 2, upper=True, check_errors=True)

def test_linalg_cholesky_info(self):
# non psd matrix with leading minor of order 2 being not positive definite
A = torch.tensor([
[4.0, 1.0, 0.0],
[1.0, -2.0, 1.0],
[0.0, 1.0, 3.0]
], device="mps")
with self.assertRaisesRegex(RuntimeError, r'leading minor of order 2 is not positive-definite'):
torch.linalg.cholesky_ex(A, check_errors=True)

def test_upsample_nearest2d(self):
def helper(N, C, H, W, memory_format):
5 changes: 0 additions & 5 deletions tools/autograd/derivatives.yaml
@@ -410,11 +410,6 @@
self: cholesky_backward(grad, upper, L)
L: cholesky_jvp(self_t, L, upper)

# temporarily here before linalg_cholesky dispatches to linalg_cholesky_ex on MPS device
- name: linalg_cholesky(Tensor self, *, bool upper=False) -> Tensor
self: cholesky_backward(grad, upper, result)
result: cholesky_jvp(self_t, result, upper)

- name: cholesky_solve(Tensor self, Tensor input2, bool upper=False) -> Tensor
self, input2: cholesky_solve_backward(grad, self, input2, result, upper, grad_input_mask)
result: cholesky_solve_jvp(result, input2_p, input2_t, self_t, upper)