
Commit d67d51a

Update on "Tensorify compute on Python scalars"
Signed-off-by: Bob Ren <bobren@fb.com>

Commandeered from #130228 as I'm helping ezyang w/ shipping dynamic float arguments in PT2. This starts with supporting torch.ops.aten.mul; I'll stack support for other operators on top in subsequent PRs to keep this change scoped to the mechanics of the fx pass.

cc jgong5 mingfeima XiaobingSuper sanchitintel ashokei jingxu10 voznesenskym penguinwu EikanWang Guobing-Chen zhuhaozhe blzheng wenzhe-nrv jiayisunx ipiszy yf225 chenyang78 kadeng muchulee8 ColinPeppler amjames desertfire chauhang rec

[ghstack-poisoned]
2 parents e886d4f + d1ca59c commit d67d51a
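
As a rough orientation for what "tensorify compute on Python scalars" means, here is a minimal, illustrative-only Python sketch of the idea at the FX-graph level: a mul node whose second argument is a plain Python float is rewritten to consume a 0-dim tensor (via torch.scalar_tensor), so the scalar flows through the graph as data instead of being specialized into a constant. The helper name tensorify_float_muls and the exact rewrite are assumptions made for this sketch; this is not the pass added by the commit.

import torch
from torch import fx


def tensorify_float_muls(gm: fx.GraphModule) -> fx.GraphModule:
    # Hypothetical helper (not the commit's pass): rewrite mul(tensor, <python float>)
    # into mul(tensor, scalar_tensor(<python float>)).
    graph = gm.graph
    for node in list(graph.nodes):
        is_mul = node.op == "call_function" and node.target in (torch.mul, torch.ops.aten.mul.Tensor)
        if is_mul and len(node.args) == 2 and isinstance(node.args[1], float):
            tensor_arg, float_arg = node.args
            with graph.inserting_before(node):
                # Materialize the Python float as a 0-dim tensor node in the graph.
                scalar = graph.call_function(torch.scalar_tensor, (float_arg,))
            node.args = (tensor_arg, scalar)
    graph.lint()
    gm.recompile()
    return gm


class M(torch.nn.Module):
    def forward(self, x):
        return torch.mul(x, 0.5)


gm = fx.symbolic_trace(M())
tensorify_float_muls(gm)
print(gm.code)  # mul now consumes scalar_tensor(0.5) instead of the raw Python float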

File tree

183 files changed (+10635, -4527 lines)


.ci/docker/common/install_onnx.sh

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@ pip_install coloredlogs packaging
 
 pip_install onnxruntime==1.18.1
 pip_install onnx==1.16.2
-pip_install onnxscript==0.1.0.dev20240831 --no-deps
+pip_install onnxscript==0.1.0.dev20241008 --no-deps
 # required by onnxscript
 pip_install ml_dtypes
 

.github/workflows/pull.yml

Lines changed: 15 additions & 15 deletions
@@ -185,10 +185,10 @@ jobs:
 docker-image-name: pytorch-linux-focal-py3.9-clang10
 test-matrix: |
 { include: [
-{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
+{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
+{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
+{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
 { config: "crossref", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
 { config: "crossref", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
 { config: "dynamo", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
@@ -217,10 +217,10 @@ jobs:
 docker-image-name: pytorch-linux-focal-py3.11-clang10
 test-matrix: |
 { include: [
-{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
+{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
+{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
+{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
 { config: "crossref", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
 { config: "crossref", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
 { config: "dynamo", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
@@ -251,10 +251,10 @@ jobs:
 docker-image-name: pytorch-linux-focal-py3.12-clang10
 test-matrix: |
 { include: [
-{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+{ config: "default", shard: 1, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
+{ config: "default", shard: 2, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
+{ config: "default", shard: 3, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
+{ config: "default", shard: 4, num_shards: 4, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
 { config: "dynamo", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
 { config: "dynamo", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
 { config: "dynamo", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.2xlarge" },
@@ -588,9 +588,9 @@ jobs:
 docker-image-name: pytorch-linux-focal-py3.12-clang10
 test-matrix: |
 { include: [
-{ config: "default", shard: 1, num_shards: 3, runner: "linux.4xlarge" },
-{ config: "default", shard: 2, num_shards: 3, runner: "linux.4xlarge" },
-{ config: "default", shard: 3, num_shards: 3, runner: "linux.4xlarge" },
+{ config: "default", shard: 1, num_shards: 3, runner: "linux.2xlarge" },
+{ config: "default", shard: 2, num_shards: 3, runner: "linux.2xlarge" },
+{ config: "default", shard: 3, num_shards: 3, runner: "linux.2xlarge" },
 { config: "dynamo", shard: 1, num_shards: 3, runner: "linux.2xlarge" },
 { config: "dynamo", shard: 2, num_shards: 3, runner: "linux.2xlarge" },
 { config: "dynamo", shard: 3, num_shards: 3, runner: "linux.2xlarge" },

aten/src/ATen/SparseCsrTensorUtils.h

Lines changed: 2 additions & 2 deletions
@@ -144,8 +144,8 @@ class CheckSparseTensorInvariants {
 bool old_state;
 
 public:
-CheckSparseTensorInvariants(bool state) {
-old_state = at::globalContext().checkSparseTensorInvariants();
+CheckSparseTensorInvariants(bool state)
+    : old_state(at::globalContext().checkSparseTensorInvariants()) {
 at::globalContext().setCheckSparseTensorInvariants(state);
 }
 

aten/src/ATen/ThreadLocalState.h

Lines changed: 1 addition & 1 deletion
@@ -82,7 +82,7 @@ class TORCH_API ThreadLocalState {
 !defined(BUILD_LITE_INTERPRETER)
 // TLS for autocast dtypes
 std::array<at::ScalarType, at::COMPILE_TIME_MAX_DEVICE_TYPES>
-autocast_dtypes_;
+autocast_dtypes_{};
 #endif
 
 friend class ThreadLocalStateGuard;

aten/src/ATen/cuda/CUDAGraph.cpp

Lines changed: 1 addition & 1 deletion
@@ -125,7 +125,7 @@ void CUDAGraph::capture_begin(MempoolId_t pool/*=0*/, cudaStreamCaptureMode capt
 // due to the capture status being updated _after_ a capture had already started.
 c10::cuda::CUDACachingAllocator::beginAllocateToPool(capture_dev_, mempool_id_, [this](cudaStream_t stream) {
 cudaStreamCaptureStatus status;
-CaptureId_t stream_capture_id;
+CaptureId_t stream_capture_id = 0;
 AT_CUDA_CHECK(cudaStreamGetCaptureInfo(stream, &status, &stream_capture_id));
 return status == cudaStreamCaptureStatus::cudaStreamCaptureStatusActive && stream_capture_id == capture_id_;
 });

aten/src/ATen/cuda/cub.cuh

Lines changed: 2 additions & 2 deletions
@@ -234,7 +234,7 @@ inline void inclusive_scan(InputIteratorT input, OutputIteratorT output, ScanOpT
 scan_op,
 num_items,
 at::cuda::getCurrentCUDAStream());
-C10_CUDA_KERNEL_LAUNCH_CHECK();
+C10_HIP_KERNEL_LAUNCH_CHECK();
 #else
 // non synchronizing cub call
 // even though cub is supposed to support tensors with int_max elements, in reality it doesn't,
@@ -302,7 +302,7 @@ inline void exclusive_scan(InputIteratorT input, OutputIteratorT output, ScanOpT
 init_value,
 num_items,
 at::cuda::getCurrentCUDAStream());
-C10_CUDA_KERNEL_LAUNCH_CHECK();
+C10_HIP_KERNEL_LAUNCH_CHECK();
 #else
 // non synchronizing cub call
 // even though cub is supposed to support tensors with int_max elements, in reality it doesn't,

aten/src/ATen/functorch/BatchRulesConvolution.cpp

Lines changed: 1 addition & 0 deletions
@@ -362,6 +362,7 @@ static std::tuple<Tensor,Tensor,Tensor> convolution_backward_plumbing(
 const Tensor& grad_output_, const Tensor& input_, const Tensor& weight_,
 const c10::OptionalArrayRef<SymInt> bias_sizes_opt,
 c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed,
+// NOLINTNEXTLINE(performance-unnecessary-value-param)
 c10::SymIntArrayRef output_padding, c10::SymInt groups, std::array<bool, 3> output_mask) {
 const auto maybe_layer = maybeCurrentDynamicLayer();
 vmap_check_escaped(maybe_layer, "convolution_backward_plumbing");

aten/src/ATen/functorch/BatchRulesIndexing.cpp

Lines changed: 2 additions & 2 deletions
@@ -8,7 +8,7 @@
 #include <ATen/core/dispatch/Dispatcher.h>
 #include <ATen/functorch/BatchRulesHelper.h>
 
-namespace at { namespace functorch {
+namespace at::functorch {
 
 #define OP_DECOMPOSE(op) m.impl(#op, static_cast<decltype(&ATEN_FN(op))>(native::op));
 #define OP_DECOMPOSE2(op, overload) m.impl(#op"."#overload, static_cast<decltype(&ATEN_FN2(op, overload))>(native::op));
@@ -20,4 +20,4 @@ TORCH_LIBRARY_IMPL(aten, FuncTorchBatched, m) {
 OP_DECOMPOSE(_unsafe_masked_index_put_accumulate);
 }
 
-}}
+}

aten/src/ATen/functorch/BatchRulesModules.cpp

Lines changed: 2 additions & 2 deletions
@@ -226,7 +226,7 @@ static Tensor one_hot_decomposition_hack(const Tensor &self, int64_t num_classes
 if (num_classes <= 0) {
 AT_ERROR("Can not infer total number of classes from empty tensor.");
 } else {
-shape.push_back(num_classes);
+shape.emplace_back(num_classes);
 return at::empty_symint(shape, self.options());
 }
 }
@@ -246,7 +246,7 @@ static Tensor one_hot_decomposition_hack(const Tensor &self, int64_t num_classes
 // TORCH_CHECK(num_classes > self.max().item().toLong(), "Class values must be smaller than num_classes.");
 // }
 
-shape.push_back(num_classes);
+shape.emplace_back(num_classes);
 Tensor ret = at::zeros_symint(shape, self.options());
 return ret.scatter(-1, self.unsqueeze(-1), 1);
 }
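
For orientation, here is a rough Python equivalent of the zeros-plus-scatter decomposition used by one_hot_decomposition_hack above. The helper name one_hot_via_scatter is made up for this sketch; it is not code from this commit.

import torch

def one_hot_via_scatter(index: torch.Tensor, num_classes: int) -> torch.Tensor:
    # Allocate zeros with an extra trailing dim of size num_classes, then
    # scatter 1s at the positions given by the index values.
    shape = list(index.shape) + [num_classes]
    ret = torch.zeros(shape, dtype=torch.long, device=index.device)
    return ret.scatter(-1, index.unsqueeze(-1), 1)

# e.g. one_hot_via_scatter(torch.tensor([0, 2, 1]), 3) matches
# torch.nn.functional.one_hot(torch.tensor([0, 2, 1]), 3)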

aten/src/ATen/functorch/BatchRulesRandomness.cpp

Lines changed: 3 additions & 3 deletions
@@ -213,7 +213,7 @@ static std::tuple<Tensor,Tensor> native_dropout_batching_rule(const Tensor& tens
 return std::make_tuple(output, mask);
 }
 
-static Tensor multinomial_batching_rule(const Tensor& self, const int64_t num_samples, const bool replacement, const std::optional<Generator> generator) {
+static Tensor multinomial_batching_rule(const Tensor& self, const int64_t num_samples, const bool replacement, std::optional<Generator> generator) {
 c10::impl::ExcludeDispatchKeyGuard guard(DispatchKey::FuncTorchVmapMode);
 auto maybe_layer = maybeCurrentDynamicLayer();
 const auto cur_level = maybe_layer->layerId();
@@ -237,7 +237,7 @@ static Tensor multinomial_batching_rule(const Tensor& self, const int64_t num_sa
 if (is_2D_case) {
 self_value = reshape_dim_into(0, 0, self_value);
 }
-auto out = multinomial(self_value, num_samples, replacement, generator);
+auto out = multinomial(self_value, num_samples, replacement, std::move(generator));
 if (is_2D_case) {
 out = reshape_dim_outof_symint(0, maybe_layer->batchSize(), out);
 }
@@ -249,7 +249,7 @@ static Tensor multinomial_batching_rule(const Tensor& self, const int64_t num_sa
 // Must be same randomness with unbatched input
 // 1D case: S -> multinomial(S) -> S
 // 2D case: MS -> multinomial(MS) -> MS
-return multinomial(self_value, num_samples, replacement, generator);
+return multinomial(self_value, num_samples, replacement, std::move(generator));
 }
 
 template <typename A, A a, typename C>
