pytorch
diff --git a/‎.ci/docker/ci_commit_pins/executorch.txt‎
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/ci_commit_pins/executorch.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/docker/ci_commit_pins/nccl-cu11.txt‎
Lines changed: 1 addition & 0 deletions b/‎.ci/docker/ci_commit_pins/nccl-cu11.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.ci/docker/ci_commit_pins/nccl-cu12.txt‎
Lines changed: 1 addition & 0 deletions b/‎.ci/docker/ci_commit_pins/nccl-cu12.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.ci/docker/common/install_base.sh‎
Lines changed: 4 additions & 0 deletions b/‎.ci/docker/common/install_base.sh‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎.ci/docker/common/install_cuda.sh‎
Lines changed: 4 additions & 3 deletions b/‎.ci/docker/common/install_cuda.sh‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎.ci/docker/common/install_cuda_aarch64.sh‎
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/common/install_cuda_aarch64.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/docker/common/install_cudnn.sh‎
Lines changed: 3 additions & 1 deletion b/‎.ci/docker/common/install_cudnn.sh‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎.ci/docker/common/install_onnx.sh‎
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/common/install_onnx.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/pytorch/smoke_test/smoke_test.py‎
Lines changed: 1 addition & 1 deletion b/‎.ci/pytorch/smoke_test/smoke_test.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/pytorch/test.sh‎
Lines changed: 1 addition & 0 deletions b/‎.ci/pytorch/test.sh‎
Lines changed: 1 addition & 0 deletions
@@ -1 +1 @@
-41e7ffa8b7ff09206aa5b9b5c1bbd82b9e0ff277
+8148603e3f3a618acef447a73bdeec9b749a95fb
@@ -0,0 +1 @@
+v2.21.5-1
@@ -0,0 +1 @@
+v2.25.1-1
@@ -32,8 +32,12 @@ install_ubuntu() {
 
   # HACK: UCC testing relies on libnccl library from NVIDIA repo, and version 2.16 crashes
   # See https://github.com/pytorch/pytorch/pull/105260#issuecomment-1673399729
+  # TODO: Eliminate this hack; we should not rely on apt-get installation
+  # See https://github.com/pytorch/pytorch/issues/144768
   if [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "11.8"* ]]; then
     maybe_libnccl_dev="libnccl2=2.15.5-1+cuda11.8 libnccl-dev=2.15.5-1+cuda11.8 --allow-downgrades --allow-change-held-packages"
+  elif [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "12.4"* ]]; then
+    maybe_libnccl_dev="libnccl2=2.25.1-1+cuda12.4 libnccl-dev=2.25.1-1+cuda12.4 --allow-downgrades --allow-change-held-packages"
   else
     maybe_libnccl_dev=""
   fi
 
@@ -2,7 +2,7 @@
 
 set -ex
 
-NCCL_VERSION=v2.21.5-1
+NCCL_VERSION=v2.25.1-1
 CUDNN_VERSION=9.5.1.17
 
 function install_cusparselt_040 {
@@ -40,6 +40,7 @@ function install_cusparselt_063 {
 
 function install_118 {
     CUDNN_VERSION=9.1.0.70
+    NCCL_VERSION=v2.21.5-1
     echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
     rm -rf /usr/local/cuda-11.8 /usr/local/cuda
     # install CUDA 11.8.0 in the same container
@@ -239,7 +240,7 @@ function prune_126 {
 }
 
 function install_128 {
-  CUDNN_VERSION=9.7.0.66
+  CUDNN_VERSION=9.7.1.26
   echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
   rm -rf /usr/local/cuda-12.8 /usr/local/cuda
   # install CUDA 12.8.0 in the same container
@@ -288,4 +289,4 @@ do
         ;;
     esac
     shift
-done
+done
@@ -161,7 +161,7 @@ function prune_126 {
 }
 
 function install_128 {
-  CUDNN_VERSION=9.7.0.66
+  CUDNN_VERSION=9.7.1.26
   echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
   rm -rf /usr/local/cuda-12.8 /usr/local/cuda
   # install CUDA 12.8.0 in the same container
 
@@ -4,7 +4,9 @@ if [[ -n "${CUDNN_VERSION}" ]]; then
     # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
     mkdir tmp_cudnn
     pushd tmp_cudnn
-    if [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
+    if [[ ${CUDA_VERSION:0:4} == "12.8" ]]; then
+        CUDNN_NAME="cudnn-linux-x86_64-9.7.1.26_cuda12-archive"
+    elif [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
         CUDNN_NAME="cudnn-linux-x86_64-9.5.1.17_cuda12-archive"
     elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then
         CUDNN_NAME="cudnn-linux-x86_64-9.1.0.70_cuda12-archive"
 
@@ -39,7 +39,7 @@ pip_install ml_dtypes
 # Cache the transformers model to be used later by ONNX tests. We need to run the transformers
 # package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
 IMPORT_SCRIPT_FILENAME="/tmp/onnx_import_script.py"
-as_jenkins echo 'import transformers; transformers.AutoModel.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large-v3");' > "${IMPORT_SCRIPT_FILENAME}"
+as_jenkins echo 'import transformers; transformers.GPTJForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gptj");' > "${IMPORT_SCRIPT_FILENAME}"
 
 # Need a PyTorch version for transformers to work
 pip_install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
 
@@ -174,7 +174,7 @@ def test_cuda_gds_errors_captured() -> None:
     try:
         print("Testing test_cuda_gds_errors_captured")
         with NamedTemporaryFile() as f:
-            torch.cuda.gds._GdsFile(f.name, os.O_CREAT | os.O_RDWR)
+            torch.cuda.gds.GdsFile(f.name, os.O_CREAT | os.O_RDWR)
     except RuntimeError as e:
         expected_error = "cuFileHandleRegister failed"
         if re.search(expected_error, f"{e}"):
 
@@ -420,6 +420,7 @@ test_inductor_cpp_wrapper_shard() {
   python test/run_test.py \
     --include inductor/test_torchinductor inductor/test_max_autotune inductor/test_cpu_repro \
     --verbose
+  python test/run_test.py --inductor --include test_torch -k 'take' --verbose
 
   # Run inductor benchmark tests with cpp wrapper.
   # Skip benchmark tests if it's in rerun-disabled-mode.
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-41e7ffa8b7ff09206aa5b9b5c1bbd82b9e0ff277`
	`1`	`+8148603e3f3a618acef447a73bdeec9b749a95fb`
Original file line number	Diff line number	Diff line change
`@@ -161,7 +161,7 @@ function prune_126 {`
`161`	`161`	`}`
`162`	`162`
`163`	`163`	`function install_128 {`
`164`		`- CUDNN_VERSION=9.7.0.66`
	`164`	`+ CUDNN_VERSION=9.7.1.26`
`165`	`165`	`echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"`
`166`	`166`	`rm -rf /usr/local/cuda-12.8 /usr/local/cuda`
`167`	`167`	`# install CUDA 12.8.0 in the same container`