pytorch
diff --git a/.ci/docker/README.md b/.ci/docker/README.md
Lines changed: 2 additions & 2 deletions
diff --git a/.ci/docker/common/install_cuda.sh b/.ci/docker/common/install_cuda.sh
Lines changed: 2 additions & 3 deletions
diff --git a/.ci/docker/common/install_triton.sh b/.ci/docker/common/install_triton.sh
Lines changed: 1 addition & 1 deletion
diff --git a/.ci/pytorch/check_binary.sh b/.ci/pytorch/check_binary.sh
Lines changed: 0 additions & 21 deletions
diff --git a/.ci/pytorch/common_utils.sh b/.ci/pytorch/common_utils.sh
Lines changed: 13 additions & 4 deletions
diff --git a/.ci/pytorch/multigpu-test.sh b/.ci/pytorch/multigpu-test.sh
Lines changed: 1 addition & 0 deletions
diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh
Lines changed: 2 additions & 5 deletions
diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py
Lines changed: 2 additions & 2 deletions
diff --git a/.github/workflows/_link_check.yml b/.github/workflows/_link_check.yml
Lines changed: 2 additions & 0 deletions
diff --git a/.github/workflows/_linux-test.yml b/.github/workflows/_linux-test.yml
Lines changed: 1 addition & 1 deletion
@@ -120,8 +120,8 @@ If your new Docker image needs a library installed from a specific pinned commit
    If you're introducing a new argument to the Docker build, make sure to add it in the Docker build step in `.ci/docker/build.sh`:
    ```bash
    docker build \
-      ....
-      --build-arg "NEW_ARG_1=${NEW_ARG_1}"
+     ....
+     --build-arg "NEW_ARG_1=${NEW_ARG_1}"
    ```
 
 3. **Update Dockerfile logic**:
 
@@ -10,7 +10,7 @@ else
   arch_path='sbsa'
 fi
 
-NVSHMEM_VERSION=3.3.20
+NVSHMEM_VERSION=3.3.24
 
 function install_cuda {
   version=$1
@@ -65,7 +65,7 @@ function install_nvshmem {
   # This pattern is a lie as it is not consistent across versions; for 3.3.9 it was cuda_ver-arch-nvshmem-ver
   filename="libnvshmem-linux-${arch_path}-${nvshmem_version}_cuda${cuda_major_version}-archive"
   suffix=".tar.xz"
-  url="https://developer.download.nvidia.com/compute/redist/nvshmem/${nvshmem_version}/builds/cuda${cuda_major_version}/txz/agnostic/${dl_arch}/${filename}${suffix}"
+  url="https://developer.download.nvidia.com/compute/nvshmem/redist/libnvshmem/linux-${arch_path}/${filename}${suffix}"
 
   # download, unpack, install
   wget -q "${url}"
@@ -148,7 +148,6 @@ function install_128 {
 
 function install_130 {
   CUDNN_VERSION=9.12.0.46
-  NVSHMEM_VERSION=3.3.20
   echo "Installing CUDA 13.0 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
   # install CUDA 13.0 in the same container
   install_cuda 13.0.0 cuda_13.0.0_580.65.06_linux
 
@@ -57,7 +57,7 @@ if [ ! -f setup.py ]; then
   cd python
 fi
 
-pip_install pybind11==2.13.6
+pip_install pybind11==3.0.1
 
 # TODO: remove patch setup.py once we have a proper fix for https://github.com/triton-lang/triton/issues/4527
 as_jenkins sed -i -e 's/https:\/\/tritonlang.blob.core.windows.net\/llvm-builds/https:\/\/oaitriton.blob.core.windows.net\/public\/llvm-builds/g' setup.py
 
@@ -300,24 +300,3 @@ except RuntimeError as e:
     exit 1
   fi
 fi
-
-###############################################################################
-# Check for C++ ABI compatibility to GCC-11 - GCC 13
-###############################################################################
-if [[ "$(uname)" == 'Linux' &&  "$PACKAGE_TYPE" == 'manywheel' ]]; then
-  pushd /tmp
-  # Per https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html
-  # gcc-11 is ABI16, gcc-13 is ABI18, gcc-14 is ABI19
-  # gcc 11 - CUDA 11.8, xpu, rocm
-  # gcc 13 - CUDA 12.6, 12.8 and cpu
-  # Please see issue for reference: https://github.com/pytorch/pytorch/issues/152426
-  if [[ "$(uname -m)" == "s390x" ]]; then
-    cxx_abi="19"
-  elif [[ "$DESIRED_CUDA" != 'xpu' && "$DESIRED_CUDA" != 'rocm'* ]]; then
-    cxx_abi="18"
-  else
-    cxx_abi="16"
-  fi
-  python -c "import torch; exit(0 if torch._C._PYBIND11_BUILD_ABI == '_cxxabi10${cxx_abi}' else 1)"
-  popd
-fi
@@ -149,13 +149,22 @@ function get_pinned_commit() {
   cat .github/ci_commit_pins/"${1}".txt
 }
 
+function detect_cuda_arch() {
+  if [[ "${BUILD_ENVIRONMENT}" == *cuda* ]]; then
+    if command -v nvidia-smi; then
+      TORCH_CUDA_ARCH_LIST=$(nvidia-smi --query-gpu=compute_cap --format=csv | tail -n 1)
+    elif [[ "${TEST_CONFIG}" == *nogpu* ]]; then
+      # There won't be nvidia-smi in nogpu tests, so just set TORCH_CUDA_ARCH_LIST to the default
+      # minimum supported value here
+      TORCH_CUDA_ARCH_LIST=8.0
+    fi
+    export TORCH_CUDA_ARCH_LIST
+  fi
+}
+
 function install_torchaudio() {
   local commit
   commit=$(get_pinned_commit audio)
-  if [[ "${BUILD_ENVIRONMENT}" == *cuda* ]] && command -v nvidia-smi; then
-    TORCH_CUDA_ARCH_LIST=$(nvidia-smi --query-gpu=compute_cap --format=csv | tail -n 1)
-    export TORCH_CUDA_ARCH_LIST
-  fi
   pip_build_and_install "git+https://github.com/pytorch/audio.git@${commit}" dist/audio
 }
 
 
@@ -45,6 +45,7 @@ if [[ "${SHARD_NUMBER:-2}" == "2" ]]; then
     # DTensor tests
     time python test/run_test.py --verbose -i distributed/tensor/test_random_ops
     time python test/run_test.py --verbose -i distributed/tensor/test_dtensor_compile
+    time python test/run_test.py --verbose -i distributed/tensor/test_utils.py
 
     # DeviceMesh test
     time python test/run_test.py --verbose -i distributed/test_device_mesh
 
@@ -91,6 +91,7 @@ if [[ "$BUILD_ENVIRONMENT" == *clang9* || "$BUILD_ENVIRONMENT" == *xpu* ]]; then
   export VALGRIND=OFF
 fi
 
+detect_cuda_arch
 
 if [[ "$BUILD_ENVIRONMENT" == *s390x* ]]; then
   # There are additional warnings on s390x, maybe due to newer gcc.
@@ -1630,11 +1631,7 @@ elif [[ "${TEST_CONFIG}" == *xla* ]]; then
   build_xla
   test_xla
 elif [[ "$TEST_CONFIG" == *vllm* ]]; then
-    if [[ "${BUILD_ENVIRONMENT}" == *cuda* ]]; then
-      TORCH_CUDA_ARCH_LIST=$(nvidia-smi --query-gpu=compute_cap --format=csv | tail -n 1)
-      export TORCH_CUDA_ARCH_LIST
-    fi
-    echo "VLLM CI TORCH_CUDA_ARCH_LIST: $TORCH_CUDA_ARCH_LIST"
+    echo "vLLM CI uses TORCH_CUDA_ARCH_LIST: $TORCH_CUDA_ARCH_LIST"
     (cd .ci/lumen_cli && python -m pip install -e .)
     python -m cli.run test external vllm --test-plan "$TEST_CONFIG" --shard-id "$SHARD_NUMBER" --num-shards "$NUM_TEST_SHARDS"
 elif [[ "${TEST_CONFIG}" == *executorch* ]]; then
 
@@ -107,7 +107,7 @@
         "nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | "
         "nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | "
         "nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | "
-        "nvidia-nvshmem-cu13==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | "
+        "nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | "
         "nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | "
         "nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | "
         "nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'"
@@ -210,7 +210,7 @@ def arch_type(arch_version: str) -> str:
     "cpu": "libtorch-cxx11-builder:cpu",
 }
 
-FULL_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t"]
+FULL_PYTHON_VERSIONS = ["3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t"]
 
 
 def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
 
@@ -13,6 +13,7 @@ jobs:
     if: ${{ github.event_name != 'pull_request' || !contains(github.event.pull_request.labels.*.name, 'skip-url-lint') }}
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
+      job-name: lint-urls
       timeout: 120
       runner: ${{ inputs.runner }}linux.2xlarge
       docker-image: ci-image:pytorch-linux-jammy-linter
@@ -38,6 +39,7 @@ jobs:
     if: ${{ github.event_name != 'pull_request' || !contains(github.event.pull_request.labels.*.name, 'skip-xref-lint') }}
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
+      job-name: lint-xrefs
       timeout: 60
       runner: ${{ inputs.runner }}linux.2xlarge
       docker-image: ci-image:pytorch-linux-jammy-linter
 
@@ -409,7 +409,7 @@ jobs:
           job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
 
       - name: Authenticate with AWS
-        if: ${{ contains(matrix.runner, 'b200') }}
+        if: ${{ always() && contains(matrix.runner, 'b200') }}
         uses: aws-actions/configure-aws-credentials@ececac1a45f3b08a01d2dd070d28d111c5fe6722 # v4.1.0
         with:
           role-to-assume: arn:aws:iam::308535385114:role/gha_workflow_upload-benchmark-results