Skip to content

Commit e02bcaa

Browse files
committed
Update
[ghstack-poisoned]
2 parents f6f2076 + fa268ea commit e02bcaa

File tree

366 files changed

+8722
-5696
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

366 files changed

+8722
-5696
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
41e7ffa8b7ff09206aa5b9b5c1bbd82b9e0ff277
1+
8148603e3f3a618acef447a73bdeec9b749a95fb
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
v2.21.5-1
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
v2.25.1-1

.ci/docker/common/install_base.sh

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,12 @@ install_ubuntu() {
3232

3333
# HACK: UCC testing relies on libnccl library from NVIDIA repo, and version 2.16 crashes
3434
# See https://github.com/pytorch/pytorch/pull/105260#issuecomment-1673399729
35+
# TODO: Eliminate this hack, we should not rely on apt-get installation
36+
# See https://github.com/pytorch/pytorch/issues/144768
3537
if [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "11.8"* ]]; then
3638
maybe_libnccl_dev="libnccl2=2.15.5-1+cuda11.8 libnccl-dev=2.15.5-1+cuda11.8 --allow-downgrades --allow-change-held-packages"
39+
elif [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "12.4"* ]]; then
40+
maybe_libnccl_dev="libnccl2=2.25.1-1+cuda12.4 libnccl-dev=2.25.1-1+cuda12.4 --allow-downgrades --allow-change-held-packages"
3741
else
3842
maybe_libnccl_dev=""
3943
fi

.ci/docker/common/install_cuda.sh

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
set -ex
44

5-
NCCL_VERSION=v2.21.5-1
5+
NCCL_VERSION=v2.25.1-1
66
CUDNN_VERSION=9.5.1.17
77

88
function install_cusparselt_040 {
@@ -40,6 +40,7 @@ function install_cusparselt_063 {
4040

4141
function install_118 {
4242
CUDNN_VERSION=9.1.0.70
43+
NCCL_VERSION=v2.21.5-1
4344
echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
4445
rm -rf /usr/local/cuda-11.8 /usr/local/cuda
4546
# install CUDA 11.8.0 in the same container
@@ -239,7 +240,7 @@ function prune_126 {
239240
}
240241

241242
function install_128 {
242-
CUDNN_VERSION=9.7.0.66
243+
CUDNN_VERSION=9.7.1.26
243244
echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
244245
rm -rf /usr/local/cuda-12.8 /usr/local/cuda
245246
# install CUDA 12.8.0 in the same container
@@ -288,4 +289,4 @@ do
288289
;;
289290
esac
290291
shift
291-
done
292+
done

.ci/docker/common/install_cuda_aarch64.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ function prune_126 {
161161
}
162162

163163
function install_128 {
164-
CUDNN_VERSION=9.7.0.66
164+
CUDNN_VERSION=9.7.1.26
165165
echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
166166
rm -rf /usr/local/cuda-12.8 /usr/local/cuda
167167
# install CUDA 12.8.0 in the same container

.ci/docker/common/install_cudnn.sh

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ if [[ -n "${CUDNN_VERSION}" ]]; then
44
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
55
mkdir tmp_cudnn
66
pushd tmp_cudnn
7-
if [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
7+
if [[ ${CUDA_VERSION:0:4} == "12.8" ]]; then
8+
CUDNN_NAME="cudnn-linux-x86_64-9.7.1.26_cuda12-archive"
9+
elif [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
810
CUDNN_NAME="cudnn-linux-x86_64-9.5.1.17_cuda12-archive"
911
elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then
1012
CUDNN_NAME="cudnn-linux-x86_64-9.1.0.70_cuda12-archive"

.ci/docker/common/install_onnx.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ pip_install ml_dtypes
3939
# Cache the transformers model to be used later by ONNX tests. We need to run the transformers
4040
# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
4141
IMPORT_SCRIPT_FILENAME="/tmp/onnx_import_script.py"
42-
as_jenkins echo 'import transformers; transformers.AutoModel.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large-v3");' > "${IMPORT_SCRIPT_FILENAME}"
42+
as_jenkins echo 'import transformers; transformers.GPTJForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gptj");' > "${IMPORT_SCRIPT_FILENAME}"
4343

4444
# Need a PyTorch version for transformers to work
4545
pip_install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu

.ci/pytorch/smoke_test/smoke_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ def test_cuda_gds_errors_captured() -> None:
174174
try:
175175
print("Testing test_cuda_gds_errors_captured")
176176
with NamedTemporaryFile() as f:
177-
torch.cuda.gds._GdsFile(f.name, os.O_CREAT | os.O_RDWR)
177+
torch.cuda.gds.GdsFile(f.name, os.O_CREAT | os.O_RDWR)
178178
except RuntimeError as e:
179179
expected_error = "cuFileHandleRegister failed"
180180
if re.search(expected_error, f"{e}"):

.ci/pytorch/test.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,7 @@ test_inductor_cpp_wrapper_shard() {
420420
python test/run_test.py \
421421
--include inductor/test_torchinductor inductor/test_max_autotune inductor/test_cpu_repro \
422422
--verbose
423+
python test/run_test.py --inductor --include test_torch -k 'take' --verbose
423424

424425
# Run inductor benchmark tests with cpp wrapper.
425426
# Skip benchmark tests if it's in rerun-disabled-mode.

0 commit comments

Comments
 (0)