Skip to content

Commit ebfa33a

Browse files
Merge branch 'pytorch:main' into torch_distributed
2 parents 7772ced + 228963a commit ebfa33a

File tree

1,686 files changed

+33827
-20429
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,686 files changed

+33827
-20429
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
cd1c833b079adb324871dcbbe75b43d42ffc0ade
1+
16b633b4daa7f3d3442be62a3589bd60b2f7fdc7

.ci/docker/common/install_cuda.sh

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,39 @@ function install_124 {
137137
ldconfig
138138
}
139139

140+
function install_126 {
141+
echo "Installing CUDA 12.6.2 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
142+
rm -rf /usr/local/cuda-12.6 /usr/local/cuda
143+
# install CUDA 12.6.2 in the same container
144+
wget -q https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.35.03_linux.run
145+
chmod +x cuda_12.6.2_560.35.03_linux.run
146+
./cuda_12.6.2_560.35.03_linux.run --toolkit --silent
147+
rm -f cuda_12.6.2_560.35.03_linux.run
148+
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda
149+
150+
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
151+
mkdir tmp_cudnn && cd tmp_cudnn
152+
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
153+
tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
154+
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
155+
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
156+
cd ..
157+
rm -rf tmp_cudnn
158+
159+
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
160+
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
161+
git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
162+
cd nccl && make -j src.build
163+
cp -a build/include/* /usr/local/cuda/include/
164+
cp -a build/lib/* /usr/local/cuda/lib64/
165+
cd ..
166+
rm -rf nccl
167+
168+
install_cusparselt_062
169+
170+
ldconfig
171+
}
172+
140173
function prune_118 {
141174
echo "Pruning CUDA 11.8 and cuDNN"
142175
#####################################################################################
@@ -227,12 +260,46 @@ function prune_124 {
227260
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
228261

229262
#####################################################################################
230-
# CUDA 12.1 prune visual tools
263+
# CUDA 12.4 prune visual tools
231264
#####################################################################################
232265
export CUDA_BASE="/usr/local/cuda-12.4/"
233266
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/
234267
}
235268

269+
function prune_126 {
270+
echo "Pruning CUDA 12.6"
271+
#####################################################################################
272+
# CUDA 12.6 prune static libs
273+
#####################################################################################
274+
export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune"
275+
export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64"
276+
277+
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
278+
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
279+
280+
if [[ -n "$OVERRIDE_GENCODE" ]]; then
281+
export GENCODE=$OVERRIDE_GENCODE
282+
fi
283+
if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
284+
export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
285+
fi
286+
287+
# all CUDA libs except CuDNN and CuBLAS
288+
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
289+
| xargs -I {} bash -c \
290+
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
291+
292+
# prune CuDNN and CuBLAS
293+
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
294+
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
295+
296+
#####################################################################################
297+
# CUDA 12.6 prune visual tools
298+
#####################################################################################
299+
export CUDA_BASE="/usr/local/cuda-12.6/"
300+
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/
301+
}
302+
236303
# idiomatic parameter and option handling in sh
237304
while test $# -gt 0
238305
do
@@ -243,6 +310,8 @@ do
243310
;;
244311
12.4) install_124; prune_124
245312
;;
313+
12.6) install_126; prune_126
314+
;;
246315
*) echo "bad argument $1"; exit 1
247316
;;
248317
esac

.ci/docker/common/install_xpu.sh

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,16 @@ function install_ubuntu() {
4141
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
4242
libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
4343
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
44+
if [[ "${XPU_DRIVER_TYPE,,}" == "rolling" ]]; then
45+
apt-get install -y intel-ocloc
46+
fi
4447
# Development Packages
4548
apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
4649
# Install Intel Support Packages
4750
if [ -n "$XPU_VERSION" ]; then
48-
apt-get install -y intel-for-pytorch-gpu-dev-${XPU_VERSION} intel-pti-dev
51+
apt-get install -y intel-for-pytorch-gpu-dev-${XPU_VERSION} intel-pti-dev-0.9
4952
else
50-
apt-get install -y intel-for-pytorch-gpu-dev intel-pti-dev
53+
apt-get install -y intel-for-pytorch-gpu-dev-0.5 intel-pti-dev-0.9
5154
fi
5255

5356
# Cleanup
@@ -97,7 +100,7 @@ EOF
97100
intel-igc-opencl-devel level-zero-devel intel-gsc-devel libmetee-devel \
98101
level-zero-devel
99102
# Install Intel Support Packages
100-
yum install -y intel-for-pytorch-gpu-dev intel-pti-dev
103+
yum install -y intel-for-pytorch-gpu-dev-0.5 intel-pti-dev-0.9
101104

102105
# Cleanup
103106
dnf clean all
@@ -131,7 +134,7 @@ function install_sles() {
131134
zypper install -y libigdfcl-devel intel-igc-cm libigfxcmrt-devel level-zero-devel
132135

133136
# Install Intel Support Packages
134-
zypper install -y intel-for-pytorch-gpu-dev intel-pti-dev
137+
zypper install -y intel-for-pytorch-gpu-dev-0.5 intel-pti-dev-0.9
135138

136139
}
137140

.ci/docker/conda/Dockerfile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,10 @@ FROM cuda as cuda12.4
7070
RUN bash ./install_cuda.sh 12.4
7171
ENV DESIRED_CUDA=12.4
7272

73+
FROM cuda as cuda12.6
74+
RUN bash ./install_cuda.sh 12.6
75+
ENV DESIRED_CUDA=12.6
76+
7377
# Install MNIST test data
7478
FROM base as mnist
7579
ADD ./common/install_mnist.sh install_mnist.sh
@@ -79,6 +83,7 @@ FROM base as all_cuda
7983
COPY --from=cuda11.8 /usr/local/cuda-11.8 /usr/local/cuda-11.8
8084
COPY --from=cuda12.1 /usr/local/cuda-12.1 /usr/local/cuda-12.1
8185
COPY --from=cuda12.4 /usr/local/cuda-12.4 /usr/local/cuda-12.4
86+
COPY --from=cuda12.6 /usr/local/cuda-12.6 /usr/local/cuda-12.6
8287

8388
# Final step
8489
FROM ${BASE_TARGET} as final

.ci/docker/libtorch/Dockerfile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@ RUN bash ./install_cuda.sh 12.4
6666
RUN bash ./install_magma.sh 12.4
6767
RUN ln -sf /usr/local/cuda-12.4 /usr/local/cuda
6868

69+
FROM cuda as cuda12.6
70+
RUN bash ./install_cuda.sh 12.6
71+
RUN bash ./install_magma.sh 12.6
72+
RUN ln -sf /usr/local/cuda-12.6 /usr/local/cuda
73+
6974
FROM cpu as rocm
7075
ARG PYTORCH_ROCM_ARCH
7176
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}

.ci/docker/manywheel/build_scripts/ssl-check.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,21 @@
11
# cf. https://github.com/pypa/manylinux/issues/53
22

3+
import sys
4+
from urllib.request import urlopen
5+
6+
37
GOOD_SSL = "https://google.com"
48
BAD_SSL = "https://self-signed.badssl.com"
59

6-
import sys
7-
810

911
print("Testing SSL certificate checking for Python:", sys.version)
1012

1113
if sys.version_info[:2] < (2, 7) or sys.version_info[:2] < (3, 4):
1214
print("This version never checks SSL certs; skipping tests")
1315
sys.exit(0)
1416

15-
if sys.version_info[0] >= 3:
16-
from urllib.request import urlopen
17-
18-
EXC = OSError
19-
else:
20-
from urllib import urlopen
2117

22-
EXC = IOError
18+
EXC = OSError
2319

2420
print(f"Connecting to {GOOD_SSL} should work")
2521
urlopen(GOOD_SSL)

.ci/docker/requirements-ci.txt

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
#Pinned versions: 1.6
66
#test that import:
77

8-
boto3==1.19.12
8+
boto3==1.35.42
99
#Description: AWS SDK for python
1010
#Pinned versions: 1.19.12, 1.16.34
1111
#test that import:
@@ -257,7 +257,7 @@ tb-nightly==2.13.0a20230426
257257
#test that import:
258258

259259
# needed by torchgen utils
260-
typing-extensions
260+
typing-extensions>=4.10.0
261261
#Description: type hints for python
262262
#Pinned versions:
263263
#test that import:
@@ -331,7 +331,7 @@ sympy==1.13.1 ; python_version >= "3.9"
331331
#Pinned versions:
332332
#test that import:
333333

334-
onnx==1.16.1
334+
onnx==1.17.0
335335
#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
336336
#Pinned versions:
337337
#test that import:
@@ -363,3 +363,8 @@ setuptools
363363

364364
ninja==1.11.1 ; platform_machine == "aarch64"
365365
scons==4.5.2 ; platform_machine == "aarch64"
366+
367+
pulp==2.9.0 ; python_version >= "3.8"
368+
#Description: required for testing ilp formulation under torch/distributed/_tools
369+
#Pinned versions: 2.9.0
370+
#test that import: test_sac_ilp.py

.ci/libtorch/build.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/usr/bin/env bash
2+
3+
# This is mostly just a shim to manywheel/build.sh
4+
# TODO: Make this a dedicated script to build just libtorch
5+
6+
set -ex
7+
8+
SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
9+
10+
USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.9" ${SCRIPTPATH}/../manywheel/build.sh

.ci/manywheel/LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
The MIT License (MIT)
2+
3+
Copyright (c) 2016 manylinux
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

.ci/manywheel/build.sh

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!/usr/bin/env bash
2+
3+
set -ex
4+
5+
SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
6+
7+
case "${GPU_ARCH_TYPE:-BLANK}" in
8+
BLANK)
9+
# Legacy behavior for CircleCI
10+
bash "${SCRIPTPATH}/build_cuda.sh"
11+
;;
12+
cuda)
13+
bash "${SCRIPTPATH}/build_cuda.sh"
14+
;;
15+
rocm)
16+
bash "${SCRIPTPATH}/build_rocm.sh"
17+
;;
18+
cpu | cpu-cxx11-abi | cpu-s390x | xpu)
19+
bash "${SCRIPTPATH}/build_cpu.sh"
20+
;;
21+
*)
22+
echo "Un-recognized GPU_ARCH_TYPE '${GPU_ARCH_TYPE}', exiting..."
23+
exit 1
24+
;;
25+
esac

0 commit comments

Comments
 (0)