Skip to content

Commit 890cb53

Browse files
committed
Update
[ghstack-poisoned]
2 parents e7f4762 + e7adc37 commit 890cb53

File tree

337 files changed

+8241
-3395
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

337 files changed

+8241
-3395
lines changed

.ci/docker/build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,7 @@ docker build \
510510
--build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
511511
--build-arg "KATEX=${KATEX:-}" \
512512
--build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
513-
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx906;gfx90a}" \
513+
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a}" \
514514
--build-arg "IMAGE_NAME=${IMAGE_NAME}" \
515515
--build-arg "UCX_COMMIT=${UCX_COMMIT}" \
516516
--build-arg "UCC_COMMIT=${UCC_COMMIT}" \
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
cf34004b8a67d290a962da166f5aa2fc66751326
1+
35c6c7c6284582b3f41c71c150e11b517acf074a

.ci/docker/common/install_miopen.sh

Lines changed: 12 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ case "$ID" in
1616
ubuntu)
1717
IS_UBUNTU=1
1818
;;
19-
centos)
19+
centos|almalinux)
2020
IS_UBUNTU=0
2121
;;
2222
*)
@@ -43,12 +43,6 @@ else
4343
fi
4444
ROCM_INT=$(($ROCM_VERSION_MAJOR * 10000 + $ROCM_VERSION_MINOR * 100 + $ROCM_VERSION_PATCH))
4545

46-
# Install custom MIOpen + COMgr for ROCm >= 4.0.1
47-
if [[ $ROCM_INT -lt 40001 ]]; then
48-
echo "ROCm version < 4.0.1; will not install custom MIOpen"
49-
exit 0
50-
fi
51-
5246
# Function to retry functions that sometimes timeout or have flaky failures
5347
retry () {
5448
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
@@ -66,75 +60,35 @@ else
6660
ROCM_INSTALL_PATH="/opt/rocm-${ROCM_VERSION}"
6761
fi
6862

69-
# MIOPEN_USE_HIP_KERNELS is a Workaround for COMgr issues
7063
MIOPEN_CMAKE_COMMON_FLAGS="
7164
-DMIOPEN_USE_COMGR=ON
7265
-DMIOPEN_BUILD_DRIVER=OFF
7366
"
74-
# Pull MIOpen repo and set DMIOPEN_EMBED_DB based on ROCm version
75-
if [[ $ROCM_INT -ge 60300 ]]; then
76-
echo "ROCm 6.3+ MIOpen does not need any patches, do not build from source"
77-
exit 0
78-
elif [[ $ROCM_INT -ge 60204 ]] && [[ $ROCM_INT -lt 60300 ]]; then
79-
echo "ROCm 6.2.4+ MIOpen does not need any patches, do not build from source"
80-
exit 0
81-
elif [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60204 ]]; then
67+
if [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60204 ]]; then
8268
MIOPEN_BRANCH="release/rocm-rel-6.2-staging"
83-
elif [[ $ROCM_INT -ge 60100 ]] && [[ $ROCM_INT -lt 60200 ]]; then
84-
echo "ROCm 6.1 MIOpen does not need any patches, do not build from source"
85-
exit 0
86-
elif [[ $ROCM_INT -ge 60000 ]] && [[ $ROCM_INT -lt 60100 ]]; then
87-
echo "ROCm 6.0 MIOpen does not need any patches, do not build from source"
88-
exit 0
89-
elif [[ $ROCM_INT -ge 50700 ]] && [[ $ROCM_INT -lt 60000 ]]; then
90-
echo "ROCm 5.7 MIOpen does not need any patches, do not build from source"
91-
exit 0
92-
elif [[ $ROCM_INT -ge 50600 ]] && [[ $ROCM_INT -lt 50700 ]]; then
93-
MIOPEN_BRANCH="release/rocm-rel-5.6-staging"
94-
elif [[ $ROCM_INT -ge 50500 ]] && [[ $ROCM_INT -lt 50600 ]]; then
95-
MIOPEN_BRANCH="release/rocm-rel-5.5-gfx11"
96-
elif [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then
97-
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off"
98-
MIOPEN_BRANCH="release/rocm-rel-5.4-staging"
99-
elif [[ $ROCM_INT -ge 50300 ]] && [[ $ROCM_INT -lt 50400 ]]; then
100-
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off"
101-
MIOPEN_BRANCH="release/rocm-rel-5.3-staging"
102-
elif [[ $ROCM_INT -ge 50200 ]] && [[ $ROCM_INT -lt 50300 ]]; then
103-
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off"
104-
MIOPEN_BRANCH="release/rocm-rel-5.2-staging"
105-
elif [[ $ROCM_INT -ge 50100 ]] && [[ $ROCM_INT -lt 50200 ]]; then
106-
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36"
107-
MIOPEN_BRANCH="release/rocm-rel-5.1-staging"
108-
elif [[ $ROCM_INT -ge 50000 ]] && [[ $ROCM_INT -lt 50100 ]]; then
109-
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36"
110-
MIOPEN_BRANCH="release/rocm-rel-5.0-staging"
11169
else
112-
echo "Unhandled ROCM_VERSION ${ROCM_VERSION}"
113-
exit 1
70+
echo "ROCm ${ROCM_VERSION} does not need any patches, do not build from source"
71+
exit 0
11472
fi
11573

11674

11775
if [[ ${IS_UBUNTU} == 1 ]]; then
11876
apt-get remove -y miopen-hip
11977
else
120-
yum remove -y miopen-hip
78+
# Workaround since almalinux manylinux image already has this and cget doesn't like that
79+
rm -rf /usr/local/lib/pkgconfig/sqlite3.pc
80+
81+
# Versioned package name needs regex match
82+
# Use --noautoremove to prevent other rocm packages from being uninstalled
83+
yum remove -y miopen-hip* --noautoremove
12184
fi
12285

12386
git clone https://github.com/ROCm/MIOpen -b ${MIOPEN_BRANCH}
12487
pushd MIOpen
12588
# remove .git to save disk space since CI runner was running out
12689
rm -rf .git
12790
# Don't build CK to save docker build time
128-
if [[ $ROCM_INT -ge 60200 ]]; then
129-
sed -i '/composable_kernel/d' requirements.txt
130-
fi
131-
# Don't build MLIR to save docker build time
132-
# since we are disabling MLIR backend for MIOpen anyway
133-
if [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then
134-
sed -i '/rocMLIR/d' requirements.txt
135-
elif [[ $ROCM_INT -ge 50200 ]] && [[ $ROCM_INT -lt 50400 ]]; then
136-
sed -i '/llvm-project-mlir/d' requirements.txt
137-
fi
91+
sed -i '/composable_kernel/d' requirements.txt
13892
## MIOpen minimum requirements
13993
cmake -P install_deps.cmake --minimum
14094

@@ -156,7 +110,7 @@ cd build
156110
PKG_CONFIG_PATH=/usr/local/lib/pkgconfig CXX=${ROCM_INSTALL_PATH}/llvm/bin/clang++ cmake .. \
157111
${MIOPEN_CMAKE_COMMON_FLAGS} \
158112
${MIOPEN_CMAKE_DB_FLAGS} \
159-
-DCMAKE_PREFIX_PATH="${ROCM_INSTALL_PATH}/hip;${ROCM_INSTALL_PATH}"
113+
-DCMAKE_PREFIX_PATH="${ROCM_INSTALL_PATH}"
160114
make MIOpen -j $(nproc)
161115

162116
# Build MIOpen package

.ci/docker/common/install_onnx.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ pip_install coloredlogs packaging
3232

3333
pip_install onnxruntime==1.18.1
3434
pip_install onnx==1.16.2
35-
pip_install onnxscript==0.1.0.dev20241009 --no-deps
35+
pip_install onnxscript==0.1.0.dev20241124 --no-deps
3636
# required by onnxscript
3737
pip_install ml_dtypes
3838

.ci/docker/common/install_rocm_drm.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ case "$ID" in
1212
apt-get install -y libpciaccess-dev pkg-config
1313
apt-get clean
1414
;;
15-
centos)
15+
centos|almalinux)
1616
yum install -y libpciaccess-devel pkgconfig
1717
;;
1818
*)

.ci/docker/common/install_rocm_magma.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,18 @@
33

44
set -ex
55

6+
# Magma build scripts need `python`
7+
ln -sf /usr/bin/python3 /usr/bin/python
8+
9+
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
10+
case "$ID" in
11+
almalinux)
12+
yum install -y gcc-gfortran
13+
;;
14+
*)
15+
echo "No preinstalls to build magma..."
16+
;;
17+
esac
618

719
MKLROOT=${MKLROOT:-/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION}
820

.ci/docker/manywheel/Dockerfile_2_28

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# syntax = docker/dockerfile:experimental
2-
ARG ROCM_VERSION=3.7
32
ARG BASE_CUDA_VERSION=11.8
43
ARG GPU_IMAGE=amd64/almalinux:8
54
FROM quay.io/pypa/manylinux_2_28_x86_64 as base
@@ -130,10 +129,10 @@ RUN for cpython_version in "cp312-cp312" "cp313-cp313" "cp313-cp313t"; do \
130129
done;
131130

132131

133-
# cmake-3.18.4 from pip
132+
# cmake-3.18.4 from pip; force in case cmake3 already exists
134133
RUN yum install -y python3-pip && \
135134
python3 -mpip install cmake==3.18.4 && \
136-
ln -s /usr/local/bin/cmake /usr/bin/cmake3
135+
ln -sf /usr/local/bin/cmake /usr/bin/cmake3
137136

138137
FROM cpu_final as cuda_final
139138
RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}
@@ -142,17 +141,22 @@ COPY --from=magma /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BAS
142141
RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda
143142
ENV PATH=/usr/local/cuda/bin:$PATH
144143

145-
146-
FROM common as rocm_final
147-
ARG ROCM_VERSION=3.7
148-
# Install ROCm
149-
ADD ./common/install_rocm.sh install_rocm.sh
150-
RUN bash ./install_rocm.sh ${ROCM_VERSION} && rm install_rocm.sh
151-
# cmake is already installed inside the rocm base image, but both 2 and 3 exist
152-
# cmake3 is needed for the later MIOpen custom build, so that step is last.
153-
RUN yum install -y cmake3 && \
154-
rm -f /usr/bin/cmake && \
155-
ln -s /usr/bin/cmake3 /usr/bin/cmake
144+
FROM cpu_final as rocm_final
145+
ARG ROCM_VERSION=6.0
146+
ARG PYTORCH_ROCM_ARCH
147+
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
148+
# Somewhere in ROCm stack, we still use non-existing /opt/rocm/hip path,
149+
# below workaround helps avoid error
150+
ENV ROCM_PATH /opt/rocm
151+
# cmake-3.28.4 from pip to get enable_language(HIP)
152+
# and avoid 3.21.0 cmake+ninja issues with ninja inserting "-Wl,--no-as-needed" in LINK_FLAGS for static linker
153+
RUN python3 -m pip install --upgrade pip && \
154+
python3 -mpip install cmake==3.28.4
155+
ADD ./common/install_rocm_drm.sh install_rocm_drm.sh
156+
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
157+
ENV MKLROOT /opt/intel
158+
ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
159+
RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
156160
ADD ./common/install_miopen.sh install_miopen.sh
157161
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
158162

.ci/docker/manywheel/build.sh

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,21 +87,22 @@ case ${GPU_ARCH_TYPE} in
8787
MANY_LINUX_VERSION="aarch64"
8888
DOCKERFILE_SUFFIX="_cuda_aarch64"
8989
;;
90-
rocm)
90+
rocm|rocm-manylinux_2_28)
9191
TARGET=rocm_final
9292
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
9393
GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-complete
94-
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
94+
if [ ${GPU_ARCH_TYPE} == "rocm-manylinux_2_28" ]; then
95+
MANY_LINUX_VERSION="2_28"
96+
GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
97+
fi
98+
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100"
9599
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
96100
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
97101
ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0}))
98102
else
99103
echo "ERROR: rocm regex failed"
100104
exit 1
101105
fi
102-
if [[ $ROCM_VERSION_INT -ge 60000 ]]; then
103-
PYTORCH_ROCM_ARCH+=";gfx942"
104-
fi
105106
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=9"
106107
;;
107108
xpu)

.ci/docker/triton_version.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
3.1.0
1+
3.2.0

.ci/manywheel/build_cuda.sh

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,22 @@ cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.')
5959

6060
TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6"
6161
case ${CUDA_VERSION} in
62-
12.4 | 12.6)
62+
12.6)
6363
if [[ "$GPU_ARCH_TYPE" = "cuda-aarch64" ]]; then
6464
TORCH_CUDA_ARCH_LIST="9.0"
6565
else
6666
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0+PTX"
6767
fi
6868
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
6969
;;
70+
12.4)
71+
if [[ "$GPU_ARCH_TYPE" = "cuda-aarch64" ]]; then
72+
TORCH_CUDA_ARCH_LIST="9.0"
73+
else
74+
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
75+
fi
76+
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
77+
;;
7078
12.1)
7179
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
7280
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
@@ -75,10 +83,6 @@ case ${CUDA_VERSION} in
7583
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};3.7;9.0"
7684
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
7785
;;
78-
11.[67])
79-
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};3.7"
80-
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
81-
;;
8286
*)
8387
echo "unknown cuda version $CUDA_VERSION"
8488
exit 1

0 commit comments

Comments
 (0)