#!/usr/bin/env bash
# Script used only in CD pipeline
#
# Builds the image described by .ci/docker/almalinux/Dockerfile (target
# "final") and tags it locally with a throw-away temporary tag.
#
# Usage: <this script> IMAGENAME:ARCHTAG [extra `docker build` arguments...]
#
#   ARCHTAG selects the build flavour:
#     cpu             -> BASE_TARGET=base
#     cuda<version>   -> BASE_TARGET=cuda<version>, CUDA_VERSION=<version>
#     rocm<version>   -> BASE_TARGET=rocm,
#                        ROCM_IMAGE=rocm/dev-almalinux-8:<version>-complete
#
# Any arguments after the first are forwarded verbatim to `docker build`.
# Exits non-zero on a missing argument, an unknown ARCHTAG, or any failing step.

set -euxo pipefail

# Use a default expansion so that a missing argument reaches the usage message
# instead of tripping `set -u`; only `shift` once we know $1 exists.
image="${1:-}"
if [[ -z "${image}" ]]; then
  echo "Usage: $0 IMAGENAME:ARCHTAG" >&2
  exit 1
fi
shift

# Go from imagename:tag to tag
DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')

CUDA_VERSION=""
ROCM_VERSION=""
# Array (not a string) so the arguments survive quoting intact.
EXTRA_BUILD_ARGS=()
if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
  # extract cuda version from image name and tag. e.g. manylinux2_28-builder:cuda12.8 returns 12.8
  CUDA_VERSION="${DOCKER_TAG_PREFIX#cuda}"
  EXTRA_BUILD_ARGS=(--build-arg "CUDA_VERSION=${CUDA_VERSION}")
elif [[ "${DOCKER_TAG_PREFIX}" == rocm* ]]; then
  # extract rocm version from image name and tag. e.g. manylinux2_28-builder:rocm6.2.4 returns 6.2.4
  ROCM_VERSION="${DOCKER_TAG_PREFIX#rocm}"
  EXTRA_BUILD_ARGS=(--build-arg "ROCM_IMAGE=rocm/dev-almalinux-8:${ROCM_VERSION}-complete")
fi

case "${DOCKER_TAG_PREFIX}" in
  cpu)
    BASE_TARGET=base
    ;;
  cuda*)
    BASE_TARGET="cuda${CUDA_VERSION}"
    ;;
  rocm*)
    BASE_TARGET=rocm
    ;;
  *)
    echo "ERROR: Unknown docker tag ${DOCKER_TAG_PREFIX}"
    exit 1
    ;;
esac

# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
sudo sed -i 's/LimitNOFILE=infinity/LimitNOFILE=1048576/' /usr/lib/systemd/system/docker.service
sudo systemctl daemon-reload
sudo systemctl restart docker

export DOCKER_BUILDKIT=1
TOPDIR=$(git rev-parse --show-toplevel)
# `mktemp -u` only generates a unique name (no file is created); it is
# lower-cased before being used as the local image tag.
tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')

# The `${arr[@]+...}` form keeps an empty array from being reported as unset
# under `set -u` on older bash releases.
docker build \
  --target final \
  --progress plain \
  --build-arg "BASE_TARGET=${BASE_TARGET}" \
  --build-arg "DEVTOOLSET_VERSION=11" \
  ${EXTRA_BUILD_ARGS[@]+"${EXTRA_BUILD_ARGS[@]}"} \
  -t "${tmp_tag}" \
  "$@" \
  -f "${TOPDIR}/.ci/docker/almalinux/Dockerfile" \
  "${TOPDIR}/.ci/docker/"

if [[ -n "${CUDA_VERSION}" ]]; then
  # Test that we're using the right CUDA compiler
  # -F: match the version literally so "." is not treated as a regex wildcard.
  docker run --rm "${tmp_tag}" nvcc --version | grep -F -- "cuda_${CUDA_VERSION}"
fi