Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.

Commit 9b8a5e6

Browse files
mseth10 and szha authored
[v1.x] Fix nightly CD for python docker image releases (#19774)
* [CD] switch CD_RELEASE_JOB_NAME from global env var to job argument (#17775) * Fix nightly CD for python docker image releases (#19772) * install wget * test cd docker in ci * install docker * install python3-dev and gcc * remove docker testing from ci * remove python3-dev * ecr target * skip build test * adding back python3-dev for make * remove dynamic and pypi stages for testing * install build-essential * install zlib * update python version * update ld library path * install openssl * update test packages for python3.7 * remove call to deleted safe_docker_run.py * hardcode region for public ecr repo * use deadsnakes to install python * revert dependency change * refactor ecr login * update ecr repo jenkins global var * cleanup * update docker authentication * add ecr repo * add back pypi and tests * remove unused libmxnet pipeline * update cu112 base docker * update base docker images to ub18 * differentiate tag prefix for v1.x Co-authored-by: Sheng Zha <[email protected]>
1 parent a9eec24 commit 9b8a5e6

File tree

11 files changed

+54
-131
lines changed

11 files changed

+54
-131
lines changed

cd/Jenkinsfile_cd_pipeline

Lines changed: 15 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -47,41 +47,29 @@ pipeline {
4747
cd_utils = load('cd/Jenkinsfile_utils.groovy')
4848

4949
// Update release job state in Jenkins
50-
cd_utils.update_release_job_state()
50+
cd_utils.update_release_job_state(params.CD_RELEASE_JOB_NAME)
5151
}
5252
}
5353
}
5454

5555
stage("MXNet Release") {
5656
steps {
5757
script {
58-
cd_utils.error_checked_parallel([
59-
60-
"Static libmxnet based release": {
61-
stage("Build") {
62-
cd_utils.trigger_release_job("Build static libmxnet", "mxnet_lib/static", params.MXNET_VARIANTS)
63-
}
64-
stage("Releases") {
65-
cd_utils.error_checked_parallel([
66-
"PyPI Release": {
67-
echo "Building PyPI Release"
68-
cd_utils.trigger_release_job("Release PyPI Packages", "python/pypi", params.MXNET_VARIANTS)
69-
},
70-
"Python Docker Release": {
71-
echo "Building Python Docker Release"
72-
cd_utils.trigger_release_job("Release Python Docker Images", "python/docker", params.MXNET_VARIANTS)
73-
}
74-
])
75-
}
76-
},
77-
78-
"Dynamic libmxnet based release": {
79-
stage("Build") {
80-
cd_utils.trigger_release_job("Build dynamic libmxnet", "mxnet_lib/dynamic", params.MXNET_VARIANTS)
58+
stage("Build libmxnet") {
59+
cd_utils.trigger_release_job(params.CD_RELEASE_JOB_NAME, "Build libmxnet", "mxnet_lib", params.MXNET_VARIANTS)
60+
}
61+
stage("Releases") {
62+
cd_utils.error_checked_parallel([
63+
"PyPI Release": {
64+
echo "Building PyPI Release"
65+
cd_utils.trigger_release_job(params.CD_RELEASE_JOB_NAME, "Release PyPI Packages", "python/pypi", params.MXNET_VARIANTS)
66+
},
67+
"Python Docker Release": {
68+
echo "Building Python Docker Release"
69+
cd_utils.trigger_release_job(params.CD_RELEASE_JOB_NAME, "Release Python Docker Images", "python/docker", params.MXNET_VARIANTS)
8170
}
82-
}
83-
84-
])
71+
])
72+
}
8573
}
8674
}
8775
}

cd/Jenkinsfile_release_job

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ pipeline {
4242
// Using string instead of choice parameter to keep the changes to the parameters minimal to avoid
4343
// any disruption caused by different COMMIT_ID values changing the job parameter configuration on
4444
// Jenkins.
45-
string(defaultValue: "mxnet_lib/static", description: "Pipeline to build", name: "RELEASE_JOB_TYPE")
45+
string(defaultValue: "mxnet_lib", description: "Pipeline to build", name: "RELEASE_JOB_TYPE")
4646
string(defaultValue: "cpu,native,cu100,cu101,cu102,cu110,cu112", description: "Comma separated list of variants", name: "MXNET_VARIANTS")
4747
booleanParam(defaultValue: false, description: 'Whether this is a release build or not', name: "RELEASE_BUILD")
4848
}
@@ -74,7 +74,7 @@ pipeline {
7474
|Release Build: ${params.RELEASE_BUILD}
7575
|Commit Id: ${env.GIT_COMMIT}
7676
|Branch: ${env.GIT_BRANCH}
77-
|Variants: ${env.MXNET_VARIANTS}""".stripMargin()
77+
|Variants: ${params.MXNET_VARIANTS}""".stripMargin()
7878
}
7979
}
8080
}
@@ -90,8 +90,7 @@ pipeline {
9090

9191
// Add new job types here
9292
def valid_job_types = [
93-
"mxnet_lib/static",
94-
"mxnet_lib/dynamic",
93+
"mxnet_lib",
9594
"python/pypi",
9695
"python/docker"
9796
]

cd/Jenkinsfile_utils.groovy

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,16 @@
1919

2020
// Triggers a downstream jenkins job responsible for building, testing
2121
// and publishing all the variants for a particular 'job_type'.
22-
// The 'job_type' should be the name of the directory that contains the
23-
// 'Jenkins_pipeline.groovy' file and has the pipeline definition for the
22+
// The 'job_type' should be the name of the directory that contains the
23+
// 'Jenkins_pipeline.groovy' file and has the pipeline definition for the
2424
// artifact (docker image, binary, pypi or maven package, etc.) that should
2525
// be published.
2626

2727
STATE_UPDATE="State Update"
2828

29-
def trigger_release_job(job_name, job_type, mxnet_variants) {
29+
def trigger_release_job(cd_release_job, job_name, job_type, mxnet_variants) {
3030
def run = build(
31-
job: env.CD_RELEASE_JOB_NAME,
31+
job: cd_release_job,
3232
parameters: [
3333
string(name: "RELEASE_JOB_NAME", value: "${job_name}"),
3434
string(name: "RELEASE_JOB_TYPE", value: "${job_type}"),
@@ -49,7 +49,7 @@ def trigger_release_job(job_name, job_type, mxnet_variants) {
4949
// continue with the pipeline and try to post as many releases as possible
5050
// but mark it as unstable
5151
if (result == "UNSTABLE" || result == "ABORTED") {
52-
currentBuild.result = "UNSTABLE"
52+
currentBuild.result = "UNSTABLE"
5353
}
5454

5555
// Throw an exception on failure, because this would mean the whole
@@ -65,12 +65,12 @@ def trigger_release_job(job_name, job_type, mxnet_variants) {
6565
// the configuration of the release job in jenkins
6666
// to the configuration of release job as defined in the
6767
// Jenkinsfile_release_job for env.GIT_COMMIT revision
68-
def update_release_job_state() {
68+
def update_release_job_state(cd_release_job) {
6969
build(
70-
job: env.CD_RELEASE_JOB_NAME,
70+
job: cd_release_job,
7171
parameters: [
7272
string(name: "RELEASE_JOB_TYPE", value: STATE_UPDATE),
73-
73+
7474
// Should be set to the current git commit
7575
string(name: "COMMIT_ID", value: "${env.GIT_COMMIT}")
7676
])
@@ -103,7 +103,7 @@ def wrap_variant_pipeline_fn(variant_pipeline, total_num_pipelines) {
103103
// The outcome of the execution of each parallel step will affect
104104
// the result (SUCCESS, FAILURE, ABORTED, UNSTABLE) of the overall job.
105105
// If all steps fail or are aborted, the job will be set to failed.
106-
// If some steps fail or are aborted, the job will be set to unstable.
106+
// If some steps fail or are aborted, the job will be set to unstable.
107107
def error_checked_parallel(variant_pipelines) {
108108
pipelines = variant_pipelines.inject([:]) { mp, key, value ->
109109
mp << ["${key}": wrap_variant_pipeline_fn(value, variant_pipelines.size())]
@@ -179,7 +179,7 @@ def restore_dynamic_libmxnet(variant) {
179179
// NOTE: Be mindful of the expected time that a step should take. If it will take a long time,
180180
// and it can be done in a CPU node, do it in a CPU node. We should avoid using GPU instances unless
181181
// we *have* to.
182-
// However, if it is only packaging libmxnet and that doesn't take long. Then, the pipeline can
182+
// However, if it is only packaging libmxnet and that doesn't take long, then the pipeline can
183183
// just run on a single node. As is done below.
184184
// For examples of multi-node CD pipelines, see the binary_release/static and binary_release/dynamic
185185
// pipeline.

cd/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ The [release job](Jenkinsfile_release_job) takes five parameters:
6060
* **RELEASE\_JOB\_TYPE**: Defines the release pipeline you want to execute.
6161
* **COMMIT_ID**: The commit id to build
6262

63-
The release job executes, in parallel, the release pipeline for each of the variants (**MXNET_VARIANTS**) for the job type (**RELEASE\_JOB\_TYPE**). The job type the path to a directory (relative to the `cd` directory) that includes a `Jenkins_pipeline.groovy` file ([e.g.](mxnet_lib/static/Jenkins_pipeline.groovy)).
63+
The release job executes, in parallel, the release pipeline for each of the variants (**MXNET_VARIANTS**) for the job type (**RELEASE\_JOB\_TYPE**). The job type is the path to a directory (relative to the `cd` directory) that includes a `Jenkins_pipeline.groovy` file ([e.g.](mxnet_lib/Jenkins_pipeline.groovy)).
6464

6565
NOTE: The **COMMIT_ID** is a little tricky and we must be very careful with it. It is necessary to ensure that the same commit is built throughout the pipeline, but at the same time, it has the potential to change the current state of the release job configuration - specifically the parameter configuration. Any changes to this configuration will require a "dry-run" of the release job to ensure Jenkins has the current (master) version. This is acceptable as there will be few changes to the parameter configuration for the job, if any at all. But, it's something to keep in mind.
6666

@@ -192,4 +192,4 @@ def test(mxnet_variant) {
192192

193193
Examples:
194194

195-
Both the [statically linked libmxnet](mxnet_lib/static/Jenkins_pipeline.groovy) and [dynamically linked libmxnet](mxnet_lib/dynamic/Jenkins_pipeline.groovy) pipelines have long running compilation and testing stages that **do not** require specialized/expensive hardware (e.g. GPUs). Therefore, as much as possible, it is important to run each stage in on its own node, and design the pipeline to spend the least amount of time possible on expensive hardware. E.g. for GPU builds, only run GPU tests on GPU instances, all other stages can be executed on CPU nodes.
195+
The [libmxnet](mxnet_lib/Jenkins_pipeline.groovy) pipeline has long running compilation and testing stages that **do not** require specialized/expensive hardware (e.g. GPUs). Therefore, as much as possible, it is important to run each stage on its own node, and design the pipeline to spend the least amount of time possible on expensive hardware. E.g. for GPU builds, only run GPU tests on GPU instances, all other stages can be executed on CPU nodes.
File renamed without changes.

cd/mxnet_lib/dynamic/Jenkins_pipeline.groovy

Lines changed: 0 additions & 58 deletions
This file was deleted.

cd/python/docker/Dockerfile

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,14 @@
2323
ARG BASE_IMAGE
2424
FROM ${BASE_IMAGE}
2525

26-
RUN apt-get update || true
27-
RUN apt-get install -y software-properties-common
28-
RUN add-apt-repository -y ppa:deadsnakes/ppa
29-
RUN apt-get update || true
30-
RUN apt-get install -y python3.7-dev python3.7-distutils virtualenv wget
31-
RUN ln -sf /usr/bin/python3.7 /usr/local/bin/python3
32-
33-
RUN wget -nv https://bootstrap.pypa.io/get-pip.py
34-
RUN python3 get-pip.py
26+
RUN apt-get update && \
27+
apt-get install -y software-properties-common && \
28+
add-apt-repository -y ppa:deadsnakes/ppa && \
29+
apt-get update && \
30+
apt-get install -y python3.7-dev python3.7-distutils virtualenv wget && \
31+
ln -sf /usr/bin/python3.7 /usr/local/bin/python3 && \
32+
wget -nv https://bootstrap.pypa.io/get-pip.py && \
33+
python3 get-pip.py
3534

3635
ARG MXNET_COMMIT_ID
3736
ENV MXNET_COMMIT_ID=${MXNET_COMMIT_ID}

cd/python/docker/Dockerfile.test

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,6 @@
2323
ARG BASE_IMAGE
2424
FROM ${BASE_IMAGE}
2525

26-
# Install test dependencies
27-
RUN pip install nose
28-
2926
ARG USER_ID=1001
3027
ARG GROUP_ID=1001
3128

cd/python/docker/python_images.sh

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
set -xe
2525

26-
usage="Usage: python_images.sh <build|test|publish> MXNET-VARIANT"
26+
usage="Usage: python_images.sh <build|test|push> MXNET-VARIANT"
2727

2828
command=${1:?$usage}
2929
mxnet_variant=${2:?$usage}
@@ -39,8 +39,8 @@ image_name="${repository}:${main_tag}"
3939

4040
resources_path='cd/python/docker'
4141

42-
if [ ! -z "${RELEASE_DOCKERHUB_REPOSITORY}" ]; then
43-
image_name="${RELEASE_DOCKERHUB_REPOSITORY}/${image_name}"
42+
if [ ! -z "${RELEASE_PUBLIC_ECR_REPOSITORY}" ]; then
43+
image_name="${RELEASE_PUBLIC_ECR_REPOSITORY}/${image_name}"
4444
fi
4545

4646
build() {
@@ -57,26 +57,24 @@ test() {
5757

5858
# Ensure the correct context root is passed in when building - Dockerfile.test expects ci directory
5959
docker build -t "${test_image_name}" --build-arg USER_ID=`id -u` --build-arg GROUP_ID=`id -g` --build-arg BASE_IMAGE="${image_name}" -f ${resources_path}/Dockerfile.test ./ci
60-
./ci/safe_docker_run.py ${runtime_param} --cap-add "SYS_PTRACE" -u `id -u`:`id -g` -v `pwd`:/work/mxnet "${test_image_name}" ${resources_path}/test_python_image.sh "${mxnet_variant}"
6160
}
6261

6362
push() {
64-
if [ -z "${RELEASE_DOCKERHUB_REPOSITORY}" ]; then
65-
echo "Cannot publish image without RELEASE_DOCKERHUB_REPOSITORY environment variable being set."
63+
if [ -z "${RELEASE_PUBLIC_ECR_REPOSITORY}" ]; then
64+
echo "Cannot publish image without RELEASE_PUBLIC_ECR_REPOSITORY environment variable being set."
6665
exit 1
6766
fi
6867

69-
# The secret name env var is set in the Jenkins configuration
70-
# Manage Jenkins -> Configure System
71-
./${ci_utils}/docker_login.py --secret-name "${RELEASE_DOCKERHUB_SECRET_NAME}"
68+
# Retrieve an authentication token and authenticate Docker client to registry
69+
aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/w6z5f7h2
7270

7371
# Push image
7472
docker push "${image_name}"
7573

7674
# Iterate over remaining tags, if any
7775
for ((i=1;i<${#docker_tags[@]};i++)); do
7876
local docker_tag="${docker_tags[${i}]}"
79-
local latest_image_name="${RELEASE_DOCKERHUB_REPOSITORY}/${repository}:${docker_tag}_py3"
77+
local latest_image_name="${RELEASE_PUBLIC_ECR_REPOSITORY}/${repository}:${docker_tag}_py3"
8078

8179
docker tag "${image_name}" "${latest_image_name}"
8280
docker push "${latest_image_name}"

cd/utils/docker_tag.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222
mxnet_variant=${1:?"Please specify the mxnet variant as the first parameter"}
2323
is_release=${RELEASE_BUILD:-false}
24-
version=${VERSION:-nightly}
24+
version=${VERSION:-nightly_v1.x}
2525

2626
# The docker tags will be in the form <version>_<hardware>(_mkl)
2727
# Eg. nightly_cpu, 1.4.0_cpu_mkl, nightly_gpu_cu80_mkl, etc.

0 commit comments

Comments
 (0)