     branches:
     - '**'
     - '!branch-*.*'
+  schedule:
+    # master
+    - cron: '0 4 * * *'
+    # branch-3.2
+    - cron: '0 7 * * *'
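
For schedule-triggered runs, the payload field that tells the two crons apart is `github.event.schedule`, which carries the matching cron expression verbatim; the `configure-jobs` job added below branches on exactly that string. A minimal standalone sketch of the same dispatch pattern (the job and step names here are illustrative, not part of this workflow):

    on:
      schedule:
      - cron: '0 4 * * *'
      - cron: '0 7 * * *'

    jobs:
      dispatch:  # hypothetical job name
        runs-on: ubuntu-20.04
        steps:
        - name: Show which cron fired
          run: |
            # Empty for push-triggered runs; the cron string otherwise.
            echo "schedule: '${{ github.event.schedule }}'"
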

 jobs:
+  configure-jobs:
+    name: Configure jobs
+    runs-on: ubuntu-20.04
+    outputs:
+      branch: ${{ steps.set-outputs.outputs.branch }}
+      type: ${{ steps.set-outputs.outputs.type }}
+      envs: ${{ steps.set-outputs.outputs.envs }}
+    steps:
+    - name: Configure branch and additional environment variables
+      id: set-outputs
+      run: |
+        if [ "${{ github.event.schedule }}" = "0 4 * * *" ]; then
+          echo '::set-output name=branch::master'
+          echo '::set-output name=type::scheduled'
+          echo '::set-output name=envs::{"SCALA_PROFILE": "scala2.13"}'
+        elif [ "${{ github.event.schedule }}" = "0 7 * * *" ]; then
+          echo '::set-output name=branch::branch-3.2'
+          echo '::set-output name=type::scheduled'
+          echo '::set-output name=envs::{"SCALA_PROFILE": "scala2.13"}'
+        else
+          echo '::set-output name=branch::master' # Default branch to run on. CHANGE here when a branch is cut out.
+          echo '::set-output name=type::regular'
+          echo '::set-output name=envs::{}'
+        fi
+
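The `outputs:` map above re-exports the step's `::set-output` values at the job level (the file-based `$GITHUB_OUTPUT` mechanism later replaced this command, but `set-output` was the standard at the time). Any job that declares `needs: configure-jobs` can then read them via `needs.configure-jobs.outputs.*`, both in `if:` gates and in expressions, which is how the single lightweight job below steers every other job in the file. A minimal sketch of the consumer side, assuming the producer above (the job name `consumer` is hypothetical):

    consumer:  # hypothetical job name
      needs: configure-jobs
      # Scheduled runs happen only in apache/spark; forks still get push-triggered runs.
      if: github.repository == 'apache/spark' || needs.configure-jobs.outputs.type == 'regular'
      runs-on: ubuntu-20.04
      steps:
      - run: echo "building ${{ needs.configure-jobs.outputs.branch }}"
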
   # Build: build Spark and run the tests for specified modules.
   build:
-    name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
+    name: "Build modules (${{ format('{0}, {1} job', needs.configure-jobs.outputs.branch, needs.configure-jobs.outputs.type) }}): ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
+    needs: configure-jobs
+    # Do not run scheduled jobs in forked repositories
+    if: github.repository == 'apache/spark' || needs.configure-jobs.outputs.type == 'regular'
   # Ubuntu 20.04 is the latest LTS. The next LTS is 22.04.
   runs-on: ubuntu-20.04
   strategy:
@@ -81,16 +114,14 @@ jobs:
       with:
         fetch-depth: 0
         repository: apache/spark
-        ref: master
+        ref: ${{ needs.configure-jobs.outputs.branch }}
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
-        apache_spark_ref=`git rev-parse HEAD`
+        echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
        git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' commit -m "Merged commit"
-        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
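
Appending `NAME=value` to the file that `$GITHUB_ENV` points at makes the variable visible to every later step in the same job, which is why the `sync-branch` step id and the `steps.sync-branch.outputs` plumbing can be deleted here and in the `Run tests` steps further down. A minimal sketch of the mechanism (the job name and the value are illustrative):

    jobs:
      demo:  # hypothetical job name
        runs-on: ubuntu-20.04
        steps:
        - name: Export for later steps
          run: echo "APACHE_SPARK_REF=abc123" >> "$GITHUB_ENV"
        - name: Read it back, no step id needed
          run: echo "ref is $APACHE_SPARK_REF"
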
     # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
     - name: Cache Scala, SBT and Maven
       uses: actions/cache@v2
@@ -130,11 +161,12 @@ jobs:
         python3.8 -m pip list
     # Run the tests.
     - name: Run tests
+      env: ${{ fromJSON(needs.configure-jobs.outputs.envs) }}
       run: |
-        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
-        # Hive and SQL tests become flaky when running in parallel as it's too intensive.
-        if [[ "$MODULES_TO_TEST" == "hive" ]] || [[ "$MODULES_TO_TEST" == "sql" ]]; then export SERIAL_SBT_TESTS=1; fi
-        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
+        # The Hive "other tests" group needs a larger metaspace size, based on experiment.
+        if [[ "$MODULES_TO_TEST" == "hive" ]] && [[ "$EXCLUDED_TAGS" == "org.apache.spark.tags.SlowHiveTest" ]]; then export METASPACE_SIZE=2g; fi
+        export SERIAL_SBT_TESTS=1
+        ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
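
The new `env: ${{ fromJSON(needs.configure-jobs.outputs.envs) }}` line is what makes the JSON string from `configure-jobs` useful: `env:` accepts a whole map from an expression, so a scheduled run injects `SCALA_PROFILE=scala2.13` while a regular run injects nothing (`{}`). The drop from `--parallelism 2` to `--parallelism 1` together with an unconditional `SERIAL_SBT_TESTS=1` trades speed for stability, generalizing the removed comment about Hive and SQL tests turning flaky under parallel load. A minimal sketch of the env-map pattern, assuming a producer output named `envs` as above (the job name `test` is hypothetical):

    test:  # hypothetical consumer job
      needs: configure-jobs
      runs-on: ubuntu-20.04
      env: ${{ fromJSON(needs.configure-jobs.outputs.envs) }}
      steps:
      - run: echo "SCALA_PROFILE=${SCALA_PROFILE:-<unset>}"
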
     - name: Upload test results to report
       if: always()
       uses: actions/upload-artifact@v2
@@ -149,6 +181,8 @@ jobs:
         path: "**/target/unit-tests.log"

   pyspark:
+    needs: configure-jobs
+    if: needs.configure-jobs.outputs.type == 'regular'
     name: "Build modules: ${{ matrix.modules }}"
     runs-on: ubuntu-20.04
     container:
@@ -171,6 +205,9 @@ jobs:
       HIVE_PROFILE: hive2.3
       GITHUB_PREV_SHA: ${{ github.event.before }}
       SPARK_LOCAL_IP: localhost
+      SKIP_UNIDOC: true
+      SKIP_MIMA: true
+      METASPACE_SIZE: 128m
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
@@ -181,13 +218,11 @@ jobs:
         ref: master
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
-        apache_spark_ref=`git rev-parse HEAD`
+        echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
         git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' commit -m "Merged commit"
-        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
     # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
     - name: Cache Scala, SBT and Maven
       uses: actions/cache@v2
@@ -217,9 +252,8 @@ jobs:
     # Run the tests.
     - name: Run tests
       run: |
-        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
         export PATH=$PATH:$HOME/miniconda/bin
-        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST"
+        ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST"
     - name: Upload test results to report
       if: always()
       uses: actions/upload-artifact@v2
@@ -234,6 +268,8 @@ jobs:
         path: "**/target/unit-tests.log"

   sparkr:
+    needs: configure-jobs
+    if: needs.configure-jobs.outputs.type == 'regular'
     name: "Build modules: sparkr"
     runs-on: ubuntu-20.04
     container:
@@ -243,6 +279,7 @@ jobs:
       HIVE_PROFILE: hive2.3
       GITHUB_PREV_SHA: ${{ github.event.before }}
       SPARK_LOCAL_IP: localhost
+      SKIP_MIMA: true
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
@@ -253,13 +290,11 @@ jobs:
         ref: master
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
-        apache_spark_ref=`git rev-parse HEAD`
+        echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
         git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' commit -m "Merged commit"
-        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
     # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
     - name: Cache Scala, SBT and Maven
       uses: actions/cache@v2
@@ -285,8 +320,7 @@ jobs:
         # R issues at docker environment
         export TZ=UTC
         export _R_CHECK_SYSTEM_CLOCK_=FALSE
-        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
-        ./dev/run-tests --parallelism 2 --modules sparkr
+        ./dev/run-tests --parallelism 1 --modules sparkr
     - name: Upload test results to report
       if: always()
       uses: actions/upload-artifact@v2
@@ -296,6 +330,8 @@ jobs:

   # Static analysis, and documentation build
   lint:
+    needs: configure-jobs
+    if: needs.configure-jobs.outputs.type == 'regular'
     name: Linters, licenses, dependencies and documentation generation
     runs-on: ubuntu-20.04
     env:
@@ -313,7 +349,6 @@ jobs:
         ref: master
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
         git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
@@ -382,7 +417,7 @@ jobs:
     - name: Java linter
       run: ./dev/lint-java
     - name: Python linter
-      run: ./dev/lint-python
+      run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python
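
Prefixing the command pins the interpreter for that single invocation; `dev/lint-python` presumably reads the variable and runs its checks with it. A minimal sketch of a step honoring such a variable (the `python3` fallback and the check shown are assumptions for illustration, not Spark's actual logic):

    - name: Python linter (sketch)
      run: |
        # Use the caller-supplied interpreter if set, else fall back to python3 (assumed).
        PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"
        "$PYTHON_EXECUTABLE" --version
        "$PYTHON_EXECUTABLE" -m compileall -q .  # stand-in for the real lint checks
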
     - name: R linter
       run: ./dev/lint-r
     - name: JS linter
@@ -397,6 +432,8 @@ jobs:
         bundle exec jekyll build

   java-11-17:
+    needs: configure-jobs
+    if: needs.configure-jobs.outputs.type == 'regular'
     name: Java ${{ matrix.java }} build with Maven
     strategy:
       fail-fast: false
@@ -414,7 +451,6 @@ jobs:
         ref: master
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
         git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
@@ -451,6 +487,8 @@ jobs:
         rm -rf ~/.m2/repository/org/apache/spark

   scala-213:
+    needs: configure-jobs
+    if: needs.configure-jobs.outputs.type == 'regular'
     name: Scala 2.13 build with SBT
     runs-on: ubuntu-20.04
     steps:
@@ -462,7 +500,6 @@ jobs:
         ref: master
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
         git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
@@ -495,6 +532,8 @@ jobs:
         ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile

   hadoop-2:
+    needs: configure-jobs
+    if: needs.configure-jobs.outputs.type == 'regular'
     name: Hadoop 2 build with SBT
     runs-on: ubuntu-20.04
     steps:
@@ -506,7 +545,6 @@ jobs:
         ref: master
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
         git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
@@ -538,6 +576,8 @@ jobs:
         ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile

   tpcds-1g:
+    needs: configure-jobs
+    if: needs.configure-jobs.outputs.type == 'regular'
     name: Run TPC-DS queries with SF=1
     runs-on: ubuntu-20.04
     env:
@@ -551,7 +591,6 @@ jobs:
         ref: master
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
         git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
@@ -614,6 +653,8 @@ jobs:
         path: "**/target/unit-tests.log"

   docker-integration-tests:
+    needs: configure-jobs
+    if: needs.configure-jobs.outputs.type == 'regular'
     name: Run docker integration tests
     runs-on: ubuntu-20.04
     env:
@@ -622,6 +663,7 @@ jobs:
       GITHUB_PREV_SHA: ${{ github.event.before }}
       SPARK_LOCAL_IP: localhost
       ORACLE_DOCKER_IMAGE_NAME: oracle/database:18.4.0-xe
+      SKIP_MIMA: true
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
@@ -631,13 +673,11 @@ jobs:
         ref: master
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
-        apache_spark_ref=`git rev-parse HEAD`
+        echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
         git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' commit -m "Merged commit"
-        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
     - name: Cache Scala, SBT and Maven
       uses: actions/cache@v2
       with:
@@ -680,8 +720,7 @@ jobs:
         ./buildContainerImage.sh -v 18.4.0 -x
     - name: Run tests
       run: |
-        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
-        ./dev/run-tests --parallelism 2 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest
+        ./dev/run-tests --parallelism 1 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest
     - name: Upload test results to report
       if: always()
       uses: actions/upload-artifact@v2