
Commit d2e6910

Merge remote-tracking branch 'upstream/master' into alter_add_cols

2 parents: 8dcc44d + df98d5b


325 files changed: +11905 / −3744 lines

.asf.yaml

Lines changed: 4 additions & 0 deletions
@@ -27,3 +27,7 @@ github:
     - jdbc
     - sql
     - spark
+  enabled_merge_buttons:
+    merge: false
+    squash: true
+    rebase: true
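This block drives ASF infra's GitHub repository settings: it disables the plain merge button on pull requests while leaving "Squash and merge" and "Rebase and merge" enabled. For reference, a minimal sketch of where the new keys sit in a standalone .asf.yaml (the labels are taken from the surrounding context; the rest of the real file is elided):

    github:
      labels:
        - jdbc
        - sql
        - spark
      enabled_merge_buttons:
        merge: false    # disable plain merge commits on PRs
        squash: true    # allow "Squash and merge"
        rebase: true    # allow "Rebase and merge"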

.github/workflows/build_and_test.yml

Lines changed: 69 additions & 30 deletions
@@ -5,11 +5,44 @@ on:
     branches:
     - '**'
     - '!branch-*.*'
+  schedule:
+    # master
+    - cron: '0 4 * * *'
+    # branch-3.2
+    - cron: '0 7 * * *'
 
 jobs:
+  configure-jobs:
+    name: Configure jobs
+    runs-on: ubuntu-20.04
+    outputs:
+      branch: ${{ steps.set-outputs.outputs.branch }}
+      type: ${{ steps.set-outputs.outputs.type }}
+      envs: ${{ steps.set-outputs.outputs.envs }}
+    steps:
+    - name: Configure branch and additional environment variables
+      id: set-outputs
+      run: |
+        if [ "${{ github.event.schedule }}" = "0 4 * * *" ]; then
+          echo '::set-output name=branch::master'
+          echo '::set-output name=type::scheduled'
+          echo '::set-output name=envs::{"SCALA_PROFILE": "scala2.13"}'
+        elif [ "${{ github.event.schedule }}" = "0 7 * * *" ]; then
+          echo '::set-output name=branch::branch-3.2'
+          echo '::set-output name=type::scheduled'
+          echo '::set-output name=envs::{"SCALA_PROFILE": "scala2.13"}'
+        else
+          echo '::set-output name=branch::master' # Default branch to run on. CHANGE here when a branch is cut out.
+          echo '::set-output name=type::regular'
+          echo '::set-output name=envs::{}'
+        fi
+
   # Build: build Spark and run the tests for specified modules.
   build:
-    name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
+    name: "Build modules (${{ format('{0}, {1} job', needs.configure-jobs.outputs.branch, needs.configure-jobs.outputs.type) }}): ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
+    needs: configure-jobs
+    # Do not run as scheduled jobs in forked repos
+    if: github.repository == 'apache/spark' || needs.configure-jobs.outputs.type == 'regular'
     # Ubuntu 20.04 is the latest LTS. The next LTS is 22.04.
     runs-on: ubuntu-20.04
     strategy:
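For readers less familiar with this pattern: the new configure-jobs job writes step outputs with the ::set-output workflow command (since deprecated by GitHub in favor of $GITHUB_OUTPUT, but current at the time of this commit), promotes them to job-level outputs, and every downstream job declares needs: configure-jobs to read them. A minimal self-contained sketch of the mechanism, with illustrative job and output names:

    jobs:
      decide:
        runs-on: ubuntu-20.04
        outputs:
          # Promote the step output to a job output so other jobs can read it.
          branch: ${{ steps.pick.outputs.branch }}
        steps:
        - id: pick
          run: echo '::set-output name=branch::master'
      build:
        needs: decide     # creates the dependency and exposes decide's outputs
        runs-on: ubuntu-20.04
        if: needs.decide.outputs.branch == 'master'
        steps:
        - run: echo "Building ${{ needs.decide.outputs.branch }}"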
@@ -81,16 +114,14 @@ jobs:
       with:
         fetch-depth: 0
         repository: apache/spark
-        ref: master
+        ref: ${{ needs.configure-jobs.outputs.branch }}
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
-        apache_spark_ref=`git rev-parse HEAD`
+        echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
         git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' commit -m "Merged commit"
-        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
     # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
     - name: Cache Scala, SBT and Maven
       uses: actions/cache@v2
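The sync step now persists the upstream SHA through $GITHUB_ENV instead of a step output, which is why the id: sync-branch line can be dropped: anything appended as KEY=value to the $GITHUB_ENV file becomes an environment variable in every later step of the same job, with no step id or ${{ steps.*.outputs.* }} lookup required. A minimal sketch of the mechanism (variable name taken from the diff, step names illustrative):

    steps:
    - name: Record the upstream commit
      # Appending KEY=value to $GITHUB_ENV exports it to all later steps.
      run: echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
    - name: Use it later in the same job
      run: echo "Synced against $APACHE_SPARK_REF"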
@@ -130,11 +161,12 @@ jobs:
         python3.8 -m pip list
     # Run the tests.
     - name: Run tests
+      env: ${{ fromJSON(needs.configure-jobs.outputs.envs) }}
       run: |
-        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
-        # Hive and SQL tests become flaky when running in parallel as it's too intensive.
-        if [[ "$MODULES_TO_TEST" == "hive" ]] || [[ "$MODULES_TO_TEST" == "sql" ]]; then export SERIAL_SBT_TESTS=1; fi
-        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
+        # Hive "other tests" test needs larger metaspace size based on experiment.
+        if [[ "$MODULES_TO_TEST" == "hive" ]] && [[ "$EXCLUDED_TAGS" == "org.apache.spark.tags.SlowHiveTest" ]]; then export METASPACE_SIZE=2g; fi
+        export SERIAL_SBT_TESTS=1
+        ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
     - name: Upload test results to report
       if: always()
       uses: actions/upload-artifact@v2
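The added env: ${{ fromJSON(needs.configure-jobs.outputs.envs) }} line is what lets the scheduled runs switch Scala profiles without a separate workflow: fromJSON turns the JSON string emitted by configure-jobs into an object, and assigning an object to a step's env applies each key as an environment variable. Roughly, assuming the scheduled path set envs to {"SCALA_PROFILE": "scala2.13"} as in the hunk above:

    - name: Run tests
      # On a scheduled run this injects SCALA_PROFILE=scala2.13;
      # on a regular run envs is {} and nothing is added.
      env: ${{ fromJSON(needs.configure-jobs.outputs.envs) }}
      run: echo "Scala profile: ${SCALA_PROFILE:-<unset>}"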
@@ -149,6 +181,8 @@ jobs:
         path: "**/target/unit-tests.log"
 
   pyspark:
+    needs: configure-jobs
+    if: needs.configure-jobs.outputs.type == 'regular'
     name: "Build modules: ${{ matrix.modules }}"
     runs-on: ubuntu-20.04
     container:
@@ -171,6 +205,9 @@ jobs:
       HIVE_PROFILE: hive2.3
       GITHUB_PREV_SHA: ${{ github.event.before }}
       SPARK_LOCAL_IP: localhost
+      SKIP_UNIDOC: true
+      SKIP_MIMA: true
+      METASPACE_SIZE: 128m
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
@@ -181,13 +218,11 @@ jobs:
         ref: master
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
-        apache_spark_ref=`git rev-parse HEAD`
+        echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
         git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' commit -m "Merged commit"
-        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
     # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
     - name: Cache Scala, SBT and Maven
       uses: actions/cache@v2
@@ -217,9 +252,8 @@ jobs:
     # Run the tests.
     - name: Run tests
       run: |
-        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
         export PATH=$PATH:$HOME/miniconda/bin
-        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST"
+        ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST"
     - name: Upload test results to report
       if: always()
       uses: actions/upload-artifact@v2
@@ -234,6 +268,8 @@ jobs:
         path: "**/target/unit-tests.log"
 
   sparkr:
+    needs: configure-jobs
+    if: needs.configure-jobs.outputs.type == 'regular'
     name: "Build modules: sparkr"
     runs-on: ubuntu-20.04
     container:
@@ -243,6 +279,7 @@ jobs:
       HIVE_PROFILE: hive2.3
       GITHUB_PREV_SHA: ${{ github.event.before }}
       SPARK_LOCAL_IP: localhost
+      SKIP_MIMA: true
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
@@ -253,13 +290,11 @@ jobs:
         ref: master
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
-        apache_spark_ref=`git rev-parse HEAD`
+        echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
         git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' commit -m "Merged commit"
-        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
     # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
     - name: Cache Scala, SBT and Maven
       uses: actions/cache@v2
@@ -285,8 +320,7 @@ jobs:
         # R issues at docker environment
         export TZ=UTC
         export _R_CHECK_SYSTEM_CLOCK_=FALSE
-        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
-        ./dev/run-tests --parallelism 2 --modules sparkr
+        ./dev/run-tests --parallelism 1 --modules sparkr
     - name: Upload test results to report
       if: always()
       uses: actions/upload-artifact@v2
@@ -296,6 +330,8 @@ jobs:
 
   # Static analysis, and documentation build
   lint:
+    needs: configure-jobs
+    if: needs.configure-jobs.outputs.type == 'regular'
     name: Linters, licenses, dependencies and documentation generation
     runs-on: ubuntu-20.04
     env:
@@ -313,7 +349,6 @@ jobs:
         ref: master
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
         git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
@@ -382,7 +417,7 @@ jobs:
     - name: Java linter
       run: ./dev/lint-java
     - name: Python linter
-      run: ./dev/lint-python
+      run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python
     - name: R linter
       run: ./dev/lint-r
     - name: JS linter
@@ -397,6 +432,8 @@ jobs:
         bundle exec jekyll build
 
   java-11-17:
+    needs: configure-jobs
+    if: needs.configure-jobs.outputs.type == 'regular'
     name: Java ${{ matrix.java }} build with Maven
     strategy:
       fail-fast: false
@@ -414,7 +451,6 @@ jobs:
         ref: master
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
         git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
@@ -451,6 +487,8 @@ jobs:
         rm -rf ~/.m2/repository/org/apache/spark
 
   scala-213:
+    needs: configure-jobs
+    if: needs.configure-jobs.outputs.type == 'regular'
     name: Scala 2.13 build with SBT
     runs-on: ubuntu-20.04
     steps:
@@ -462,7 +500,6 @@ jobs:
         ref: master
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
         git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
@@ -495,6 +532,8 @@ jobs:
         ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile
 
   hadoop-2:
+    needs: configure-jobs
+    if: needs.configure-jobs.outputs.type == 'regular'
     name: Hadoop 2 build with SBT
     runs-on: ubuntu-20.04
     steps:
@@ -506,7 +545,6 @@ jobs:
         ref: master
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
         git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
@@ -538,6 +576,8 @@ jobs:
         ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile
 
   tpcds-1g:
+    needs: configure-jobs
+    if: needs.configure-jobs.outputs.type == 'regular'
     name: Run TPC-DS queries with SF=1
     runs-on: ubuntu-20.04
     env:
@@ -551,7 +591,6 @@ jobs:
         ref: master
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
         git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
@@ -614,6 +653,8 @@ jobs:
         path: "**/target/unit-tests.log"
 
   docker-integration-tests:
+    needs: configure-jobs
+    if: needs.configure-jobs.outputs.type == 'regular'
     name: Run docker integration tests
     runs-on: ubuntu-20.04
     env:
@@ -622,6 +663,7 @@ jobs:
       GITHUB_PREV_SHA: ${{ github.event.before }}
       SPARK_LOCAL_IP: localhost
       ORACLE_DOCKER_IMAGE_NAME: oracle/database:18.4.0-xe
+      SKIP_MIMA: true
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
@@ -631,13 +673,11 @@ jobs:
         ref: master
     - name: Sync the current branch with the latest in Apache Spark
       if: github.repository != 'apache/spark'
-      id: sync-branch
       run: |
-        apache_spark_ref=`git rev-parse HEAD`
+        echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
         git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' merge --no-commit --progress --squash FETCH_HEAD
         git -c user.name='Apache Spark Test Account' -c user.email='[email protected]' commit -m "Merged commit"
-        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
     - name: Cache Scala, SBT and Maven
       uses: actions/cache@v2
       with:
@@ -680,8 +720,7 @@ jobs:
         ./buildContainerImage.sh -v 18.4.0 -x
     - name: Run tests
       run: |
-        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
-        ./dev/run-tests --parallelism 2 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest
+        ./dev/run-tests --parallelism 1 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest
     - name: Upload test results to report
       if: always()
       uses: actions/upload-artifact@v2
