Commit 268326b

Merge branch 'master' into reorder_keys
2 parents 8308649 + 71aea02 commit 268326b

711 files changed (+18988, -7385 lines)


.github/workflows/master.yml

Lines changed: 45 additions & 11 deletions
@@ -154,15 +154,18 @@ jobs:
         python3.8 -m pip install numpy pyarrow pandas scipy
         python3.8 -m pip list
     # SparkR
-    - name: Install R 3.6
-      uses: r-lib/actions/setup-r@v1
+    - name: Install R 4.0
       if: contains(matrix.modules, 'sparkr')
-      with:
-        r-version: 3.6
+      run: |
+        sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list"
+        curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add
+        sudo apt-get update
+        sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
     - name: Install R packages
       if: contains(matrix.modules, 'sparkr')
       run: |
-        sudo apt-get install -y libcurl4-openssl-dev
+        # qpdf is required to reduce the size of PDFs to make CRAN check pass. See SPARK-32497.
+        sudo apt-get install -y libcurl4-openssl-dev qpdf
         sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')"
         # Show installed packages in R.
         sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
@@ -200,11 +203,15 @@ jobs:
         architecture: x64
     - name: Install Python linter dependencies
      run: |
-        pip3 install flake8 sphinx numpy
-    - name: Install R 3.6
-      uses: r-lib/actions/setup-r@v1
-      with:
-        r-version: 3.6
+        # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
+        # See also https://github.com/sphinx-doc/sphinx/issues/7551.
+        pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme
+    - name: Install R 4.0
+      run: |
+        sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list"
+        curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add
+        sudo apt-get update
+        sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev
     - name: Install R linter dependencies and SparkR
       run: |
         sudo apt-get install -y libcurl4-openssl-dev
@@ -218,7 +225,9 @@ jobs:
     - name: Install dependencies for documentation generation
       run: |
         sudo apt-get install -y libcurl4-openssl-dev pandoc
-        pip install sphinx mkdocs numpy
+        # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
+        # See also https://github.com/sphinx-doc/sphinx/issues/7551.
+        pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme
         gem install jekyll jekyll-redirect-from rouge
         sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
     - name: Scala linter
@@ -237,3 +246,28 @@ jobs:
       run: |
         cd docs
         jekyll build
+
+  java11:
+    name: Java 11 build
+    runs-on: ubuntu-latest
+    steps:
+    - name: Checkout Spark repository
+      uses: actions/checkout@v2
+    - name: Cache Maven local repository
+      uses: actions/cache@v2
+      with:
+        path: ~/.m2/repository
+        key: java11-maven-${{ hashFiles('**/pom.xml') }}
+        restore-keys: |
+          java11-maven-
+    - name: Install Java 11
+      uses: actions/setup-java@v1
+      with:
+        java-version: 11
+    - name: Build with Maven
+      run: |
+        export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
+        export MAVEN_CLI_OPTS="--no-transfer-progress"
+        mkdir -p ~/.m2
+        ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
+        rm -rf ~/.m2/repository/org/apache/spark

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -64,6 +64,7 @@ python/lib/pyspark.zip
 python/.eggs/
 python/deps
 python/docs/_site/
+python/docs/source/reference/api/
 python/test_coverage/coverage_data
 python/test_coverage/htmlcov
 python/pyspark/python

LICENSE

Lines changed: 2 additions & 3 deletions
@@ -222,14 +222,13 @@ external/spark-ganglia-lgpl/src/main/java/com/codahale/metrics/ganglia/GangliaRe
 Python Software Foundation License
 ----------------------------------

-pyspark/heapq3.py
-python/docs/_static/copybutton.js
+python/docs/source/_static/copybutton.js

 BSD 3-Clause
 ------------

 python/lib/py4j-*-src.zip
-python/pyspark/cloudpickle.py
+python/pyspark/cloudpickle/*.py
 python/pyspark/join.py
 core/src/main/resources/org/apache/spark/ui/static/d3.min.js

LICENSE-binary

Lines changed: 0 additions & 6 deletions
@@ -557,12 +557,6 @@ jakarta.ws.rs:jakarta.ws.rs-api https://github.com/eclipse-ee4j/jaxrs-api
 org.glassfish.hk2.external:jakarta.inject


-Python Software Foundation License
------------------------------------
-
-pyspark/heapq3.py
-
-
 Public Domain
 -------------

R/pkg/DESCRIPTION

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ Suggests:
     testthat,
     e1071,
     survival,
-    arrow (>= 0.15.1)
+    arrow (>= 1.0.0)
 Collate:
     'schema.R'
     'generics.R'
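
Note on the floor bump above: DESCRIPTION now suggests arrow (>= 1.0.0) instead of 0.15.1. A minimal, hypothetical local check (not part of this commit) before running the SparkR Arrow tests could look like:

    # Hedged sketch: verify the locally installed arrow R package meets the new floor.
    if (!requireNamespace("arrow", quietly = TRUE) ||
        packageVersion("arrow") < "1.0.0") {
      install.packages("arrow", repos = "https://cloud.r-project.org/")
    }
    packageVersion("arrow")  # expect 1.0.0 or later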

R/pkg/R/DataFrame.R

Lines changed: 1 addition & 1 deletion
@@ -1233,7 +1233,7 @@ setMethod("collect",
                      port = port, blocking = TRUE, open = "wb", timeout = connectionTimeout)
           output <- tryCatch({
             doServerAuth(conn, authSecret)
-            arrowTable <- arrow::read_arrow(readRaw(conn))
+            arrowTable <- arrow::read_ipc_stream(readRaw(conn))
             # Arrow drops `as_tibble` since 0.14.0, see ARROW-5190.
             if (exists("as_tibble", envir = asNamespace("arrow"))) {
               as.data.frame(arrow::as_tibble(arrowTable), stringsAsFactors = stringsAsFactors)
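
For context on the one-line change above: arrow 1.0.0 deprecates the old read_arrow() reader, and read_ipc_stream() is its replacement for IPC stream payloads such as the raw bytes collect() pulls off the socket. A self-contained sketch of the same round trip against a temporary file, assuming a local arrow (>= 1.0.0) install (none of this is in the commit):

    library(arrow)

    # Round-trip a data.frame through the Arrow IPC stream format.
    df <- data.frame(x = 1:3, y = c("a", "b", "c"), stringsAsFactors = FALSE)
    tf <- tempfile(fileext = ".arrows")
    write_ipc_stream(df, tf)          # serialize to an IPC stream
    collected <- read_ipc_stream(tf)  # replaces the deprecated read_arrow()
    print(collected)
    unlink(tf)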

R/pkg/tests/fulltests/test_context.R

Lines changed: 1 addition & 1 deletion
@@ -139,7 +139,7 @@ test_that("utility function can be called", {
   expect_true(TRUE)
 })

-test_that("getClientModeSparkSubmitOpts() returns spark-submit args from whitelist", {
+test_that("getClientModeSparkSubmitOpts() returns spark-submit args from allowList", {
   e <- new.env()
   e[["spark.driver.memory"]] <- "512m"
   ops <- getClientModeSparkSubmitOpts("sparkrmain", e)

R/pkg/tests/fulltests/test_sparkSQL.R

Lines changed: 4 additions & 4 deletions
@@ -3921,14 +3921,14 @@ test_that("No extra files are created in SPARK_HOME by starting session and maki
   # before creating a SparkSession with enableHiveSupport = T at the top of this test file
   # (filesBefore). The test here is to compare that (filesBefore) against the list of files before
   # any test is run in run-all.R (sparkRFilesBefore).
-  # sparkRWhitelistSQLDirs is also defined in run-all.R, and should contain only 2 whitelisted dirs,
+  # sparkRAllowedSQLDirs is also defined in run-all.R, and should contain only 2 allowed dirs,
   # here allow the first value, spark-warehouse, in the diff, everything else should be exactly the
   # same as before any test is run.
-  compare_list(sparkRFilesBefore, setdiff(filesBefore, sparkRWhitelistSQLDirs[[1]]))
+  compare_list(sparkRFilesBefore, setdiff(filesBefore, sparkRAllowedSQLDirs[[1]]))
   # third, ensure only spark-warehouse and metastore_db are created when enableHiveSupport = T
   # note: as the note above, after running all tests in this file while enableHiveSupport = T, we
-  # check the list of files again. This time we allow both whitelisted dirs to be in the diff.
-  compare_list(sparkRFilesBefore, setdiff(filesAfter, sparkRWhitelistSQLDirs))
+  # check the list of files again. This time we allow both dirs to be in the diff.
+  compare_list(sparkRFilesBefore, setdiff(filesAfter, sparkRAllowedSQLDirs))
 })

 unlink(parquetPath)

R/pkg/tests/fulltests/test_sparkSQL_arrow.R

Lines changed: 18 additions & 0 deletions
@@ -312,4 +312,22 @@ test_that("Arrow optimization - unsupported types", {
   })
 })

+test_that("SPARK-32478: gapply() Arrow optimization - error message for schema mismatch", {
+  skip_if_not_installed("arrow")
+  df <- createDataFrame(list(list(a = 1L, b = "a")))
+
+  conf <- callJMethod(sparkSession, "conf")
+  arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.sparkr.enabled")[[1]]
+
+  callJMethod(conf, "set", "spark.sql.execution.arrow.sparkr.enabled", "true")
+  tryCatch({
+    expect_error(
+      count(gapply(df, "a", function(key, group) { group }, structType("a int, b int"))),
+      "expected IntegerType, IntegerType, got IntegerType, StringType")
+  },
+  finally = {
+    callJMethod(conf, "set", "spark.sql.execution.arrow.sparkr.enabled", arrowEnabled)
+  })
+})
+
 sparkR.session.stop()
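
The new test above pins down the error message produced when the declared schema disagrees with what the grouping function returns (b is declared int but comes back as a string). As a hedged usage sketch outside the test harness, assuming a local SparkR session, the same call collects cleanly once the declared schema matches the returned columns:

    library(SparkR)
    sparkR.session(master = "local[1]")

    df <- createDataFrame(list(list(a = 1L, b = "a")))

    # Declared schema matches the returned columns, so Arrow-backed gapply() succeeds.
    result <- gapply(df, "a", function(key, group) { group }, structType("a int, b string"))
    head(collect(result))

    sparkR.session.stop()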

R/pkg/tests/run-all.R

Lines changed: 2 additions & 2 deletions
@@ -35,8 +35,8 @@ if (identical(Sys.getenv("NOT_CRAN"), "true")) {
   install.spark(overwrite = TRUE)

   sparkRDir <- file.path(Sys.getenv("SPARK_HOME"), "R")
-  sparkRWhitelistSQLDirs <- c("spark-warehouse", "metastore_db")
-  invisible(lapply(sparkRWhitelistSQLDirs,
+  sparkRAllowedSQLDirs <- c("spark-warehouse", "metastore_db")
+  invisible(lapply(sparkRAllowedSQLDirs,
                    function(x) { unlink(file.path(sparkRDir, x), recursive = TRUE, force = TRUE)}))
   sparkRFilesBefore <- list.files(path = sparkRDir, all.files = TRUE)
