Skip to content

Commit 11e2b8a

Browse files
authored
Merge branch 'master' into feature/cap-alias-substitution
2 parents 22f594a + 8b52240 commit 11e2b8a

File tree

1,046 files changed

+29155
-11727
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,046 files changed

+29155
-11727
lines changed

LICENSE-binary

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -260,9 +260,6 @@ net.sf.supercsv:super-csv
260260
org.apache.arrow:arrow-format
261261
org.apache.arrow:arrow-memory
262262
org.apache.arrow:arrow-vector
263-
org.apache.calcite:calcite-avatica
264-
org.apache.calcite:calcite-core
265-
org.apache.calcite:calcite-linq4j
266263
org.apache.commons:commons-crypto
267264
org.apache.commons:commons-lang3
268265
org.apache.hadoop:hadoop-annotations
@@ -487,6 +484,15 @@ org.glassfish.jersey.core:jersey-server
487484
org.glassfish.jersey.media:jersey-media-jaxb
488485

489486

487+
Eclipse Distribution License (EDL) 1.0
488+
--------------------------------------
489+
490+
org.glassfish.jaxb:jaxb-runtime
491+
jakarta.xml.bind:jakarta.xml.bind-api
492+
com.sun.istack:istack-commons-runtime
493+
jakarta.activation:jakarta.activation-api
494+
495+
490496
Mozilla Public License (MPL) 1.1
491497
--------------------------------
492498

NOTICE-binary

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -792,15 +792,6 @@ Copyright 2005-2006 The Apache Software Foundation
792792
Apache Jakarta HttpClient
793793
Copyright 1999-2007 The Apache Software Foundation
794794

795-
Calcite Avatica
796-
Copyright 2012-2015 The Apache Software Foundation
797-
798-
Calcite Core
799-
Copyright 2012-2015 The Apache Software Foundation
800-
801-
Calcite Linq4j
802-
Copyright 2012-2015 The Apache Software Foundation
803-
804795
Apache HttpClient
805796
Copyright 1999-2017 The Apache Software Foundation
806797

R/pkg/DESCRIPTION

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Package: SparkR
22
Type: Package
33
Version: 3.0.0
4-
Title: R Frontend for Apache Spark
5-
Description: Provides an R Frontend for Apache Spark.
4+
Title: R Front end for 'Apache Spark'
5+
Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>.
66
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
77
email = "[email protected]"),
88
person("Xiangrui", "Meng", role = "aut",
@@ -11,8 +11,8 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
1111
email = "[email protected]"),
1212
person(family = "The Apache Software Foundation", role = c("aut", "cph")))
1313
License: Apache License (== 2.0)
14-
URL: http://www.apache.org/ http://spark.apache.org/
15-
BugReports: http://spark.apache.org/contributing.html
14+
URL: https://www.apache.org/ https://spark.apache.org/
15+
BugReports: https://spark.apache.org/contributing.html
1616
SystemRequirements: Java (== 8)
1717
Depends:
1818
R (>= 3.1),

R/pkg/NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,8 +312,10 @@ exportMethods("%<=>%",
312312
"lower",
313313
"lpad",
314314
"ltrim",
315+
"map_concat",
315316
"map_entries",
316317
"map_from_arrays",
318+
"map_from_entries",
317319
"map_keys",
318320
"map_values",
319321
"max",

R/pkg/R/DataFrame.R

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -950,7 +950,7 @@ setMethod("write.parquet",
950950
#'
951951
#' Save the content of the SparkDataFrame in a text file at the specified path.
952952
#' The SparkDataFrame must have only one column of string type with the name "value".
953-
#' Each row becomes a new line in the output file.
953+
#' Each row becomes a new line in the output file. The text files will be encoded as UTF-8.
954954
#'
955955
#' @param x A SparkDataFrame
956956
#' @param path The directory where the file is saved
@@ -1177,11 +1177,50 @@ setMethod("dim",
11771177
setMethod("collect",
11781178
signature(x = "SparkDataFrame"),
11791179
function(x, stringsAsFactors = FALSE) {
1180+
connectionTimeout <- as.numeric(Sys.getenv("SPARKR_BACKEND_CONNECTION_TIMEOUT", "6000"))
1181+
useArrow <- FALSE
1182+
arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.enabled")[[1]] == "true"
1183+
if (arrowEnabled) {
1184+
useArrow <- tryCatch({
1185+
checkSchemaInArrow(schema(x))
1186+
TRUE
1187+
}, error = function(e) {
1188+
warning(paste0("The conversion from Spark DataFrame to R DataFrame was attempted ",
1189+
"with Arrow optimization because ",
1190+
"'spark.sql.execution.arrow.enabled' is set to true; however, ",
1191+
"failed, attempting non-optimization. Reason: ",
1192+
e))
1193+
FALSE
1194+
})
1195+
}
1196+
11801197
dtypes <- dtypes(x)
11811198
ncol <- length(dtypes)
11821199
if (ncol <= 0) {
11831200
# empty data.frame with 0 columns and 0 rows
11841201
data.frame()
1202+
} else if (useArrow) {
1203+
requireNamespace1 <- requireNamespace
1204+
if (requireNamespace1("arrow", quietly = TRUE)) {
1205+
read_arrow <- get("read_arrow", envir = asNamespace("arrow"), inherits = FALSE)
1206+
as_tibble <- get("as_tibble", envir = asNamespace("arrow"))
1207+
1208+
portAuth <- callJMethod(x@sdf, "collectAsArrowToR")
1209+
port <- portAuth[[1]]
1210+
authSecret <- portAuth[[2]]
1211+
conn <- socketConnection(
1212+
port = port, blocking = TRUE, open = "wb", timeout = connectionTimeout)
1213+
output <- tryCatch({
1214+
doServerAuth(conn, authSecret)
1215+
arrowTable <- read_arrow(readRaw(conn))
1216+
as.data.frame(as_tibble(arrowTable), stringsAsFactors = stringsAsFactors)
1217+
}, finally = {
1218+
close(conn)
1219+
})
1220+
return(output)
1221+
} else {
1222+
stop("'arrow' package should be installed.")
1223+
}
11851224
} else {
11861225
# listCols is a list of columns
11871226
listCols <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "dfToCols", x@sdf)
@@ -1437,6 +1476,18 @@ dapplyInternal <- function(x, func, schema) {
14371476
schema <- structType(schema)
14381477
}
14391478

1479+
arrowEnabled <- sparkR.conf("spark.sql.execution.arrow.enabled")[[1]] == "true"
1480+
if (arrowEnabled) {
1481+
if (inherits(schema, "structType")) {
1482+
checkSchemaInArrow(schema)
1483+
} else if (is.null(schema)) {
1484+
stop(paste0("Arrow optimization does not support 'dapplyCollect' yet. Please disable ",
1485+
"Arrow optimization or use 'collect' and 'dapply' APIs instead."))
1486+
} else {
1487+
stop("'schema' should be DDL-formatted string or structType.")
1488+
}
1489+
}
1490+
14401491
packageNamesArr <- serialize(.sparkREnv[[".packages"]],
14411492
connection = NULL)
14421493

0 commit comments

Comments
 (0)