Commit 0887eab

Add comment and merge master.
* 'master' of https://github.com/apache/spark: (149 commits)
  [SPARK-19753][CORE] Un-register all shuffle output on a host in case of slave lost or fetch failure
  [SPARK-20986][SQL] Reset table's statistics after PruneFileSourcePartitions rule.
  [SPARK-12552][CORE] Correctly count the driver resource when recovering from failure for Master
  [SPARK-21016][CORE] Improve code fault tolerance for converting string to number
  [SPARK-21051][SQL] Add hash map metrics to aggregate
  [SPARK-21064][CORE][TEST] Fix the default value bug in NettyBlockTransferServiceSuite
  [SPARK-21060][WEB-UI] Css style about paging function is error in the executor page. It is different of history server ui paging function css style.
  [SPARK-21039][SPARK CORE] Use treeAggregate instead of aggregate in DataFrame.stat.bloomFilter
  [SPARK-21006][TESTS][FOLLOW-UP] Some Worker's RpcEnv is leaked in WorkerSuite
  [SPARK-20920][SQL] ForkJoinPool pools are leaked when writing hive tables with many partitions
  [TEST][SPARKR][CORE] Fix broken SparkSubmitSuite
  [SPARK-19910][SQL] `stack` should not reject NULL values due to type mismatch
  Revert "[SPARK-21046][SQL] simplify the array offset and length in ColumnVector"
  [SPARK-20979][SS] Add RateSource to generate values for tests and benchmark
  [SPARK-21050][ML] Word2vec persistence overflow bug fix
  [SPARK-21059][SQL] LikeSimplification can NPE on null pattern
  [SPARK-20345][SQL] Fix STS error handling logic on HiveSQLException
  [SPARK-17914][SQL] Fix parsing of timestamp strings with nanoseconds
  [SPARK-21046][SQL] simplify the array offset and length in ColumnVector
  [SPARK-21041][SQL] SparkSession.range should be consistent with SparkContext.range
  ...
2 parents: da0f977 + dccc0aa

419 files changed: +12161 −3443 lines changed


R/pkg/.Rbuildignore

Lines changed: 1 addition & 0 deletions
@@ -6,3 +6,4 @@
 ^README\.Rmd$
 ^src-native$
 ^html$
+^tests/fulltests/*

R/pkg/R/DataFrame.R

Lines changed: 1 addition & 0 deletions
@@ -2646,6 +2646,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
 #' Input SparkDataFrames can have different schemas (names and data types).
 #'
 #' Note: This does not remove duplicate rows across the two SparkDataFrames.
+#' Also as standard in SQL, this function resolves columns by position (not by name).
 #'
 #' @param x A SparkDataFrame
 #' @param y A SparkDataFrame
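The doc line added here states that union() matches columns by position rather than by name. A minimal SparkR sketch of that documented behavior (the session settings and data below are illustrative, not part of the commit):

  library(SparkR)
  sparkR.session(master = "local[2]", enableHiveSupport = FALSE)

  df1 <- createDataFrame(data.frame(a = 1:2, b = 3:4))
  df2 <- createDataFrame(data.frame(b = 5:6, a = 7:8))   # same types, names swapped

  # Rows from df2 are appended positionally: its "b" values land under
  # column "a" of the result, and its "a" values under column "b".
  collect(union(df1, df2))

  sparkR.session.stop()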

R/pkg/R/install.R

Lines changed: 1 addition & 1 deletion
@@ -267,7 +267,7 @@ hadoopVersionName <- function(hadoopVersion) {
 # The implementation refers to appdirs package: https://pypi.python.org/pypi/appdirs and
 # adapt to Spark context
 sparkCachePath <- function() {
-  if (.Platform$OS.type == "windows") {
+  if (is_windows()) {
     winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA)
     if (is.na(winAppPath)) {
       stop(paste("%LOCALAPPDATA% not found.",

R/pkg/R/utils.R

Lines changed: 2 additions & 6 deletions
@@ -908,10 +908,6 @@ isAtomicLengthOne <- function(x) {
   is.atomic(x) && length(x) == 1
 }
 
-is_cran <- function() {
-  !identical(Sys.getenv("NOT_CRAN"), "true")
-}
-
 is_windows <- function() {
   .Platform$OS.type == "windows"
 }
@@ -920,6 +916,6 @@ hadoop_home_set <- function() {
   !identical(Sys.getenv("HADOOP_HOME"), "")
 }
 
-not_cran_or_windows_with_hadoop <- function() {
-  !is_cran() && (!is_windows() || hadoop_home_set())
+windows_with_hadoop <- function() {
+  !is_windows() || hadoop_home_set()
 }
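The renamed helper presumably gates tests that need a working Hadoop setup when running on Windows. A minimal sketch of such a guard, assuming windows_with_hadoop() is in scope (it is internal to SparkR's test utilities); the test name and body are illustrative, not taken from this commit:

  library(testthat)

  test_that("example guarded by windows_with_hadoop()", {
    # Skip only on Windows machines that have no HADOOP_HOME (winutils.exe).
    if (!windows_with_hadoop()) {
      skip("Needs HADOOP_HOME set when running on Windows")
    }
    expect_true(TRUE)  # real filesystem-touching assertions would go here
  })
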
Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+context("basic tests for CRAN")
+
+test_that("create DataFrame from list or data.frame", {
+  sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
+
+  i <- 4
+  df <- createDataFrame(data.frame(dummy = 1:i))
+  expect_equal(count(df), i)
+
+  l <- list(list(a = 1, b = 2), list(a = 3, b = 4))
+  df <- createDataFrame(l)
+  expect_equal(columns(df), c("a", "b"))
+
+  a <- 1:3
+  b <- c("a", "b", "c")
+  ldf <- data.frame(a, b)
+  df <- createDataFrame(ldf)
+  expect_equal(columns(df), c("a", "b"))
+  expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
+  expect_equal(count(df), 3)
+  ldf2 <- collect(df)
+  expect_equal(ldf$a, ldf2$a)
+
+  mtcarsdf <- createDataFrame(mtcars)
+  expect_equivalent(collect(mtcarsdf), mtcars)
+
+  bytes <- as.raw(c(1, 2, 3))
+  df <- createDataFrame(list(list(bytes)))
+  expect_equal(collect(df)[[1]][[1]], bytes)
+
+  sparkR.session.stop()
+})
+
+test_that("spark.glm and predict", {
+  sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
+
+  training <- suppressWarnings(createDataFrame(iris))
+  # gaussian family
+  model <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species)
+  prediction <- predict(model, training)
+  expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "double")
+  vals <- collect(select(prediction, "prediction"))
+  rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), iris)
+  expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
+
+  # Gamma family
+  x <- runif(100, -1, 1)
+  y <- rgamma(100, rate = 10 / exp(0.5 + 1.2 * x), shape = 10)
+  df <- as.DataFrame(as.data.frame(list(x = x, y = y)))
+  model <- glm(y ~ x, family = Gamma, df)
+  out <- capture.output(print(summary(model)))
+  expect_true(any(grepl("Dispersion parameter for gamma family", out)))
+
+  # tweedie family
+  model <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species,
+                     family = "tweedie", var.power = 1.2, link.power = 0.0)
+  prediction <- predict(model, training)
+  expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "double")
+  vals <- collect(select(prediction, "prediction"))
+
+  # manual calculation of the R predicted values to avoid dependence on statmod
+  #' library(statmod)
+  #' rModel <- glm(Sepal.Width ~ Sepal.Length + Species, data = iris,
+  #'               family = tweedie(var.power = 1.2, link.power = 0.0))
+  #' print(coef(rModel))
+
+  rCoef <- c(0.6455409, 0.1169143, -0.3224752, -0.3282174)
+  rVals <- exp(as.numeric(model.matrix(Sepal.Width ~ Sepal.Length + Species,
+                                       data = iris) %*% rCoef))
+  expect_true(all(abs(rVals - vals) < 1e-5), rVals - vals)
+
+  sparkR.session.stop()
+})
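A hypothetical way to run just this CRAN-safe suite locally with testthat; the directory path and master URL below are assumptions for illustration, not taken from the commit (the new file's location is not shown in this view):

  library(SparkR)
  library(testthat)

  # The tests reference sparkRTestMaster; define it before running. Value is illustrative.
  sparkRTestMaster <- "local[2]"

  # Assumed location of the CRAN-safe tests (the full suite now lives under tests/fulltests).
  test_dir("R/pkg/tests/testthat")
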
File renamed without changes.
Lines changed: 0 additions & 6 deletions
@@ -20,8 +20,6 @@ context("SerDe functionality")
 sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
 
 test_that("SerDe of primitive types", {
-  skip_on_cran()
-
   x <- callJStatic("SparkRHandler", "echo", 1L)
   expect_equal(x, 1L)
   expect_equal(class(x), "integer")
@@ -40,8 +38,6 @@ test_that("SerDe of primitive types", {
 })
 
 test_that("SerDe of list of primitive types", {
-  skip_on_cran()
-
   x <- list(1L, 2L, 3L)
   y <- callJStatic("SparkRHandler", "echo", x)
   expect_equal(x, y)
@@ -69,8 +65,6 @@ test_that("SerDe of list of primitive types", {
 })
 
 test_that("SerDe of list of lists", {
-  skip_on_cran()
-
   x <- list(list(1L, 2L, 3L), list(1, 2, 3),
             list(TRUE, FALSE), list("a", "b", "c"))
   y <- callJStatic("SparkRHandler", "echo", x)
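For context, the removed skip_on_cran() calls are testthat's guard against running heavy tests during CRAN checks; its check mirrors the is_cran() helper deleted from utils.R above. A simplified sketch of what such a guard does (not the actual testthat source):

  skip_on_cran_sketch <- function() {
    # Skip unless NOT_CRAN=true, i.e. skip when the suite runs under CRAN checks.
    if (!identical(Sys.getenv("NOT_CRAN"), "true")) {
      testthat::skip("Skipping: running under CRAN")
    }
  }

With the full suite moved to tests/fulltests (which .Rbuildignore now excludes from the CRAN build), per-test guards like this are no longer needed in these files.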
Lines changed: 1 addition & 3 deletions
@@ -17,9 +17,7 @@
 context("Windows-specific tests")
 
 test_that("sparkJars tag in SparkContext", {
-  skip_on_cran()
-
-  if (.Platform$OS.type != "windows") {
+  if (!is_windows()) {
     skip("This test is only for Windows, skipped")
   }
 
R/pkg/inst/tests/testthat/test_binaryFile.R renamed to R/pkg/tests/fulltests/test_binaryFile.R

Lines changed: 0 additions & 8 deletions
@@ -24,8 +24,6 @@ sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext",
 mockFile <- c("Spark is pretty.", "Spark is awesome.")
 
 test_that("saveAsObjectFile()/objectFile() following textFile() works", {
-  skip_on_cran()
-
   fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   writeLines(mockFile, fileName1)
@@ -40,8 +38,6 @@ test_that("saveAsObjectFile()/objectFile() following textFile() works", {
 })
 
 test_that("saveAsObjectFile()/objectFile() works on a parallelized list", {
-  skip_on_cran()
-
   fileName <- tempfile(pattern = "spark-test", fileext = ".tmp")
 
   l <- list(1, 2, 3)
@@ -54,8 +50,6 @@ test_that("saveAsObjectFile()/objectFile() works on a parallelized list", {
 })
 
 test_that("saveAsObjectFile()/objectFile() following RDD transformations works", {
-  skip_on_cran()
-
   fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   writeLines(mockFile, fileName1)
@@ -80,8 +74,6 @@ test_that("saveAsObjectFile()/objectFile() following RDD transformations works",
 })
 
 test_that("saveAsObjectFile()/objectFile() works with multiple paths", {
-  skip_on_cran()
-
   fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")
 
