Commit 0887eab

Add comment and merge master.
* 'master' of https://github.com/apache/spark: (149 commits)
  [SPARK-19753][CORE] Un-register all shuffle output on a host in case of slave lost or fetch failure
  [SPARK-20986][SQL] Reset table's statistics after PruneFileSourcePartitions rule.
  [SPARK-12552][CORE] Correctly count the driver resource when recovering from failure for Master
  [SPARK-21016][CORE] Improve code fault tolerance for converting string to number
  [SPARK-21051][SQL] Add hash map metrics to aggregate
  [SPARK-21064][CORE][TEST] Fix the default value bug in NettyBlockTransferServiceSuite
  [SPARK-21060][WEB-UI] Css style about paging function is error in the executor page. It is different of history server ui paging function css style.
  [SPARK-21039][SPARK CORE] Use treeAggregate instead of aggregate in DataFrame.stat.bloomFilter
  [SPARK-21006][TESTS][FOLLOW-UP] Some Worker's RpcEnv is leaked in WorkerSuite
  [SPARK-20920][SQL] ForkJoinPool pools are leaked when writing hive tables with many partitions
  [TEST][SPARKR][CORE] Fix broken SparkSubmitSuite
  [SPARK-19910][SQL] `stack` should not reject NULL values due to type mismatch
  Revert "[SPARK-21046][SQL] simplify the array offset and length in ColumnVector"
  [SPARK-20979][SS] Add RateSource to generate values for tests and benchmark
  [SPARK-21050][ML] Word2vec persistence overflow bug fix
  [SPARK-21059][SQL] LikeSimplification can NPE on null pattern
  [SPARK-20345][SQL] Fix STS error handling logic on HiveSQLException
  [SPARK-17914][SQL] Fix parsing of timestamp strings with nanoseconds
  [SPARK-21046][SQL] simplify the array offset and length in ColumnVector
  [SPARK-21041][SQL] SparkSession.range should be consistent with SparkContext.range
  ...
2 parents: da0f977 + dccc0aa

419 files changed: +12161 −3443 lines changed


R/pkg/.Rbuildignore

Lines changed: 1 addition & 0 deletions
@@ -6,3 +6,4 @@
 ^README\.Rmd$
 ^src-native$
 ^html$
+^tests/fulltests/*

R/pkg/R/DataFrame.R

Lines changed: 1 addition & 0 deletions
@@ -2646,6 +2646,7 @@ generateAliasesForIntersectedCols <- function (x, intersectedColNames, suffix) {
 #' Input SparkDataFrames can have different schemas (names and data types).
 #'
 #' Note: This does not remove duplicate rows across the two SparkDataFrames.
+#' Also as standard in SQL, this function resolves columns by position (not by name).
 #'
 #' @param x A SparkDataFrame
 #' @param y A SparkDataFrame
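The doc line added here states that union() matches columns by position rather than by name. A minimal SparkR sketch of that documented behavior (the session settings and data below are illustrative, not part of the commit):

  library(SparkR)
  sparkR.session(master = "local[2]", enableHiveSupport = FALSE)

  df1 <- createDataFrame(data.frame(a = 1:2, b = 3:4))
  df2 <- createDataFrame(data.frame(b = 5:6, a = 7:8))   # same types, names swapped

  # Rows from df2 are appended positionally: its "b" values land under
  # column "a" of the result, and its "a" values under column "b".
  collect(union(df1, df2))

  sparkR.session.stop()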

R/pkg/R/install.R

Lines changed: 1 addition & 1 deletion
@@ -267,7 +267,7 @@ hadoopVersionName <- function(hadoopVersion) {
 # The implementation refers to appdirs package: https://pypi.python.org/pypi/appdirs and
 # adapt to Spark context
 sparkCachePath <- function() {
-  if (.Platform$OS.type == "windows") {
+  if (is_windows()) {
     winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA)
     if (is.na(winAppPath)) {
       stop(paste("%LOCALAPPDATA% not found.",

R/pkg/R/utils.R

Lines changed: 2 additions & 6 deletions
@@ -908,10 +908,6 @@ isAtomicLengthOne <- function(x) {
   is.atomic(x) && length(x) == 1
 }
 
-is_cran <- function() {
-  !identical(Sys.getenv("NOT_CRAN"), "true")
-}
-
 is_windows <- function() {
   .Platform$OS.type == "windows"
 }
@@ -920,6 +916,6 @@ hadoop_home_set <- function() {
   !identical(Sys.getenv("HADOOP_HOME"), "")
 }
 
-not_cran_or_windows_with_hadoop <- function() {
-  !is_cran() && (!is_windows() || hadoop_home_set())
+windows_with_hadoop <- function() {
+  !is_windows() || hadoop_home_set()
 }
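The renamed helper presumably gates tests that need a working Hadoop setup when running on Windows. A minimal sketch of such a guard, assuming windows_with_hadoop() is in scope (it is internal to SparkR's test utilities); the test name and body are illustrative, not taken from this commit:

  library(testthat)

  test_that("example guarded by windows_with_hadoop()", {
    # Skip only on Windows machines that have no HADOOP_HOME (winutils.exe).
    if (!windows_with_hadoop()) {
      skip("Needs HADOOP_HOME set when running on Windows")
    }
    expect_true(TRUE)  # real filesystem-touching assertions would go here
  })
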
Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+context("basic tests for CRAN")
+
+test_that("create DataFrame from list or data.frame", {
+  sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
+
+  i <- 4
+  df <- createDataFrame(data.frame(dummy = 1:i))
+  expect_equal(count(df), i)
+
+  l <- list(list(a = 1, b = 2), list(a = 3, b = 4))
+  df <- createDataFrame(l)
+  expect_equal(columns(df), c("a", "b"))
+
+  a <- 1:3
+  b <- c("a", "b", "c")
+  ldf <- data.frame(a, b)
+  df <- createDataFrame(ldf)
+  expect_equal(columns(df), c("a", "b"))
+  expect_equal(dtypes(df), list(c("a", "int"), c("b", "string")))
+  expect_equal(count(df), 3)
+  ldf2 <- collect(df)
+  expect_equal(ldf$a, ldf2$a)
+
+  mtcarsdf <- createDataFrame(mtcars)
+  expect_equivalent(collect(mtcarsdf), mtcars)
+
+  bytes <- as.raw(c(1, 2, 3))
+  df <- createDataFrame(list(list(bytes)))
+  expect_equal(collect(df)[[1]][[1]], bytes)
+
+  sparkR.session.stop()
+})
+
+test_that("spark.glm and predict", {
+  sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
+
+  training <- suppressWarnings(createDataFrame(iris))
+  # gaussian family
+  model <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species)
+  prediction <- predict(model, training)
+  expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "double")
+  vals <- collect(select(prediction, "prediction"))
+  rVals <- predict(glm(Sepal.Width ~ Sepal.Length + Species, data = iris), iris)
+  expect_true(all(abs(rVals - vals) < 1e-6), rVals - vals)
+
+  # Gamma family
+  x <- runif(100, -1, 1)
+  y <- rgamma(100, rate = 10 / exp(0.5 + 1.2 * x), shape = 10)
+  df <- as.DataFrame(as.data.frame(list(x = x, y = y)))
+  model <- glm(y ~ x, family = Gamma, df)
+  out <- capture.output(print(summary(model)))
+  expect_true(any(grepl("Dispersion parameter for gamma family", out)))
+
+  # tweedie family
+  model <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species,
+                     family = "tweedie", var.power = 1.2, link.power = 0.0)
+  prediction <- predict(model, training)
+  expect_equal(typeof(take(select(prediction, "prediction"), 1)$prediction), "double")
+  vals <- collect(select(prediction, "prediction"))
+
+  # manual calculation of the R predicted values to avoid dependence on statmod
+  #' library(statmod)
+  #' rModel <- glm(Sepal.Width ~ Sepal.Length + Species, data = iris,
+  #'               family = tweedie(var.power = 1.2, link.power = 0.0))
+  #' print(coef(rModel))
+
+  rCoef <- c(0.6455409, 0.1169143, -0.3224752, -0.3282174)
+  rVals <- exp(as.numeric(model.matrix(Sepal.Width ~ Sepal.Length + Species,
+                                       data = iris) %*% rCoef))
+  expect_true(all(abs(rVals - vals) < 1e-5), rVals - vals)
+
+  sparkR.session.stop()
+})
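A hypothetical way to run just this CRAN-safe suite locally with testthat; the directory path and master URL below are assumptions for illustration, not taken from the commit (the new file's location is not shown in this view):

  library(SparkR)
  library(testthat)

  # The tests reference sparkRTestMaster; define it before running. Value is illustrative.
  sparkRTestMaster <- "local[2]"

  # Assumed location of the CRAN-safe tests (the full suite now lives under tests/fulltests).
  test_dir("R/pkg/tests/testthat")
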
File renamed without changes.
Lines changed: 0 additions & 6 deletions
@@ -20,8 +20,6 @@ context("SerDe functionality")
 sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
 
 test_that("SerDe of primitive types", {
-  skip_on_cran()
-
   x <- callJStatic("SparkRHandler", "echo", 1L)
   expect_equal(x, 1L)
   expect_equal(class(x), "integer")
@@ -40,8 +38,6 @@ test_that("SerDe of primitive types", {
 })
 
 test_that("SerDe of list of primitive types", {
-  skip_on_cran()
-
   x <- list(1L, 2L, 3L)
   y <- callJStatic("SparkRHandler", "echo", x)
   expect_equal(x, y)
@@ -69,8 +65,6 @@ test_that("SerDe of list of primitive types", {
 })
 
 test_that("SerDe of list of lists", {
-  skip_on_cran()
-
   x <- list(list(1L, 2L, 3L), list(1, 2, 3),
             list(TRUE, FALSE), list("a", "b", "c"))
   y <- callJStatic("SparkRHandler", "echo", x)
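For context, the removed skip_on_cran() calls are testthat's guard against running heavy tests during CRAN checks; its check mirrors the is_cran() helper deleted from utils.R above. A simplified sketch of what such a guard does (not the actual testthat source):

  skip_on_cran_sketch <- function() {
    # Skip unless NOT_CRAN=true, i.e. skip when the suite runs under CRAN checks.
    if (!identical(Sys.getenv("NOT_CRAN"), "true")) {
      testthat::skip("Skipping: running under CRAN")
    }
  }

With the full suite moved to tests/fulltests (which .Rbuildignore now excludes from the CRAN build), per-test guards like this are no longer needed in these files.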
Lines changed: 1 addition & 3 deletions
@@ -17,9 +17,7 @@
 context("Windows-specific tests")
 
 test_that("sparkJars tag in SparkContext", {
-  skip_on_cran()
-
-  if (.Platform$OS.type != "windows") {
+  if (!is_windows()) {
     skip("This test is only for Windows, skipped")
   }
 
R/pkg/inst/tests/testthat/test_binaryFile.R renamed to R/pkg/tests/fulltests/test_binaryFile.R

Lines changed: 0 additions & 8 deletions
@@ -24,8 +24,6 @@ sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext",
 mockFile <- c("Spark is pretty.", "Spark is awesome.")
 
 test_that("saveAsObjectFile()/objectFile() following textFile() works", {
-  skip_on_cran()
-
   fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   writeLines(mockFile, fileName1)
@@ -40,8 +38,6 @@ test_that("saveAsObjectFile()/objectFile() following textFile() works", {
 })
 
 test_that("saveAsObjectFile()/objectFile() works on a parallelized list", {
-  skip_on_cran()
-
   fileName <- tempfile(pattern = "spark-test", fileext = ".tmp")
 
   l <- list(1, 2, 3)
@@ -54,8 +50,6 @@ test_that("saveAsObjectFile()/objectFile() works on a parallelized list", {
 })
 
 test_that("saveAsObjectFile()/objectFile() following RDD transformations works", {
-  skip_on_cran()
-
   fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   writeLines(mockFile, fileName1)
@@ -80,8 +74,6 @@ test_that("saveAsObjectFile()/objectFile() following RDD transformations works",
 })
 
 test_that("saveAsObjectFile()/objectFile() works with multiple paths", {
-  skip_on_cran()
-
   fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp")
   fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp")
 
