Skip to content

Commit 6a0eb3f

Browse files
committed
SPARK-19342: fix a bug in the collect method when collecting a timestamp column; if the column values are all NAs, the type is logical
1 parent 5829ddd commit 6a0eb3f

File tree

2 files changed

+22
-10
lines changed

2 files changed

+22
-10
lines changed

R/pkg/R/DataFrame.R

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,17 +1136,11 @@ setMethod("collect",
11361136

11371137
# Note that "binary" columns behave like complex types.
11381138
if (!is.null(PRIMITIVE_TYPES[[colType]]) && colType != "binary") {
1139-
valueIndex <- which(!is.na(col))
1140-
if (length(valueIndex) > 0 && valueIndex[1] > 1) {
1141-
colTail <- col[-(1 : (valueIndex[1] - 1))]
1142-
vec <- do.call(c, colTail)
1143-
classVal <- class(vec)
1144-
vec <- c(rep(NA, valueIndex[1] - 1), vec)
1145-
class(vec) <- classVal
1146-
} else {
1147-
vec <- do.call(c, col)
1148-
}
1139+
vec <- do.call(c, col)
11491140
stopifnot(class(vec) != "list")
1141+
# If vec is a vector with only NAs, the type is logical
1142+
if (length(vec[!is.na(vec)]) > 0)
1143+
class(vec) <- PRIMITIVE_TYPES[[colType]]
11501144
df[[colIndex]] <- vec
11511145
} else {
11521146
df[[colIndex]] <- col

R/pkg/inst/tests/testthat/test_sparkSQL.R

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2748,6 +2748,24 @@ test_that("Call DataFrameWriter.load() API in Java without path and check argume
27482748
"Unnamed arguments ignored: 2, 3, a.")
27492749
})
27502750

2751+
test_that("Collect on DataFrame when NAs exists at the top of a timestamp column") {
2752+
ldf <- data.frame(col1 = c(0, 1, 2),
2753+
col2 = c(as.POSIXct("2017-01-01 00:00:01"),
2754+
NA,
2755+
as.POSIXct("2017-01-01 12:00:01")))
2756+
sdf1 <- createDataFrame(ldf)
2757+
ldf1 <- collect(sdf1)
2758+
expect_equal(dtypes(sdf1), list(c("col1", "double"), c("col2", "timestamp")))
2759+
expect_equal(class(ldf1$col1), "numeric")
2760+
expect_equal(class(ldf1$col2), "POSIXct")
2761+
2762+
sdf2 <- filter(sdf1, "col1 > 0")
2763+
ldf2 <- collect(sdf2)
2764+
expect_equal(dtypes(sdf2), list(c("col1", "double"), c("col2", "timestamp")))
2765+
expect_equal(class(ldf2$col1), "numeric")
2766+
expect_equal(class(ldf2$col2), "POSIXct")
2767+
}
2768+
27512769
unlink(parquetPath)
27522770
unlink(orcPath)
27532771
unlink(jsonPath)

0 commit comments

Comments
 (0)