Skip to content

Commit 10feb9d

Browse files
committed
Support array in from_json in R
1 parent 207067e commit 10feb9d

File tree

2 files changed

+60
-26
lines changed

2 files changed

+60
-26
lines changed

R/pkg/R/functions.R

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2430,33 +2430,43 @@ setMethod("date_format", signature(y = "Column", x = "character"),
24302430
column(jc)
24312431
})
24322432

2433+
setClassUnion("characterOrstructType", c("character", "structType"))
2434+
24332435
#' from_json
24342436
#'
24352437
#' Parses a column containing a JSON string into a Column of \code{structType} with the specified
24362438
#' \code{schema}. If the string is unparseable, the Column will contain the value NA.
24372439
#'
24382440
#' @param x Column containing the JSON string.
2439-
#' @param schema a structType object to use as the schema to use when parsing the JSON string.
2441+
#' @param schema a structType object, or a data type string (in the format used by structField)
2442+
#' representing an array or struct type, to use as the schema when parsing the JSON string.
24402443
#' @param ... additional named properties to control how the json is parsed, accepts the same
24412444
#' options as the JSON data source.
24422445
#'
24432446
#' @family normal_funcs
24442447
#' @rdname from_json
24452448
#' @name from_json
2446-
#' @aliases from_json,Column,structType-method
2449+
#' @aliases from_json,Column,characterOrstructType-method
24472450
#' @export
24482451
#' @examples
24492452
#' \dontrun{
24502453
#' schema <- structType(structField("name", "string"))
24512454
#' select(df, from_json(df$value, schema, dateFormat = "dd/MM/yyyy"))
24522455
#'}
24532456
#' @note from_json since 2.2.0
2454-
setMethod("from_json", signature(x = "Column", schema = "structType"),
2457+
setMethod("from_json", signature(x = "Column", schema = "characterOrstructType"),
24552458
function(x, schema, ...) {
2459+
if (is.character(schema)) {
2460+
jschema <- callJStatic("org.apache.spark.sql.api.r.SQLUtils",
2461+
"getSQLDataType",
2462+
schema)
2463+
} else {
2464+
jschema <- schema$jobj
2465+
}
24562466
options <- varargsToStrEnv(...)
24572467
jc <- callJStatic("org.apache.spark.sql.functions",
24582468
"from_json",
2459-
x@jc, schema$jobj, options)
2469+
x@jc, jschema, options)
24602470
column(jc)
24612471
})
24622472

R/pkg/inst/tests/testthat/test_sparkSQL.R

Lines changed: 46 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1342,28 +1342,52 @@ test_that("column functions", {
13421342
df <- read.json(mapTypeJsonPath)
13431343
j <- collect(select(df, alias(to_json(df$info), "json")))
13441344
expect_equal(j[order(j$json), ][1], "{\"age\":16,\"height\":176.5}")
1345-
df <- as.DataFrame(j)
1346-
schema <- structType(structField("age", "integer"),
1347-
structField("height", "double"))
1348-
s <- collect(select(df, alias(from_json(df$json, schema), "structcol")))
1349-
expect_equal(ncol(s), 1)
1350-
expect_equal(nrow(s), 3)
1351-
expect_is(s[[1]][[1]], "struct")
1352-
expect_true(any(apply(s, 1, function(x) { x[[1]]$age == 16 } )))
1353-
1354-
# passing option
1355-
df <- as.DataFrame(list(list("col" = "{\"date\":\"21/10/2014\"}")))
1356-
schema2 <- structType(structField("date", "date"))
1357-
expect_error(tryCatch(collect(select(df, from_json(df$col, schema2))),
1358-
error = function(e) { stop(e) }),
1359-
paste0(".*(java.lang.NumberFormatException: For input string:).*"))
1360-
s <- collect(select(df, from_json(df$col, schema2, dateFormat = "dd/MM/yyyy")))
1361-
expect_is(s[[1]][[1]]$date, "Date")
1362-
expect_equal(as.character(s[[1]][[1]]$date), "2014-10-21")
1363-
1364-
# check for unparseable
1365-
df <- as.DataFrame(list(list("a" = "")))
1366-
expect_equal(collect(select(df, from_json(df$a, schema)))[[1]][[1]], NA)
1345+
1346+
schemas <- list(structType(structField("age", "integer"), structField("height", "double")),
1347+
"struct<age:integer,height:double>")
1348+
for (schema in schemas) {
1349+
df <- as.DataFrame(j)
1350+
s <- collect(select(df, alias(from_json(df$json, schema), "structcol")))
1351+
expect_equal(ncol(s), 1)
1352+
expect_equal(nrow(s), 3)
1353+
expect_is(s[[1]][[1]], "struct")
1354+
expect_true(any(apply(s, 1, function(x) { x[[1]]$age == 16 } )))
1355+
1356+
# passing option
1357+
df <- as.DataFrame(list(list("col" = "{\"date\":\"21/10/2014\"}")))
1358+
schema2 <- structType(structField("date", "date"))
1359+
expect_error(tryCatch(collect(select(df, from_json(df$col, schema2))),
1360+
error = function(e) { stop(e) }),
1361+
paste0(".*(java.lang.NumberFormatException: For input string:).*"))
1362+
s <- collect(select(df, from_json(df$col, schema2, dateFormat = "dd/MM/yyyy")))
1363+
expect_is(s[[1]][[1]]$date, "Date")
1364+
expect_equal(as.character(s[[1]][[1]]$date), "2014-10-21")
1365+
1366+
# check for unparseable
1367+
df <- as.DataFrame(list(list("a" = "")))
1368+
expect_equal(collect(select(df, from_json(df$a, schema)))[[1]][[1]], NA)
1369+
}
1370+
1371+
# check if array type in string is correctly supported.
1372+
jsonArr <- "[{\"name\":\"Bob\"}, {\"name\":\"Alice\"}]"
1373+
df <- as.DataFrame(list(list("people" = jsonArr)))
1374+
arr <- collect(select(df, alias(from_json(df$people, "array<struct<name:string>>"), "arrcol")))
1375+
expect_equal(ncol(arr), 1)
1376+
expect_equal(nrow(arr), 1)
1377+
expect_is(arr[[1]][[1]], "list")
1378+
expect_equal(length(arr$arrcol[[1]]), 2)
1379+
expect_equal(arr$arrcol[[1]][[1]]$name, "Bob")
1380+
expect_equal(arr$arrcol[[1]][[2]]$name, "Alice")
1381+
1382+
# check for unparseable data type
1383+
expect_error(tryCatch(collect(select(df, from_json(df$people, "unknown"))),
1384+
error = function(e) { stop(e) }),
1385+
paste0(".*(Invalid type unknown).*"))
1386+
1387+
# check for incorrect data type
1388+
expect_error(tryCatch(collect(select(df, from_json(df$people, "integer"))),
1389+
error = function(e) { stop(e) }),
1390+
paste0(".*(data type mismatch: Input schema int must be a struct or an array of structs).*"))
13671391
})
13681392

13691393
test_that("column binary mathfunctions", {

0 commit comments

Comments
 (0)