
Commit 6b45d7e

viirya authored and HyukjinKwon committed
[SPARK-21954][SQL] JacksonUtils should verify MapType's value type instead of key type
## What changes were proposed in this pull request?

`JacksonUtils.verifySchema` verifies whether a data type can be converted to JSON. For `MapType`, it currently verifies the key type. However, when `JacksonGenerator` converts a map to JSON, only the values matter: a writer is created for the value type, while the keys are rendered as strings by calling `toString` on them. `JacksonUtils.verifySchema` should therefore verify the value type of `MapType` instead.

## How was this patch tested?

Added tests.

Author: Liang-Chi Hsieh <[email protected]>

Closes #19167 from viirya/test-jacksonutils.
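To make the rationale concrete, here is a minimal standalone sketch in plain Jackson (not Spark's actual `JacksonGenerator`; the object name and value types are illustrative) of why a map's key type never needs verifying: every key is written via `toString`, while each value needs a writer that matches its real type.

import java.io.StringWriter
import com.fasterxml.jackson.core.JsonFactory

// Minimal sketch, not Spark code: keys of any type are emitted via toString,
// so only the value type constrains what can be serialized to JSON.
object MapKeysBecomeStrings extends App {
  val out = new StringWriter()
  val gen = new JsonFactory().createGenerator(out)

  val m: Map[Any, Int] = Map(3.5 -> 1, true -> 2) // arbitrary key types

  gen.writeStartObject()
  m.foreach { case (k, v) =>
    gen.writeFieldName(k.toString) // key: always stringified
    gen.writeNumber(v)             // value: written by a typed writer
  }
  gen.writeEndObject()
  gen.close()

  println(out.toString) // {"3.5":1,"true":2}
}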
1 parent 8a5eb50 commit 6b45d7e

3 files changed: +47 −4 lines

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonUtils.scala

Lines changed: 3 additions & 1 deletion
@@ -44,7 +44,9 @@ object JacksonUtils {

       case at: ArrayType => verifyType(name, at.elementType)

-      case mt: MapType => verifyType(name, mt.keyType)
+      // For MapType, its keys are treated as a string (i.e. calling `toString`) basically when
+      // generating JSON, so we only care if the values are valid for JSON.
+      case mt: MapType => verifyType(name, mt.valueType)

       case udt: UserDefinedType[_] => verifyType(name, udt.sqlType)
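For context, the hunk above shows only three cases of the enclosing verifyType method. A condensed sketch of the recursive pattern (reconstructed around the visible lines; the atomic-type cases and exact error text outside the hunk are assumptions) looks roughly like this:

import org.apache.spark.sql.types._

// Condensed sketch; cases outside the visible hunk are assumptions.
// Containers recurse only into the child types that actually appear
// in the generated JSON.
def verifyType(name: String, dataType: DataType): Unit = dataType match {
  case BooleanType | IntegerType | LongType | DoubleType | StringType |
       TimestampType | DateType => // scalar leaves are directly writable
  case st: StructType => st.foreach(f => verifyType(f.name, f.dataType))
  case at: ArrayType => verifyType(name, at.elementType)
  // Map keys are rendered with toString, so only the value type is checked.
  case mt: MapType => verifyType(name, mt.valueType)
  case udt: UserDefinedType[_] => verifyType(name, udt.sqlType)
  case _ => throw new UnsupportedOperationException(
    s"Unable to convert column $name of type ${dataType.simpleString} to JSON.")
}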

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala

Lines changed: 23 additions & 0 deletions
@@ -21,6 +21,7 @@ import java.util.Calendar

 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.errors.TreeNodeException
 import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils, GenericArrayData, PermissiveMode}
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
@@ -610,4 +611,26 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       """{"t":"2015-12-31T16:00:00"}"""
     )
   }
+
+  test("to_json: verify MapType's value type instead of key type") {
+    // Keys in map are treated as strings when converting to JSON. The type doesn't matter at all.
+    val mapType1 = MapType(CalendarIntervalType, IntegerType)
+    val schema1 = StructType(StructField("a", mapType1) :: Nil)
+    val struct1 = Literal.create(null, schema1)
+    checkEvaluation(
+      StructsToJson(Map.empty, struct1, gmtId),
+      null
+    )
+
+    // The value type must be valid for converting to JSON.
+    val mapType2 = MapType(IntegerType, CalendarIntervalType)
+    val schema2 = StructType(StructField("a", mapType2) :: Nil)
+    val struct2 = Literal.create(null, schema2)
+    intercept[TreeNodeException[_]] {
+      checkEvaluation(
+        StructsToJson(Map.empty, struct2, gmtId),
+        null
+      )
+    }
+  }
 }

sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala

Lines changed: 21 additions & 3 deletions
@@ -17,7 +17,7 @@

 package org.apache.spark.sql

-import org.apache.spark.sql.functions.{from_json, struct, to_json}
+import org.apache.spark.sql.functions.{from_json, lit, map, struct, to_json}
 import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types._

@@ -195,15 +195,33 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext {
       Row("""{"_1":"26/08/2015 18:00"}""") :: Nil)
   }

-  test("to_json unsupported type") {
+  test("to_json - key types of map don't matter") {
+    // interval type is invalid for converting to JSON. However, the keys of a map are treated
+    // as strings, so its type doesn't matter.
     val df = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a")
-      .select(struct($"a._1".cast(CalendarIntervalType).as("a")).as("c"))
+      .select(struct(map($"a._1".cast(CalendarIntervalType), lit("a")).as("col1")).as("c"))
+    checkAnswer(
+      df.select(to_json($"c")),
+      Row("""{"col1":{"interval -3 months 7 hours":"a"}}""") :: Nil)
+  }
+
+  test("to_json unsupported type") {
+    val baseDf = Seq(Tuple1(Tuple1("interval -3 month 7 hours"))).toDF("a")
+    val df = baseDf.select(struct($"a._1".cast(CalendarIntervalType).as("a")).as("c"))
     val e = intercept[AnalysisException]{
       // Unsupported type throws an exception
       df.select(to_json($"c")).collect()
     }
     assert(e.getMessage.contains(
       "Unable to convert column a of type calendarinterval to JSON."))
+
+    // interval type is invalid for converting to JSON. We can't use it as value type of a map.
+    val df2 = baseDf
+      .select(struct(map(lit("a"), $"a._1".cast(CalendarIntervalType)).as("col1")).as("c"))
+    val e2 = intercept[AnalysisException] {
+      df2.select(to_json($"c")).collect()
+    }
+    assert(e2.getMessage.contains("Unable to convert column col1 of type calendarinterval to JSON"))
   }

   test("roundtrip in to_json and from_json - struct") {
