@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources.json
1919
2020import org .apache .spark .sql .QueryTest
2121import org .apache .spark .sql .test .SharedSQLContext
22+ import org .apache .spark .sql .types .{DoubleType , StructField , StructType }
2223
2324/**
2425 * Test cases for various [[JSONOptions ]].
@@ -93,23 +94,51 @@ class JsonParsingOptionsSuite extends QueryTest with SharedSQLContext {
9394 assert(df.first().getLong(0 ) == 18 )
9495 }
9596
// With allowNonNumericNumbers disabled, bare (unquoted) non-numeric tokens are expected to be
// rejected, while the same tokens written as JSON strings are still expected to be read as
// doubles when the schema declares the column as DoubleType.
test("allowNonNumericNumbers off") {
  // Non-quoted non-numeric numbers don't work if allowNonNumericNumbers is off: no schema is
  // supplied here, and the first inferred column is expected to be `_corrupt_record`.
  val unquotedCases: Seq[String] = Seq(
    """{"age": NaN}""",
    """{"age": Infinity}""",
    """{"age": +Infinity}""",
    """{"age": -Infinity}""",
    """{"age": INF}""",
    """{"age": +INF}""",
    """{"age": -INF}""")
  unquotedCases.foreach { str =>
    val rdd = spark.sparkContext.parallelize(Seq(str))
    val df = spark.read.option("allowNonNumericNumbers", "false").json(rdd)

    assert(df.schema.head.name == "_corrupt_record", s"for input: $str")
  }

  // Quoted non-numeric numbers should still work even if allowNonNumericNumbers is off.
  // Each input is paired directly with the predicate its parsed value must satisfy, so the
  // inputs and the expectations cannot drift out of alignment.
  val isNaN: Double => Boolean = _.isNaN
  val isPosInf: Double => Boolean = _.isPosInfinity
  val isNegInf: Double => Boolean = _.isNegInfinity
  val quotedCases: Seq[(String, Double => Boolean)] = Seq(
    """{"age": "NaN"}""" -> isNaN,
    """{"age": "Infinity"}""" -> isPosInf,
    """{"age": "+Infinity"}""" -> isPosInf,
    """{"age": "-Infinity"}""" -> isNegInf,
    """{"age": "INF"}""" -> isPosInf,
    """{"age": "+INF"}""" -> isPosInf,
    """{"age": "-INF"}""" -> isNegInf)
  val schema = StructType(StructField("age", DoubleType, true) :: Nil)

  quotedCases.foreach { case (str, check) =>
    val rdd = spark.sparkContext.parallelize(Seq(str))
    val df = spark.read.option("allowNonNumericNumbers", "false").schema(schema).json(rdd)

    // The clue names the offending input when an assertion fails.
    assert(df.schema.head.name == "age", s"for input: $str")
    assert(check(df.first().getDouble(0)), s"for input: $str")
  }
}
105125
// With allowNonNumericNumbers enabled, both bare and quoted non-numeric tokens are expected
// to be read as the corresponding special double values under a DoubleType schema.
test("allowNonNumericNumbers on") {
  val isNaN: Double => Boolean = _.isNaN
  val isPosInf: Double => Boolean = _.isPosInfinity
  val isNegInf: Double => Boolean = _.isNegInfinity

  // Each input is paired directly with the predicate its parsed value must satisfy. The
  // previous index-based lookup carried two surplus predicates that matched no input.
  val cases: Seq[(String, Double => Boolean)] = Seq(
    """{"age": NaN}""" -> isNaN,
    """{"age": Infinity}""" -> isPosInf,
    """{"age": +Infinity}""" -> isPosInf,
    """{"age": -Infinity}""" -> isNegInf,
    """{"age": +INF}""" -> isPosInf,
    """{"age": -INF}""" -> isNegInf,
    """{"age": "NaN"}""" -> isNaN,
    """{"age": "Infinity"}""" -> isPosInf,
    """{"age": "-Infinity"}""" -> isNegInf)
  val schema = StructType(StructField("age", DoubleType, true) :: Nil)

  cases.foreach { case (str, check) =>
    val rdd = spark.sparkContext.parallelize(Seq(str))
    val df = spark.read.option("allowNonNumericNumbers", "true").schema(schema).json(rdd)

    // The clue names the offending input when an assertion fails.
    assert(df.schema.head.name == "age", s"for input: $str")
    assert(check(df.first().getDouble(0)), s"for input: $str")
  }
}
114143
115144 test(" allowBackslashEscapingAnyCharacter off" ) {
0 commit comments