Commit 2f70077

linhongliu-db authored and cloud-fan committed
[SPARK-36224][SQL] Use Void as the type name of NullType
### What changes were proposed in this pull request?
Change `NullType.simpleString` to "void" to set "void" as the formal type name of `NullType`.

### Why are the changes needed?
This PR is intended to address the type name discussion in PR #28833. The reasons are:
1. The type name of NullType is displayed everywhere, e.g. in schema strings, error messages, and documentation, so it cannot be hidden from users and we have to choose a proper name.
2. "void" is widely used as the type name of "NULL", e.g. in Hive and PostgreSQL.
3. Changing to "void" enables the `toDDL`/`fromDDL` round trip for NullType (i.e. it makes `from_json(col, schema.toDDL)` work).

### Does this PR introduce _any_ user-facing change?
Yes, the type name of "NULL" is changed from "null" to "void". For example:
```
scala> sql("select null as a, 1 as b").schema.catalogString
res5: String = struct<a:void,b:int>
```

### How was this patch tested?
Existing test cases.

Closes #33437 from linhongliu-db/SPARK-36224-void-type-name.

Authored-by: Linhong Liu <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
1 parent a98d919 commit 2f70077
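For illustration, here is a minimal spark-shell sketch of the round trip described in the commit message (an assumed session against a build that includes this patch; exact DDL formatting may vary):

```scala
import org.apache.spark.sql.types.{NullType, StructType}

// `spark` is the SparkSession provided by spark-shell
val df = spark.sql("select null as a, 1 as b")

NullType.typeName        // "void" (was "null")
df.schema.catalogString  // "struct<a:void,b:int>"

// Because "void" is a parseable type name, the DDL form can now round-trip:
val ddl = df.schema.toDDL             // something like "`a` VOID,`b` INT"
val parsed = StructType.fromDDL(ddl)  // previously the null-typed column made this fail
assert(parsed("a").dataType == NullType)
```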

21 files changed: +43, -52 lines

python/pyspark/sql/tests/test_types.py

Lines changed: 1 addition & 2 deletions
@@ -511,8 +511,7 @@ def test_struct_type(self):
     def test_parse_datatype_string(self):
         from pyspark.sql.types import _all_atomic_types, _parse_datatype_string
         for k, t in _all_atomic_types.items():
-            if t != NullType:
-                self.assertEqual(t(), _parse_datatype_string(k))
+            self.assertEqual(t(), _parse_datatype_string(k))
         self.assertEqual(IntegerType(), _parse_datatype_string("int"))
         self.assertEqual(DecimalType(1, 1), _parse_datatype_string("decimal(1 ,1)"))
         self.assertEqual(DecimalType(10, 1), _parse_datatype_string("decimal( 10,1 )"))

python/pyspark/sql/types.py

Lines changed: 3 additions & 1 deletion
@@ -107,7 +107,9 @@ class NullType(DataType, metaclass=DataTypeSingleton):
 
     The data type representing None, used for the types that cannot be inferred.
     """
-    pass
+    @classmethod
+    def typeName(cls):
+        return 'void'
 
 
 class AtomicType(DataType):

sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala

Lines changed: 2 additions & 0 deletions
@@ -195,6 +195,8 @@ object DataType {
       case FIXED_DECIMAL(precision, scale) => DecimalType(precision.toInt, scale.toInt)
       case CHAR_TYPE(length) => CharType(length.toInt)
       case VARCHAR_TYPE(length) => VarcharType(length.toInt)
+      // For backwards compatibility, previously the type name of NullType is "null"
+      case "null" => NullType
       case other => otherTypes.getOrElse(
         other,
         throw new IllegalArgumentException(
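As a quick sanity check of the fallback added above (a hypothetical snippet, not part of the commit; it relies only on `DataType.fromJson` and the new `typeName`):

```scala
import org.apache.spark.sql.types.{DataType, NullType}

// Schema JSON written by older Spark releases still uses the name "null" ...
assert(DataType.fromJson("\"null\"") == NullType)  // handled by the compatibility case
// ... while JSON produced after this change uses "void".
assert(NullType.json == "\"void\"")
assert(DataType.fromJson("\"void\"") == NullType)
```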

sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala

Lines changed: 2 additions & 0 deletions
@@ -32,6 +32,8 @@ class NullType private() extends DataType {
   override def defaultSize: Int = 1
 
   private[spark] override def asNullable: NullType = this
+
+  override def typeName: String = "void"
 }
 
 /**
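A single override is sufficient here because, in the `DataType` base class, `simpleString`, `catalogString`, and `sql` are all derived from `typeName` by default. A rough illustration (not part of the patch):

```scala
import org.apache.spark.sql.types.NullType

NullType.typeName       // "void"
NullType.simpleString   // "void"  (defaults to typeName)
NullType.catalogString  // "void"  (defaults to simpleString)
NullType.sql            // "VOID"  (upper-cased simpleString)
```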

sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala

Lines changed: 6 additions & 0 deletions
@@ -183,6 +183,10 @@ class DataTypeSuite extends SparkFunSuite {
     assert(!arrayType.existsRecursively(_.isInstanceOf[IntegerType]))
   }
 
+  test("SPARK-36224: Backwards compatibility test for NullType.json") {
+    assert(DataType.fromJson("\"null\"") == NullType)
+  }
+
   def checkDataTypeFromJson(dataType: DataType): Unit = {
     test(s"from Json - $dataType") {
       assert(DataType.fromJson(dataType.json) === dataType)
@@ -198,6 +202,7 @@
   }
 
   checkDataTypeFromJson(NullType)
+  checkDataTypeFromDDL(NullType)
 
   checkDataTypeFromJson(BooleanType)
   checkDataTypeFromDDL(BooleanType)
@@ -424,6 +429,7 @@
     i => StructField(s"col$i", IntegerType, nullable = true)
   })
 
+  checkCatalogString(NullType)
   checkCatalogString(BooleanType)
   checkCatalogString(ByteType)
   checkCatalogString(ShortType)

sql/core/src/test/resources/sql-functions/sql-expression-schema.md

Lines changed: 3 additions & 3 deletions
@@ -34,7 +34,7 @@
 | org.apache.spark.sql.catalyst.expressions.Ascii | ascii | SELECT ascii('222') | struct<ascii(222):int> |
 | org.apache.spark.sql.catalyst.expressions.Asin | asin | SELECT asin(0) | struct<ASIN(0):double> |
 | org.apache.spark.sql.catalyst.expressions.Asinh | asinh | SELECT asinh(0) | struct<ASINH(0):double> |
-| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct<assert_true((0 < 1), '(0 < 1)' is not true!):null> |
+| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct<assert_true((0 < 1), '(0 < 1)' is not true!):void> |
 | org.apache.spark.sql.catalyst.expressions.Atan | atan | SELECT atan(0) | struct<ATAN(0):double> |
 | org.apache.spark.sql.catalyst.expressions.Atan2 | atan2 | SELECT atan2(0, 0) | struct<ATAN2(0, 0):double> |
 | org.apache.spark.sql.catalyst.expressions.Atanh | atanh | SELECT atanh(0) | struct<ATANH(0):double> |
@@ -223,7 +223,7 @@
 | org.apache.spark.sql.catalyst.expressions.RLike | regexp | SELECT regexp('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<REGEXP(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> |
 | org.apache.spark.sql.catalyst.expressions.RLike | regexp_like | SELECT regexp_like('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<REGEXP_LIKE(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> |
 | org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT rlike('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<RLIKE(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> |
-| org.apache.spark.sql.catalyst.expressions.RaiseError | raise_error | SELECT raise_error('custom error message') | struct<raise_error(custom error message):null> |
+| org.apache.spark.sql.catalyst.expressions.RaiseError | raise_error | SELECT raise_error('custom error message') | struct<raise_error(custom error message):void> |
 | org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | struct<rand():double> |
 | org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct<rand():double> |
 | org.apache.spark.sql.catalyst.expressions.Randn | randn | SELECT randn() | struct<randn():double> |
@@ -366,4 +366,4 @@
 | org.apache.spark.sql.catalyst.expressions.xml.XPathList | xpath | SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b/text()') | struct<xpath(<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>, a/b/text()):array<string>> |
 | org.apache.spark.sql.catalyst.expressions.xml.XPathLong | xpath_long | SELECT xpath_long('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_long(<a><b>1</b><b>2</b></a>, sum(a/b)):bigint> |
 | org.apache.spark.sql.catalyst.expressions.xml.XPathShort | xpath_short | SELECT xpath_short('<a><b>1</b><b>2</b></a>', 'sum(a/b)') | struct<xpath_short(<a><b>1</b><b>2</b></a>, sum(a/b)):smallint> |
-| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') | struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |
+| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c') | struct<xpath_string(<a><b>b</b><c>cc</c></a>, a/c):string> |

sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 -- !query
 select null, Null, nUll
 -- !query schema
-struct<NULL:null,NULL:null,NULL:null>
+struct<NULL:void,NULL:void,NULL:void>
 -- !query output
 NULL NULL NULL

sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out

Lines changed: 2 additions & 2 deletions
@@ -74,7 +74,7 @@ select left(null, -2)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'substring(NULL, 1, -2)' due to data type mismatch: argument 1 requires (string or binary) type, however, 'NULL' is of null type.; line 1 pos 7
+cannot resolve 'substring(NULL, 1, -2)' due to data type mismatch: argument 1 requires (string or binary) type, however, 'NULL' is of void type.; line 1 pos 7
 
 
 -- !query
@@ -101,7 +101,7 @@ select right(null, -2)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-cannot resolve 'substring(NULL, (- -2), 2147483647)' due to data type mismatch: argument 1 requires (string or binary) type, however, 'NULL' is of null type.; line 1 pos 7
+cannot resolve 'substring(NULL, (- -2), 2147483647)' due to data type mismatch: argument 1 requires (string or binary) type, however, 'NULL' is of void type.; line 1 pos 7
 
 
 -- !query

sql/core/src/test/resources/sql-tests/results/inline-table.sql.out

Lines changed: 1 addition & 1 deletion
@@ -49,7 +49,7 @@ two 2
 -- !query
 select * from values ("one", null), ("two", null) as data(a, b)
 -- !query schema
-struct<a:string,b:null>
+struct<a:string,b:void>
 -- !query output
 one NULL
 two NULL

sql/core/src/test/resources/sql-tests/results/literals.sql.out

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 -- !query
 select null, Null, nUll
 -- !query schema
-struct<NULL:null,NULL:null,NULL:null>
+struct<NULL:void,NULL:void,NULL:void>
 -- !query output
 NULL NULL NULL
