
Commit ae48f7c

Anselme Vignon committed
unittesting SPARK-5775

1 parent f876dea · commit ae48f7c

1 file changed: 91 additions (+), 6 deletions (−)

sql/hive/src/test/scala/org/apache/spark/sql/parquet/parquetSuites.scala
@@ -31,7 +31,9 @@ import org.apache.spark.sql.hive.test.TestHive._
 case class ParquetData(intField: Int, stringField: String)
 // The data that also includes the partitioning key
 case class ParquetDataWithKey(p: Int, intField: Int, stringField: String)
-case class ParquetDataWithKeyAndComplexTypes(p: Int, intField: Int, stringField: String)
+case class StructContainer(intStructField :Int, stringStructField: String )
+case class ParquetDataWithComplexTypes(intField :Int, stringField: String ,structField: StructContainer, arrayField: Seq[Int])
+case class ParquetDataWithKeyAndComplexTypes(p: Int,intField :Int, stringField: String , structField: StructContainer, arrayField: Seq[Int])
 
 /**
  * A suite to test the automatic conversion of metastore tables with parquet data to use the
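For context (not part of the diff): Spark SQL infers the Parquet schema for these test rows from the case classes by reflection, so structField becomes a nested record and arrayField a repeated group. A minimal sketch of that inference, assuming the suite's `import org.apache.spark.sql.hive.test.TestHive._` (which supplies sparkContext and the implicit RDD-to-SchemaRDD conversion in this Spark 1.x-era codebase):

// Sketch only: hypothetical snippet, not part of this commit.
case class StructContainer(intStructField: Int, stringStructField: String)
case class ParquetDataWithComplexTypes(
    intField: Int,
    stringField: String,
    structField: StructContainer,
    arrayField: Seq[Int])

val rows = sparkContext.makeRDD(1 to 3).map { i =>
  ParquetDataWithComplexTypes(i, s"row-$i", StructContainer(i, s"${i}_string"), 1 to i)
}
// The implicit conversion yields a SchemaRDD; printSchema should show the
// inferred struct<intStructField:int,stringStructField:string> and array<int>.
rows.printSchema()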
@@ -69,20 +71,36 @@ class ParquetMetastoreSuite extends ParquetTest {
       location '${partitionedTableDirWithKey.getCanonicalPath}'
     """)
 
+    sql(s"""
+      create external table partitioned_parquet_with_complextypes
+      (
+        intField INT,
+        stringField STRING,
+        structField STRUCT<intStructField :INT, stringStructField :STRING>,
+        arrayField ARRAY<INT>
+      )
+      PARTITIONED BY (p int)
+      ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+      STORED AS
+      INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+      OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+      location '${partitionedTableDirWithComplexTypes.getCanonicalPath}'
+    """)
+
     sql(s"""
       create external table partitioned_parquet_with_key_and_complextypes
       (
         intField INT,
-        structField STRUCT<intStructField INT, stringStructField STRING>,
-        arrayField ARRAY<INT>,
-        stringField STRING
+        stringField STRING,
+        structField STRUCT<intStructField :INT, stringStructField :STRING>,
+        arrayField ARRAY<INT>
       )
       PARTITIONED BY (p int)
       ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
       STORED AS
       INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
       OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
-      location '${partitionedTableDirWithKey.getCanonicalPath}'
+      location '${partitionedTableDirWithKeyAndComplexTypes.getCanonicalPath}'
     """)
 
     sql(s"""
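A detail worth calling out in the hunk above: Hive's DDL separates struct field names from their types with a colon (`STRUCT<name :TYPE, ...>`); the commit fixes the earlier space-separated form and also points `location` at the matching data directory. A standalone sketch of the same type syntax, with a hypothetical table name, runnable against any HiveContext:

// Sketch only: hypothetical table, not created by this commit.
sql("""
  CREATE TABLE example_complex (
    structField STRUCT<intStructField:INT, stringStructField:STRING>,
    arrayField ARRAY<INT>
  )
  ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
  STORED AS
    INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
""")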
@@ -106,6 +124,14 @@ class ParquetMetastoreSuite extends ParquetTest {
       sql(s"ALTER TABLE partitioned_parquet_with_key ADD PARTITION (p=$p)")
     }
 
+    (1 to 10).foreach { p =>
+      sql(s"ALTER TABLE partitioned_parquet_with_key_and_complextypes ADD PARTITION (p=$p)")
+    }
+
+    (1 to 10).foreach { p =>
+      sql(s"ALTER TABLE partitioned_parquet_with_complextypes ADD PARTITION (p=$p)")
+    }
+
     setConf("spark.sql.hive.convertMetastoreParquet", "true")
   }
 
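These loops matter because external partitioned tables do not discover `p=N` directories on their own; each partition has to be registered in the metastore before queries can see its data. A sanity check one could run after the loops (hypothetical, not in the commit; the 100 comes from the 10 partitions × 10 rows written in ParquetTest.beforeAll):

// Sketch only: hypothetical assertion, not part of this commit.
val total = sql("SELECT COUNT(*) FROM partitioned_parquet_with_complextypes")
  .collect().head.getLong(0)
assert(total == 100)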
@@ -155,6 +181,22 @@ class ParquetSourceSuite extends ParquetTest {
         path '${new File(partitionedTableDir, "p=1").getCanonicalPath}'
       )
     """)
+
+    sql( s"""
+      create temporary table partitioned_parquet_with_key_and_complextypes
+      USING org.apache.spark.sql.parquet
+      OPTIONS (
+        path '${new File(partitionedTableDirWithKeyAndComplexTypes, "p=1").getCanonicalPath}'
+      )
+    """)
+
+    sql( s"""
+      create temporary table partitioned_parquet_with_complextypes
+      USING org.apache.spark.sql.parquet
+      OPTIONS (
+        path '${new File(partitionedTableDirWithComplexTypes, "p=1").getCanonicalPath}'
+      )
+    """)
   }
 }
 
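Unlike the metastore tables earlier, these temporary tables go through the native data source path (`USING org.apache.spark.sql.parquet`), and each points inside a single `p=1` directory, so no partition column `p` is visible. A hedged usage sketch against the table registered above:

// Sketch only: hypothetical query, not part of this commit. Only the file's
// own columns exist here; p is absent because the path is one partition dir.
sql("""
  SELECT intField, structField.stringStructField, arrayField
  FROM partitioned_parquet_with_complextypes
""").collect().foreach(println)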
@@ -164,6 +206,8 @@ class ParquetSourceSuite extends ParquetTest {
 abstract class ParquetTest extends QueryTest with BeforeAndAfterAll {
   var partitionedTableDir: File = null
   var partitionedTableDirWithKey: File = null
+  var partitionedTableDirWithKeyAndComplexTypes: File = null
+  var partitionedTableDirWithComplexTypes: File = null
 
   override def beforeAll(): Unit = {
     partitionedTableDir = File.createTempFile("parquettests", "sparksql")
@@ -187,9 +231,32 @@ abstract class ParquetTest extends QueryTest with BeforeAndAfterAll {
         .map(i => ParquetDataWithKey(p, i, s"part-$p"))
         .saveAsParquetFile(partDir.getCanonicalPath)
     }
+
+    partitionedTableDirWithKeyAndComplexTypes = File.createTempFile("parquettests", "sparksql")
+    partitionedTableDirWithKeyAndComplexTypes.delete()
+    partitionedTableDirWithKeyAndComplexTypes.mkdir()
+
+    (1 to 10).foreach { p =>
+      val partDir = new File(partitionedTableDirWithKeyAndComplexTypes, s"p=$p")
+      sparkContext.makeRDD(1 to 10)
+        .map(i => ParquetDataWithKeyAndComplexTypes(p, i,s"part-$p", StructContainer(i,f"${i}_string"), (1 to i)))
+        .saveAsParquetFile(partDir.getCanonicalPath)
+    }
+
+    partitionedTableDirWithComplexTypes = File.createTempFile("parquettests", "sparksql")
+    partitionedTableDirWithComplexTypes.delete()
+    partitionedTableDirWithComplexTypes.mkdir()
+
+    (1 to 10).foreach { p =>
+      val partDir = new File(partitionedTableDirWithComplexTypes, s"p=$p")
+      sparkContext.makeRDD(1 to 10)
+        .map(i => ParquetDataWithComplexTypes(i,s"part-$p", StructContainer(i,f"${i}_string"), (1 to i)))
+        .saveAsParquetFile(partDir.getCanonicalPath)
+    }
+
   }
 
-  Seq("partitioned_parquet", "partitioned_parquet_with_key").foreach { table =>
+  Seq("partitioned_parquet", "partitioned_parquet_with_key", "partitioned_parquet_with_key_and_complextypes","partitioned_parquet_with_complextypes").foreach { table =>
     test(s"ordering of the partitioning columns $table") {
       checkAnswer(
         sql(s"SELECT p, stringField FROM $table WHERE p = 1"),
@@ -202,6 +269,8 @@ abstract class ParquetTest extends QueryTest with BeforeAndAfterAll {
       )
     }
 
+
+
     test(s"project the partitioning column $table") {
       checkAnswer(
         sql(s"SELECT p, count(*) FROM $table group by p"),
@@ -279,6 +348,22 @@ abstract class ParquetTest extends QueryTest with BeforeAndAfterAll {
     }
   }
 
+  Seq("partitioned_parquet_with_key_and_complextypes", "partitioned_parquet_with_complextypes").foreach { table =>
+    test(s"SPARK-5775 read struct from $table") {
+      checkAnswer(
+        sql(s"SELECT p, structField.intStructField , structField.stringStructField FROM $table WHERE p = 1"),
+        (1 to 10).map { i => ((1, i, f"${i}_string"))}
+      )
+    }
+
+    test (s"SPARK-5775 read array from $table") {
+      checkAnswer(
+        sql(s"SELECT arrayField, p FROM $table WHERE p = 1"),
+        (1 to 10).map { i => ((1 to i,1))}
+      )
+    }
+  }
+
   test("non-part select(*)") {
     checkAnswer(
       sql("SELECT COUNT(*) FROM normal_parquet"),
