@@ -31,7 +31,9 @@ import org.apache.spark.sql.hive.test.TestHive._
 case class ParquetData(intField: Int, stringField: String)
 // The data that also includes the partitioning key
 case class ParquetDataWithKey(p: Int, intField: Int, stringField: String)
-case class ParquetDataWithKeyAndComplexTypes(p: Int, intField: Int, stringField: String)
+case class StructContainer(intStructField: Int, stringStructField: String)
+case class ParquetDataWithComplexTypes(intField: Int, stringField: String, structField: StructContainer, arrayField: Seq[Int])
+case class ParquetDataWithKeyAndComplexTypes(p: Int, intField: Int, stringField: String, structField: StructContainer, arrayField: Seq[Int])
 
 /**
  * A suite to test the automatic conversion of metastore tables with parquet data to use the
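A quick way to see the schema the new case classes yield (my own sketch, not part of the patch, assuming the Spark 1.x `ScalaReflection` helper):

```scala
import org.apache.spark.sql.catalyst.ScalaReflection

// Hypothetical check: the Catalyst schema derived from the case class should
// mirror the STRUCT/ARRAY column types in the Hive DDL added further down.
println(ScalaReflection.schemaFor[ParquetDataWithComplexTypes].dataType)
// structField -> StructType(intStructField: int, stringStructField: string)
// arrayField  -> ArrayType(int)
```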
@@ -69,20 +71,36 @@ class ParquetMetastoreSuite extends ParquetTest {
       location '${partitionedTableDirWithKey.getCanonicalPath}'
     """)
 
+    sql(s"""
+      create external table partitioned_parquet_with_complextypes
+      (
+        intField INT,
+        stringField STRING,
+        structField STRUCT<intStructField: INT, stringStructField: STRING>,
+        arrayField ARRAY<INT>
+      )
+      PARTITIONED BY (p int)
+      ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+      STORED AS
+      INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+      OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+      location '${partitionedTableDirWithComplexTypes.getCanonicalPath}'
+    """)
+
     sql(s"""
       create external table partitioned_parquet_with_key_and_complextypes
       (
         intField INT,
-        structField STRUCT<intStructField INT, stringStructField STRING>,
-        arrayField ARRAY<INT>,
-        stringField STRING
+        stringField STRING,
+        structField STRUCT<intStructField: INT, stringStructField: STRING>,
+        arrayField ARRAY<INT>
       )
       PARTITIONED BY (p int)
       ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
      STORED AS
       INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
       OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
-      location '${partitionedTableDirWithKey.getCanonicalPath}'
+      location '${partitionedTableDirWithKeyAndComplexTypes.getCanonicalPath}'
     """)
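As a sanity check on the DDL above (my own sketch, not in the patch): once the partitions are registered, the complex columns should be addressable directly in HiveQL, structs with dot syntax and arrays with an index:

```scala
// Hypothetical smoke query; assumes the ALTER TABLE ... ADD PARTITION loop
// below has already run so that partition p=1 is visible to the metastore.
sql("""
  SELECT structField.intStructField, arrayField[0]
  FROM partitioned_parquet_with_complextypes
  WHERE p = 1
""").collect().foreach(println)
```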
@@ -106,6 +124,14 @@ class ParquetMetastoreSuite extends ParquetTest {
       sql(s"ALTER TABLE partitioned_parquet_with_key ADD PARTITION (p=$p)")
     }
 
+    (1 to 10).foreach { p =>
+      sql(s"ALTER TABLE partitioned_parquet_with_key_and_complextypes ADD PARTITION (p=$p)")
+    }
+
+    (1 to 10).foreach { p =>
+      sql(s"ALTER TABLE partitioned_parquet_with_complextypes ADD PARTITION (p=$p)")
+    }
+
     setConf("spark.sql.hive.convertMetastoreParquet", "true")
   }
 
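The registration loops matter because the metastore does not discover partition directories on its own, even though beforeAll has already written Parquet files into them. A hedged way to verify that (my addition, assuming `SHOW PARTITIONS` passes through to Hive):

```scala
// Hypothetical verification: the table should list p=1 through p=10 only
// after the ALTER TABLE ... ADD PARTITION loops above have run.
sql("SHOW PARTITIONS partitioned_parquet_with_complextypes")
  .collect()
  .foreach(println)
```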
@@ -155,6 +181,22 @@ class ParquetSourceSuite extends ParquetTest {
         path '${new File(partitionedTableDir, "p=1").getCanonicalPath}'
       )
     """)
+
+    sql(s"""
+      create temporary table partitioned_parquet_with_key_and_complextypes
+      USING org.apache.spark.sql.parquet
+      OPTIONS (
+        path '${new File(partitionedTableDirWithKeyAndComplexTypes, "p=1").getCanonicalPath}'
+      )
+    """)
+
+    sql(s"""
+      create temporary table partitioned_parquet_with_complextypes
+      USING org.apache.spark.sql.parquet
+      OPTIONS (
+        path '${new File(partitionedTableDirWithComplexTypes, "p=1").getCanonicalPath}'
+      )
+    """)
   }
 }
 
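Both temporary tables point at a single partition directory (`p=1`), so they exercise the `org.apache.spark.sql.parquet` data source rather than the metastore conversion path. A usage sketch (my own, not in the patch):

```scala
// Hypothetical query against the data-source-backed table. Because the path
// is the p=1 directory itself, the partition column p only exists where the
// generated rows carry the key (the ...WithKeyAndComplexTypes data).
sql("""
  SELECT intField, structField.stringStructField, arrayField
  FROM partitioned_parquet_with_complextypes
""").collect().foreach(println)
```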
@@ -164,6 +206,8 @@ class ParquetSourceSuite extends ParquetTest {
 abstract class ParquetTest extends QueryTest with BeforeAndAfterAll {
   var partitionedTableDir: File = null
   var partitionedTableDirWithKey: File = null
+  var partitionedTableDirWithKeyAndComplexTypes: File = null
+  var partitionedTableDirWithComplexTypes: File = null
 
   override def beforeAll(): Unit = {
     partitionedTableDir = File.createTempFile("parquettests", "sparksql")
@@ -187,9 +231,32 @@ abstract class ParquetTest extends QueryTest with BeforeAndAfterAll {
         .map(i => ParquetDataWithKey(p, i, s"part-$p"))
         .saveAsParquetFile(partDir.getCanonicalPath)
     }
+
+    partitionedTableDirWithKeyAndComplexTypes = File.createTempFile("parquettests", "sparksql")
+    partitionedTableDirWithKeyAndComplexTypes.delete()
+    partitionedTableDirWithKeyAndComplexTypes.mkdir()
+
+    (1 to 10).foreach { p =>
+      val partDir = new File(partitionedTableDirWithKeyAndComplexTypes, s"p=$p")
+      sparkContext.makeRDD(1 to 10)
+        .map(i => ParquetDataWithKeyAndComplexTypes(p, i, s"part-$p", StructContainer(i, f"${i}_string"), (1 to i)))
+        .saveAsParquetFile(partDir.getCanonicalPath)
+    }
+
+    partitionedTableDirWithComplexTypes = File.createTempFile("parquettests", "sparksql")
+    partitionedTableDirWithComplexTypes.delete()
+    partitionedTableDirWithComplexTypes.mkdir()
+
+    (1 to 10).foreach { p =>
+      val partDir = new File(partitionedTableDirWithComplexTypes, s"p=$p")
+      sparkContext.makeRDD(1 to 10)
+        .map(i => ParquetDataWithComplexTypes(i, s"part-$p", StructContainer(i, f"${i}_string"), (1 to i)))
+        .saveAsParquetFile(partDir.getCanonicalPath)
+    }
+
   }
 
-  Seq("partitioned_parquet", "partitioned_parquet_with_key").foreach { table =>
+  Seq("partitioned_parquet", "partitioned_parquet_with_key", "partitioned_parquet_with_key_and_complextypes", "partitioned_parquet_with_complextypes").foreach { table =>
     test(s"ordering of the partitioning columns $table") {
       checkAnswer(
         sql(s"SELECT p, stringField FROM $table WHERE p = 1"),
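To make the generated fixtures concrete (my own illustration, not part of the patch), the keyed loop above emits rows like this one for p = 2, i = 3:

```scala
// Hypothetical: the row produced by the partitioned_parquet_with_key_and_complextypes
// generator for p = 2, i = 3.
ParquetDataWithKeyAndComplexTypes(
  p = 2,
  intField = 3,
  stringField = "part-2",                       // s"part-$p"
  structField = StructContainer(3, "3_string"), // f"${i}_string"
  arrayField = Seq(1, 2, 3))                    // (1 to i)
```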
@@ -202,6 +269,8 @@ abstract class ParquetTest extends QueryTest with BeforeAndAfterAll {
       )
     }
 
+
+
     test(s"project the partitioning column $table") {
       checkAnswer(
         sql(s"SELECT p, count(*) FROM $table group by p"),
@@ -279,6 +348,22 @@ abstract class ParquetTest extends QueryTest with BeforeAndAfterAll {
     }
   }
 
+  Seq("partitioned_parquet_with_key_and_complextypes", "partitioned_parquet_with_complextypes").foreach { table =>
+    test(s"SPARK-5775 read struct from $table") {
+      checkAnswer(
+        sql(s"SELECT p, structField.intStructField, structField.stringStructField FROM $table WHERE p = 1"),
+        (1 to 10).map { i => (1, i, f"${i}_string") }
+      )
+    }
+
+    test(s"SPARK-5775 read array from $table") {
+      checkAnswer(
+        sql(s"SELECT arrayField, p FROM $table WHERE p = 1"),
+        (1 to 10).map { i => (1 to i, 1) }
+      )
+    }
+  }
+
   test("non-part select(*)") {
     checkAnswer(
       sql("SELECT COUNT(*) FROM normal_parquet"),