@@ -728,4 +728,70 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter
728728 assert(e.contains(" mismatched input 'ROW'" ))
729729 }
730730 }
731+
/**
 * Regression test for SPARK-21786: the session-level setting
 * `spark.sql.parquet.compression.codec` is applied when inserting into a Hive
 * Parquet table, both with and without a partition column.
 *
 * The same 10000 rows are written to an unpartitioned and a partitioned table under
 * the "uncompressed" and "gzip" codecs, and the on-disk sizes of the resulting
 * `part-*` files are compared.
 */
test("[SPARK-21786] The 'spark.sql.parquet.compression.codec' " +
  "configuration doesn't take effect on tables with partition field(s)") {
  withTempDir { tmpDir =>
    withTempView("table_source") {
      (0 until 10000).toDF("a").createOrReplaceTempView("table_source")

      val tableWithPartition = "table_with_partition"
      val tableNoPartition = "table_no_partition"
      // Both tables live under tmpDir so their data files can be inspected directly.
      val baseLocation = tmpDir.toURI.toString.stripSuffix("/")

      withTable(tableWithPartition, tableNoPartition) {
        sql(
          s"""
             |CREATE TABLE $tableNoPartition(a int)
             |STORED AS PARQUET
             |LOCATION '$baseLocation/$tableNoPartition'
           """.stripMargin)
        sql(
          s"""
             |CREATE TABLE $tableWithPartition(a int)
             |PARTITIONED BY (p int)
             |STORED AS PARQUET
             |LOCATION '$baseLocation/$tableWithPartition'
           """.stripMargin)

        // Overwrites `tableName` with the rows of table_source while the given Parquet
        // compression codec is set in the SQL conf. Partitioned tables are written to
        // the single static partition p=10000.
        def insertOverwriteTable(
            tableName: String,
            codec: String,
            isPartitioned: Boolean): Unit = {
          val partitionSpec = if (isPartitioned) "partition (p=10000)" else ""
          withSQLConf("spark.sql.parquet.compression.codec" -> codec) {
            sql(
              s"""
                 |INSERT OVERWRITE TABLE $tableName
                 |$partitionSpec
                 |SELECT * from table_source
               """.stripMargin)
          }
        }

        // Recursively collects every regular file under `file`, skipping entries whose
        // name starts with ".hive-staging". Returns Nil when `file` does not exist.
        def getDirFiles(file: File): List[File] = {
          if (!file.exists()) {
            Nil
          } else if (file.isFile) {
            List(file)
          } else {
            // File.listFiles() returns null (not an empty array) on an I/O error.
            Option(file.listFiles()).map(_.toList).getOrElse(Nil)
              .filterNot(_.getName.startsWith(".hive-staging"))
              .flatMap(getDirFiles)
          }
        }

        // Rewrites `tableName` using `codec` and returns the total size in bytes of the
        // "part-*" files found under the table directory.
        def getTableSize(
            tableName: String,
            codec: String,
            isPartitioned: Boolean = false): Long = {
          insertOverwriteTable(tableName, codec, isPartitioned)
          // Resolve against the local directory: java.io.File(String) does not parse
          // "file:" URIs, so the URI string used for LOCATION cannot be reused here.
          val tableDir = new File(tmpDir, tableName)
          getDirFiles(tableDir).filter(_.getName.startsWith("part-")).map(_.length()).sum
        }

        // File metadata differs slightly between the partitioned and the unpartitioned
        // table, so their sizes are not identical; the difference is expected to stay
        // below 1024 bytes, while switching codec is expected to change the size by
        // more than that.
        val maxDiff = 1024L

        // Measure each (table, codec) combination exactly once.
        val partitionedUncompressed =
          getTableSize(tableWithPartition, "uncompressed", isPartitioned = true)
        val unpartitionedUncompressed = getTableSize(tableNoPartition, "uncompressed")
        val partitionedGzip = getTableSize(tableWithPartition, "gzip", isPartitioned = true)
        val unpartitionedGzip = getTableSize(tableNoPartition, "gzip")

        // Guard against silently measuring an empty or missing directory.
        val measuredSizes = Seq(
          partitionedUncompressed, unpartitionedUncompressed, partitionedGzip, unpartitionedGzip)
        assert(measuredSizes.forall(_ > 0))

        // Compare absolute differences: a partitioned table that is much smaller than
        // the unpartitioned one (codec ignored) must fail as well.
        assert(math.abs(partitionedUncompressed - unpartitionedUncompressed) < maxDiff)
        assert(math.abs(partitionedGzip - unpartitionedGzip) < maxDiff)
        assert(partitionedUncompressed - partitionedGzip > maxDiff)
      }
    }
  }
}
731797}
0 commit comments