
Commit 6cf23ae

Author: wangzhenhua
Commit message: comments
Parent: 30ac539


sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala

Lines changed: 9 additions & 11 deletions
@@ -394,33 +394,33 @@ class StatisticsSuite extends QueryTest with TestHiveSingleton with SQLTestUtils
     val (statsBeforeUpdate, statsAfterUpdate) = getStatsBeforeAfterUpdate(isAnalyzeColumns = false)

     assert(statsBeforeUpdate.sizeInBytes > 0)
-    assert(statsBeforeUpdate.rowCount.contains(1))
+    assert(statsBeforeUpdate.rowCount == Some(1))

     assert(statsAfterUpdate.sizeInBytes > statsBeforeUpdate.sizeInBytes)
-    assert(statsAfterUpdate.rowCount.contains(2))
+    assert(statsAfterUpdate.rowCount == Some(2))
   }

   test("test refreshing column stats of cached data source table by `ANALYZE TABLE` statement") {
     val (statsBeforeUpdate, statsAfterUpdate) = getStatsBeforeAfterUpdate(isAnalyzeColumns = true)

     assert(statsBeforeUpdate.sizeInBytes > 0)
-    assert(statsBeforeUpdate.rowCount.contains(1))
+    assert(statsBeforeUpdate.rowCount == Some(1))
     StatisticsTest.checkColStat(
       dataType = IntegerType,
       colStat = statsBeforeUpdate.colStats("key"),
       expectedColStat = ColumnStat(InternalRow(0L, 1, 1, 1L)),
       rsd = spark.sessionState.conf.ndvMaxError)

     assert(statsAfterUpdate.sizeInBytes > statsBeforeUpdate.sizeInBytes)
-    assert(statsAfterUpdate.rowCount.contains(2))
+    assert(statsAfterUpdate.rowCount == Some(2))
     StatisticsTest.checkColStat(
       dataType = IntegerType,
       colStat = statsAfterUpdate.colStats("key"),
       expectedColStat = ColumnStat(InternalRow(0L, 2, 1, 2L)),
       rsd = spark.sessionState.conf.ndvMaxError)
   }

-  private def dataAndColStats(): (DataFrame, Seq[(StructField, ColumnStat)]) = {
+  private lazy val (testDataFrame, expectedColStatsSeq) = {
     import testImplicits._

     val intSeq = Seq(1, 2)
@@ -430,8 +430,8 @@ class StatisticsSuite extends QueryTest with TestHiveSingleton with SQLTestUtils
     val data = intSeq.indices.map { i =>
       (intSeq(i), stringSeq(i), binarySeq(i), booleanSeq(i))
     }
-    val df = data.toDF("c1", "c2", "c3", "c4")
-    val expectedColStatsSeq = df.schema.map { f =>
+    val df: DataFrame = data.toDF("c1", "c2", "c3", "c4")
+    val expectedColStatsSeq: Seq[(StructField, ColumnStat)] = df.schema.map { f =>
       val colStat = f.dataType match {
         case IntegerType =>
           ColumnStat(InternalRow(0L, intSeq.max, intSeq.min, intSeq.distinct.length.toLong))
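The core refactoring above replaces the dataAndColStats() helper with a private lazy val whose tuple result is destructured into testDataFrame and expectedColStatsSeq, so the fixture is built once on first access instead of on every call; the two call sites in the hunks below switch to those shared members. A minimal sketch of the Scala pattern, with hypothetical names rather than the suite's real fixture:

object LazyFixtureSketch {
  // Lazy pattern definition: the block runs once, on first access, and both
  // names are bound to the components of the resulting tuple.
  private lazy val (testData, expectedStats) = {
    println("building fixture")             // printed once, however often the members are read
    val data = Seq(1, 2)
    (data, data.map(i => (i, i.toLong)))
  }

  def main(args: Array[String]): Unit = {
    println(testData)       // first access runs the initializer
    println(expectedStats)  // reuses the already-built tuple
  }
}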
@@ -478,8 +478,7 @@ class StatisticsSuite extends QueryTest with TestHiveSingleton with SQLTestUtils
   test("generate and load column-level stats for data source table") {
     val dsTable = "dsTable"
     withTable(dsTable) {
-      val (df, expectedColStatsSeq) = dataAndColStats()
-      df.write.format("parquet").saveAsTable(dsTable)
+      testDataFrame.write.format("parquet").saveAsTable(dsTable)
       sql(s"ANALYZE TABLE $dsTable COMPUTE STATISTICS FOR COLUMNS c1, c2, c3, c4")
       checkColStats(dsTable, isDataSourceTable = true, expectedColStatsSeq)
     }
@@ -489,8 +488,7 @@ class StatisticsSuite extends QueryTest with TestHiveSingleton with SQLTestUtils
     val hTable = "hTable"
     val tmp = "tmp"
     withTable(hTable, tmp) {
-      val (df, expectedColStatsSeq) = dataAndColStats()
-      df.write.format("parquet").saveAsTable(tmp)
+      testDataFrame.write.format("parquet").saveAsTable(tmp)
       sql(s"CREATE TABLE $hTable (c1 int, c2 string, c3 binary, c4 boolean) STORED AS TEXTFILE")
       sql(s"INSERT INTO $hTable SELECT * FROM $tmp")
       sql(s"ANALYZE TABLE $hTable COMPUTE STATISTICS FOR COLUMNS c1, c2, c3, c4")
