Skip to content

Commit 3b3c9ea

Browse files
committed
Add benchmark results for vectorized vs. rowbased hashmap
1 parent 20baf3e commit 3b3c9ea

File tree

1 file changed

+237
-25
lines changed

1 file changed

+237
-25
lines changed

sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/AggregateBenchmark.scala

Lines changed: 237 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -602,7 +602,76 @@ class AggregateBenchmark extends BenchmarkBase {
602602
query.queryExecution.debug.codegen()
603603
}
604604

605-
ignore("1 key field, 1 value field, varying distinct keys") {
605+
ignore("1 key field, 1 value field, distinct linear keys") {
  // Times `select sum(k0) from test group by k0` over N rows while the number of distinct
  // keys doubles from 1 to 16384 (k0 = id & (2^i - 1) for i = 0..14). Each key count is run
  // once per value of spark.sql.codegen.aggregate.map.enforce.impl listed in `modes`, and the
  // fastest recorded run is printed in ns/row. The three printed columns follow the order of
  // `modes`: "skip" -> No Fast Hashmap, "vectorized" -> Vectorized, "rowbased" -> Row-based.
  val N = 20 << 22

  // Every (key count, mode) pair is executed this many times.
  val runsPerMode = 5
  // Runs 0 and 1 are executed but excluded from the reported minimum (presumably warm-up).
  val skippedRuns = 2

  sparkSession.conf.set("spark.sql.codegen.wholeStage", "true")
  sparkSession.conf.set("spark.sql.codegen.aggregate.map.columns.max", "30")

  // scalastyle:off
  println(Benchmark.getJVMOSInfo())
  println(Benchmark.getProcessorName())
  printf("%20s %20s %20s %20s\n", "Num. Distinct Keys", "No Fast Hashmap",
    "Vectorized", "Row-based")
  // scalastyle:on

  val modes = List("skip", "vectorized", "rowbased")

  for (i <- 0 until 15) {
    val results = modes.map { mode =>
      sparkSession.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", mode)
      // Seed with Long.MaxValue rather than a fixed 1000 so that a configuration slower than
      // 1000 ns/row is reported with its real timing instead of being capped at the seed.
      var minTime: Long = Long.MaxValue
      for (j <- 0 until runsPerMode) {
        System.gc()
        sparkSession.range(N)
          .selectExpr(s"id & ${(1 << i) - 1} as k0")
          .createOrReplaceTempView("test")
        val timeStart = System.nanoTime
        sparkSession.sql("select sum(k0)" +
          " from test group by k0").collect()
        val timeEnd = System.nanoTime
        // Integer division: the per-row cost is truncated to whole nanoseconds.
        val nsPerRow = (timeEnd - timeStart) / N
        if (j >= skippedRuns && nsPerRow < minTime) minTime = nsPerRow
      }
      minTime
    }
    printf("%20s %20s %20s %20s\n", 1 << i, results(0), results(1), results(2))
  }
  printf("Unit: ns/row\n")

  /*
  Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
  Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz

  Num. Distinct Keys No Fast Hashmap Vectorized Row-based
  1 21 13 11
  2 23 14 13
  4 23 14 14
  8 23 14 14
  16 23 12 13
  32 24 12 13
  64 24 14 16
  128 24 14 13
  256 25 14 14
  512 25 16 14
  1024 25 16 15
  2048 26 12 15
  4096 27 15 15
  8192 33 16 15
  16384 34 15 15
  Unit: ns/row
  */
}
673+
674+
ignore("1 key field, 1 value field, distinct random keys") {
606675
val N = 20 << 22;
607676

608677
var timeStart: Long = 0L
@@ -650,27 +719,28 @@ class AggregateBenchmark extends BenchmarkBase {
650719
/*
651720
Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
652721
Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz
722+
653723
Num. Distinct Keys No Fast Hashmap Vectorized Row-based
654-
1 30 8 12
655-
2 38 13 21
656-
4 37 12 23
657-
8 37 11 20
658-
16 36 11 19
659-
32 36 10 19
660-
64 36 11 19
661-
128 38 16 20
662-
256 39 17 21
663-
512 39 18 22
664-
1024 41 20 23
665-
2048 42 20 23
666-
4096 46 19 23
667-
8192 52 19 24
668-
16384 53 20 26
724+
1 32 9 13
725+
2 39 16 22
726+
4 39 14 23
727+
8 39 13 22
728+
16 38 13 20
729+
32 38 13 20
730+
64 38 13 20
731+
128 37 16 21
732+
256 36 17 22
733+
512 38 17 21
734+
1024 39 18 21
735+
2048 41 18 21
736+
4096 44 18 22
737+
8192 49 20 23
738+
16384 52 23 25
669739
Unit: ns/row
670740
*/
671741
}
672742

673-
ignore("1 key field, varying value fields, 16384 distinct keys") {
743+
ignore("1 key field, varying value fields, 16 linear distinct keys") {
674744
val N = 20 << 22;
675745

676746
var timeStart: Long = 0L
@@ -697,11 +767,10 @@ class AggregateBenchmark extends BenchmarkBase {
697767
while (j < 5) {
698768
System.gc()
699769
sparkSession.range(N)
700-
.selectExpr(List.range(0, i)
701-
.map(x => "cast(floor(rand() * " + 16384 + ") as long) as k" + x): _*)
770+
.selectExpr("id & " + 15 + " as k0")
702771
.createOrReplaceTempView("test")
703772
timeStart = System.nanoTime
704-
sparkSession.sql("select " + List.range(0, i).map(x => "sum(k" + x + ")").mkString(",") +
773+
sparkSession.sql("select " + List.range(0, i).map(x => "sum(k" + 0 + ")").mkString(",") +
705774
" from test group by k0").collect()
706775
timeEnd = System.nanoTime
707776
nsPerRow = (timeEnd - timeStart) / N
@@ -716,10 +785,25 @@ class AggregateBenchmark extends BenchmarkBase {
716785
printf("Unit: ns/row\n")
717786

718787
/*
788+
Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
789+
Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz
790+
791+
Num. Value Fields No Fast Hashmap Vectorized Row-based
792+
1 24 15 12
793+
2 25 24 14
794+
3 29 25 17
795+
4 31 32 22
796+
5 33 40 24
797+
6 36 36 27
798+
7 38 44 28
799+
8 47 50 32
800+
9 52 55 37
801+
10 59 59 45
802+
Unit: ns/row
719803
*/
720804
}
721805

722-
ignore("varying key fields, 1 value field, 16384 distinct keys") {
806+
ignore("varying key fields, 1 value field, 16 linear distinct keys") {
723807
val N = 20 << 22;
724808

725809
var timeStart: Long = 0L
@@ -745,7 +829,7 @@ class AggregateBenchmark extends BenchmarkBase {
745829
var minTime: Long = 1000
746830
while (j < 5) {
747831
System.gc()
748-
val s = "id & " + 16383 + " as k"
832+
val s = "id & " + 15 + " as k"
749833
sparkSession.range(N)
750834
.selectExpr(List.range(0, i).map(x => s + x): _*)
751835
.createOrReplaceTempView("test")
@@ -765,10 +849,25 @@ class AggregateBenchmark extends BenchmarkBase {
765849
printf("Unit: ns/row\n")
766850

767851
/*
852+
Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
853+
Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz
854+
855+
Num. Key Fields No Fast Hashmap Vectorized Row-based
856+
1 24 15 13
857+
2 31 20 14
858+
3 37 22 17
859+
4 46 26 18
860+
5 53 27 20
861+
6 61 29 23
862+
7 69 36 25
863+
8 78 37 27
864+
9 88 43 30
865+
10 92 45 33
866+
Unit: ns/row
768867
*/
769868
}
770869

771-
test("varying key fields, varying value field, 256 distinct keys") {
870+
ignore("varying key fields, varying value field, 16 linear distinct keys") {
772871
val N = 20 << 22;
773872

774873
var timeStart: Long = 0L
@@ -794,7 +893,7 @@ class AggregateBenchmark extends BenchmarkBase {
794893
var minTime: Long = 1000
795894
while (j < 5) {
796895
System.gc()
797-
val s = "id & " + 255 + " as k"
896+
val s = "id & " + 15 + " as k"
798897
sparkSession.range(N)
799898
.selectExpr(List.range(0, i).map(x => s + x): _*)
800899
.createOrReplaceTempView("test")
@@ -813,10 +912,93 @@ class AggregateBenchmark extends BenchmarkBase {
813912
i += 1
814913
}
815914
printf("Unit: ns/row\n")
915+
916+
/*
917+
Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
918+
Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz
919+
920+
Num. Total Fields No Fast Hashmap Vectorized Row-based
921+
2 24 14 12
922+
4 32 28 17
923+
6 42 29 21
924+
8 53 36 24
925+
10 62 44 29
926+
12 77 50 34
927+
14 93 61 37
928+
16 109 75 41
929+
18 124 88 51
930+
20 145 97 70
931+
Unit: ns/row
932+
*/
816933
}
817934

935+
ignore("varying key fields, varying value field, 512 linear distinct keys") {
// Benchmark: for i = 1..10, builds a view with i columns k0..k(i-1), each defined as
// `id & 511`, then times `select sum(k0),...,sum(k(i-1)) from test group by k0,...,k(i-1)`
// once per entry of `modes`, and prints the fastest recorded run in ns/row.
936+
// Number of rows fed to every query; also the divisor used to derive ns/row.
val N = 20 << 22;
937+
938+
var timeStart: Long = 0L
939+
var timeEnd: Long = 0L
940+
var nsPerRow: Long = 0L
941+
// Number of key columns (and of sum() aggregates) for the current round; runs 1..10.
var i = 1
942+
sparkSession.conf.set("spark.sql.codegen.wholeStage", "true")
943+
sparkSession.conf.set("spark.sql.codegen.aggregate.map.columns.max", "30")
944+
945+
// scalastyle:off
946+
println(Benchmark.getJVMOSInfo())
947+
println(Benchmark.getProcessorName())
948+
printf("%20s %20s %20s %20s\n", "Num. Total Fields", "No Fast Hashmap",
949+
"Vectorized", "Row-based")
950+
// scalastyle:on
951+
952+
// Values written to spark.sql.codegen.aggregate.map.enforce.impl, in the same order as
// the three result columns printed above.
val modes = List("skip", "vectorized", "rowbased")
818953

819-
test("varying key fields, varying value field, varying distinct keys") {
954+
while (i < 11) {
955+
val results = modes.map(mode => {
956+
sparkSession.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", mode)
957+
var j = 0
958+
// NOTE(review): the minimum is seeded with 1000, so any configuration whose runs all
// take 1000 ns/row or more is reported as exactly 1000.
var minTime: Long = 1000
959+
// Five runs per (i, mode) pair; see the `j > 1` check below for which ones count.
while (j < 5) {
960+
System.gc()
961+
// Column expression prefix; the column index is appended below to form k0, k1, ...
val s = "id & " + 511 + " as k"
962+
sparkSession.range(N)
963+
.selectExpr(List.range(0, i).map(x => s + x): _*)
964+
.createOrReplaceTempView("test")
965+
// Only the sql(...).collect() call sits between the two nanoTime readings.
timeStart = System.nanoTime
966+
sparkSession.sql("select " + List.range(0, i).map(x => "sum(k" + x + ")").mkString(",") +
967+
" from test group by " + List.range(0, i).map(x => "k" + x).mkString(",")).collect()
968+
timeEnd = System.nanoTime
969+
// Integer division: the per-row cost is truncated to whole nanoseconds.
nsPerRow = (timeEnd - timeStart) / N
970+
// printf("nsPerRow i=%d j=%d mode=%10s %20s\n", i, j, mode, nsPerRow)
971+
// Runs j = 0 and j = 1 are never recorded; only runs 2..4 can lower minTime.
if (j > 1 && minTime > nsPerRow) minTime = nsPerRow
972+
j += 1
973+
}
974+
minTime
975+
})
976+
// First column is i * 2: i group-by key columns plus i sum() aggregates.
printf("%20s %20s %20s %20s\n", i * 2, results(0), results(1), results(2))
977+
i += 1
978+
}
979+
printf("Unit: ns/row\n")
980+
981+
/*
982+
Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
983+
Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz
984+
985+
Num. Total Fields No Fast Hashmap Vectorized Row-based
986+
2 26 16 13
987+
4 36 24 17
988+
6 45 30 22
989+
8 54 33 27
990+
10 64 38 30
991+
12 74 47 35
992+
14 95 54 39
993+
16 114 72 44
994+
18 129 70 51
995+
20 150 91 72
996+
Unit: ns/row
997+
*/
998+
}
999+
1000+
1001+
ignore("varying key fields, varying value field, varying linear distinct keys") {
8201002
val N = 20 << 22;
8211003

8221004
var timeStart: Long = 0L
@@ -863,6 +1045,36 @@ class AggregateBenchmark extends BenchmarkBase {
8631045
printf("Unit: ns/row\n")
8641046

8651047
/*
1048+
Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
1049+
Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz
1050+
1051+
Num. Total Fields No Fast Hashmap Vectorized Row-based
1052+
2 24 11 10
1053+
4 33 25 16
1054+
6 42 30 21
1055+
8 53 44 24
1056+
10 65 52 27
1057+
12 74 47 33
1058+
14 92 69 35
1059+
16 109 77 40
1060+
18 127 75 49
1061+
20 143 80 66
1062+
Unit: ns/row
1063+
1064+
Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Linux 3.13.0-74-generic
1065+
Intel(R) Xeon(R) CPU E5-2676 v3 @ 2.40GHz
1066+
Num. Total Fields No Fast Hashmap Vectorized Row-based
1067+
2 38 15 15
1068+
4 50 25 25
1069+
6 65 35 30
1070+
8 79 42 35
1071+
10 93 50 43
1072+
12 108 58 48
1073+
14 120 71 57
1074+
16 145 79 62
1075+
18 166 88 77
1076+
20 189 96 98
1077+
Unit: ns/row
8661078
*/
8671079
}
8681080

0 commit comments

Comments
 (0)