@@ -602,7 +602,76 @@ class AggregateBenchmark extends BenchmarkBase {
602602 query.queryExecution.debug.codegen()
603603 }
604604
605- ignore(" 1 key field, 1 value field, varying distinct keys" ) {
ignore("1 key field, 1 value field, distinct linear keys") {
  // Measures `select sum(k0) from test group by k0` over N rows while the number of distinct
  // keys grows as 2^i (k0 = id & (2^i - 1)), once per forced aggregate hash map implementation.
  // The printed columns follow the order of `modes`: "skip" -> "No Fast Hashmap",
  // "vectorized" -> "Vectorized", "rowbased" -> "Row-based".
  val N = 20 << 22

  // The first `warmupRuns` of the `totalRuns` repetitions are executed but not reported.
  val totalRuns = 5
  val warmupRuns = 2

  sparkSession.conf.set("spark.sql.codegen.wholeStage", "true")
  sparkSession.conf.set("spark.sql.codegen.aggregate.map.columns.max", "30")

  // scalastyle:off
  println(Benchmark.getJVMOSInfo())
  println(Benchmark.getProcessorName())
  printf("%20s %20s %20s %20s\n", "Num. Distinct Keys", "No Fast Hashmap",
    "Vectorized", "Row-based")
  // scalastyle:on

  val modes = List("skip", "vectorized", "rowbased")

  for (i <- 0 until 15) {
    // The view only depends on `i`, so it is registered once per key cardinality instead of
    // once per repetition; this happens outside of the timed region either way.
    sparkSession.range(N)
      .selectExpr("id & " + ((1 << i) - 1) + " as k0")
      .createOrReplaceTempView("test")

    val results = modes.map { mode =>
      sparkSession.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", mode)
      val nsPerRowSamples = (0 until totalRuns).map { _ =>
        System.gc()
        val timeStart = System.nanoTime
        sparkSession.sql("select sum(k0) from test group by k0").collect()
        val timeEnd = System.nanoTime
        // Integer division: the result is reported in whole nanoseconds per row.
        (timeEnd - timeStart) / N
      }
      // Take the true minimum of the measured runs; no artificial upper bound is applied,
      // so slow configurations are reported with their real cost.
      nsPerRowSamples.drop(warmupRuns).min
    }
    printf("%20s %20s %20s %20s\n", 1 << i, results(0), results(1), results(2))
  }
  printf("Unit: ns/row\n")

  /*
  Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
  Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz

  Num. Distinct Keys   No Fast Hashmap   Vectorized   Row-based
                   1                21           13          11
                   2                23           14          13
                   4                23           14          14
                   8                23           14          14
                  16                23           12          13
                  32                24           12          13
                  64                24           14          16
                 128                24           14          13
                 256                25           14          14
                 512                25           16          14
                1024                25           16          15
                2048                26           12          15
                4096                27           15          15
                8192                33           16          15
               16384                34           15          15
  Unit: ns/row
  */
}
673+
674+ ignore(" 1 key field, 1 value field, distinct random keys" ) {
606675 val N = 20 << 22 ;
607676
608677 var timeStart : Long = 0L
@@ -650,27 +719,28 @@ class AggregateBenchmark extends BenchmarkBase {
650719 /*
651720 Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
652721 Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz
722+
653723 Num. Distinct Keys No Fast Hashmap Vectorized Row-based
654- 1 30 8 12
655- 2 38 13 21
656- 4 37 12 23
657- 8 37 11 20
658- 16 36 11 19
659- 32 36 10 19
660- 64 36 11 19
661- 128 38 16 20
662- 256 39 17 21
663- 512 39 18 22
664- 1024 41 20 23
665- 2048 42 20 23
666- 4096 46 19 23
667- 8192 52 19 24
668- 16384 53 20 26
724+ 1 32 9 13
725+ 2 39 16 22
726+ 4 39 14 23
727+ 8 39 13 22
728+ 16 38 13 20
729+ 32 38 13 20
730+ 64 38 13 20
731+ 128 37 16 21
732+ 256 36 17 22
733+ 512 38 17 21
734+ 1024 39 18 21
735+ 2048 41 18 21
736+ 4096 44 18 22
737+ 8192 49 20 23
738+ 16384 52 23 25
669739 Unit: ns/row
670740 */
671741 }
672742
673- ignore(" 1 key field, varying value fields, 16384 distinct keys" ) {
743+ ignore(" 1 key field, varying value fields, 16 linear distinct keys" ) {
674744 val N = 20 << 22 ;
675745
676746 var timeStart : Long = 0L
@@ -697,11 +767,10 @@ class AggregateBenchmark extends BenchmarkBase {
697767 while (j < 5 ) {
698768 System .gc()
699769 sparkSession.range(N )
700- .selectExpr(List .range(0 , i)
701- .map(x => " cast(floor(rand() * " + 16384 + " ) as long) as k" + x): _* )
770+ .selectExpr(" id & " + 15 + " as k0" )
702771 .createOrReplaceTempView(" test" )
703772 timeStart = System .nanoTime
704- sparkSession.sql(" select " + List .range(0 , i).map(x => " sum(k" + x + " )" ).mkString(" ," ) +
773+ sparkSession.sql(" select " + List .range(0 , i).map(x => " sum(k" + 0 + " )" ).mkString(" ," ) +
705774 " from test group by k0" ).collect()
706775 timeEnd = System .nanoTime
707776 nsPerRow = (timeEnd - timeStart) / N
@@ -716,10 +785,25 @@ class AggregateBenchmark extends BenchmarkBase {
716785 printf(" Unit: ns/row\n " )
717786
718787 /*
788+ Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
789+ Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz
790+
791+ Num. Value Fields No Fast Hashmap Vectorized Row-based
792+ 1 24 15 12
793+ 2 25 24 14
794+ 3 29 25 17
795+ 4 31 32 22
796+ 5 33 40 24
797+ 6 36 36 27
798+ 7 38 44 28
799+ 8 47 50 32
800+ 9 52 55 37
801+ 10 59 59 45
802+ Unit: ns/row
719803 */
720804 }
721805
722- ignore(" varying key fields, 1 value field, 16384 distinct keys" ) {
806+ ignore(" varying key fields, 1 value field, 16 linear distinct keys" ) {
723807 val N = 20 << 22 ;
724808
725809 var timeStart : Long = 0L
@@ -745,7 +829,7 @@ class AggregateBenchmark extends BenchmarkBase {
745829 var minTime : Long = 1000
746830 while (j < 5 ) {
747831 System .gc()
748- val s = " id & " + 16383 + " as k"
832+ val s = " id & " + 15 + " as k"
749833 sparkSession.range(N )
750834 .selectExpr(List .range(0 , i).map(x => s + x): _* )
751835 .createOrReplaceTempView(" test" )
@@ -765,10 +849,25 @@ class AggregateBenchmark extends BenchmarkBase {
765849 printf(" Unit: ns/row\n " )
766850
767851 /*
852+ Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
853+ Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz
854+
855+ Num. Key Fields No Fast Hashmap Vectorized Row-based
856+ 1 24 15 13
857+ 2 31 20 14
858+ 3 37 22 17
859+ 4 46 26 18
860+ 5 53 27 20
861+ 6 61 29 23
862+ 7 69 36 25
863+ 8 78 37 27
864+ 9 88 43 30
865+ 10 92 45 33
866+ Unit: ns/row
768867 */
769868 }
770869
771- test (" varying key fields, varying value field, 256 distinct keys" ) {
870+ ignore (" varying key fields, varying value field, 16 linear distinct keys" ) {
772871 val N = 20 << 22 ;
773872
774873 var timeStart : Long = 0L
@@ -794,7 +893,7 @@ class AggregateBenchmark extends BenchmarkBase {
794893 var minTime : Long = 1000
795894 while (j < 5 ) {
796895 System .gc()
797- val s = " id & " + 255 + " as k"
896+ val s = " id & " + 15 + " as k"
798897 sparkSession.range(N )
799898 .selectExpr(List .range(0 , i).map(x => s + x): _* )
800899 .createOrReplaceTempView(" test" )
@@ -813,10 +912,93 @@ class AggregateBenchmark extends BenchmarkBase {
813912 i += 1
814913 }
815914 printf(" Unit: ns/row\n " )
915+
916+ /*
917+ Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
918+ Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz
919+
920+ Num. Total Fields No Fast Hashmap Vectorized Row-based
921+ 2 24 14 12
922+ 4 32 28 17
923+ 6 42 29 21
924+ 8 53 36 24
925+ 10 62 44 29
926+ 12 77 50 34
927+ 14 93 61 37
928+ 16 109 75 41
929+ 18 124 88 51
930+ 20 145 97 70
931+ Unit: ns/row
932+ */
816933 }
817934
ignore("varying key fields, varying value field, 512 linear distinct keys") {
  // Measures an aggregation with `i` grouping keys and `i` summed columns (k0 .. k(i-1), each
  // defined as `id & 511`) over N rows, once per forced aggregate hash map implementation.
  // The first printed column is the total field count, i.e. keys plus values (i * 2).
  // The remaining columns follow the order of `modes`: "skip" -> "No Fast Hashmap",
  // "vectorized" -> "Vectorized", "rowbased" -> "Row-based".
  val N = 20 << 22

  // The first `warmupRuns` of the `totalRuns` repetitions are executed but not reported.
  val totalRuns = 5
  val warmupRuns = 2

  sparkSession.conf.set("spark.sql.codegen.wholeStage", "true")
  sparkSession.conf.set("spark.sql.codegen.aggregate.map.columns.max", "30")

  // scalastyle:off
  println(Benchmark.getJVMOSInfo())
  println(Benchmark.getProcessorName())
  printf("%20s %20s %20s %20s\n", "Num. Total Fields", "No Fast Hashmap",
    "Vectorized", "Row-based")
  // scalastyle:on

  val modes = List("skip", "vectorized", "rowbased")

  for (i <- 1 until 11) {
    val columns = List.range(0, i).map(x => "k" + x)

    // The view and the query text only depend on `i`, so they are built once per field count
    // instead of once per repetition; this happens outside of the timed region either way.
    sparkSession.range(N)
      .selectExpr(columns.map(c => "id & " + 511 + " as " + c): _*)
      .createOrReplaceTempView("test")
    val query = "select " + columns.map(c => "sum(" + c + ")").mkString(",") +
      " from test group by " + columns.mkString(",")

    val results = modes.map { mode =>
      sparkSession.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", mode)
      val nsPerRowSamples = (0 until totalRuns).map { _ =>
        System.gc()
        val timeStart = System.nanoTime
        sparkSession.sql(query).collect()
        val timeEnd = System.nanoTime
        // Integer division: the result is reported in whole nanoseconds per row.
        (timeEnd - timeStart) / N
      }
      // Take the true minimum of the measured runs; no artificial upper bound is applied,
      // so slow configurations are reported with their real cost.
      nsPerRowSamples.drop(warmupRuns).min
    }
    printf("%20s %20s %20s %20s\n", i * 2, results(0), results(1), results(2))
  }
  printf("Unit: ns/row\n")

  /*
  Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
  Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz

  Num. Total Fields   No Fast Hashmap   Vectorized   Row-based
                  2                26           16          13
                  4                36           24          17
                  6                45           30          22
                  8                54           33          27
                 10                64           38          30
                 12                74           47          35
                 14                95           54          39
                 16               114           72          44
                 18               129           70          51
                 20               150           91          72
  Unit: ns/row
  */
}
999+
1000+
1001+ ignore(" varying key fields, varying value field, varying linear distinct keys" ) {
8201002 val N = 20 << 22 ;
8211003
8221004 var timeStart : Long = 0L
@@ -863,6 +1045,36 @@ class AggregateBenchmark extends BenchmarkBase {
8631045 printf(" Unit: ns/row\n " )
8641046
8651047 /*
1048+ Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
1049+ Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz
1050+
1051+ Num. Total Fields No Fast Hashmap Vectorized Row-based
1052+ 2 24 11 10
1053+ 4 33 25 16
1054+ 6 42 30 21
1055+ 8 53 44 24
1056+ 10 65 52 27
1057+ 12 74 47 33
1058+ 14 92 69 35
1059+ 16 109 77 40
1060+ 18 127 75 49
1061+ 20 143 80 66
1062+ Unit: ns/row
1063+
1064+ Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Linux 3.13.0-74-generic
1065+ Intel(R) Xeon(R) CPU E5-2676 v3 @ 2.40GHz
1066+ Num. Total Fields No Fast Hashmap Vectorized Row-based
1067+ 2 38 15 15
1068+ 4 50 25 25
1069+ 6 65 35 30
1070+ 8 79 42 35
1071+ 10 93 50 43
1072+ 12 108 58 48
1073+ 14 120 71 57
1074+ 16 145 79 62
1075+ 18 166 88 77
1076+ 20 189 96 98
1077+ Unit: ns/row
8661078 */
8671079 }
8681080
0 commit comments