@@ -1078,6 +1078,146 @@ class AggregateBenchmark extends BenchmarkBase {
10781078 */
10791079 }
10801080
// Manual micro-benchmark (disabled via `ignore`): groups `N` generated rows by four key
// columns and sums four value columns, while the number of distinct keys grows as powers
// of two. For every key count the query is run under each of the three
// `spark.sql.codegen.aggregate.map.enforce.impl` settings and the best observed cost in
// ns/row is printed, one table row per key count.
ignore("4 key fields, 4 value field, varying linear distinct keys") {
  val N = 20 << 22

  // Each configuration is executed `totalRuns` times; the first `warmupRuns` executions
  // are treated as warm-up and excluded from the reported minimum.
  val totalRuns = 5
  val warmupRuns = 2

  // Column aliases k0..k3, used both as grouping keys and as summed values.
  val keyColumns = List.range(0, 4).map(x => s"k$x")

  sparkSession.conf.set("spark.sql.codegen.wholeStage", "true")
  sparkSession.conf.set("spark.sql.codegen.aggregate.map.columns.max", "30")

  // scalastyle:off
  println(Benchmark.getJVMOSInfo())
  println(Benchmark.getProcessorName())
  printf("%20s %20s %20s %20s\n", "Num. Distinct Keys", "No Fast Hashmap",
    "Vectorized", "Row-based")
  // scalastyle:on

  // Order matters: results are printed positionally under the three header columns above.
  val modes = List("skip", "vectorized", "rowbased")

  // The query text does not depend on the key count or the mode, so build it once.
  val query = "select " + keyColumns.map(k => s"sum($k)").mkString(",") +
    " from test group by " + keyColumns.mkString(",")

  for (i <- 0 until 17) {
    // `id & mask` keeps the low `i` bits of the row id, giving 2^i distinct key values.
    val mask = (1 << i) - 1
    val results = modes.map { mode =>
      sparkSession.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", mode)
      val nsPerRowByRun = (0 until totalRuns).map { _ =>
        System.gc()
        sparkSession.range(N)
          .selectExpr(keyColumns.map(k => s"id & $mask as $k"): _*)
          .createOrReplaceTempView("test")
        val timeStart = System.nanoTime
        sparkSession.sql(query).collect()
        val timeEnd = System.nanoTime
        (timeEnd - timeStart) / N
      }
      // True minimum over the measured runs. The previous implementation seeded the
      // minimum with 1000, which silently capped every reported value at 1000 ns/row.
      nsPerRowByRun.drop(warmupRuns).min
    }
    printf("%20s %20s %20s %20s\n", (1 << i), results(0), results(1), results(2))
  }
  printf("Unit: ns/row\n")

  /*
  Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
  Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz

  NOTE(review): the table below lists key counts up to 262144, whereas the loop above
  stops at 65536 -- presumably it was recorded with a larger loop bound; confirm.

    Num. Distinct Keys      No Fast Hashmap           Vectorized            Row-based
                     1                   33                   38                   24
                     2                   58                   43                   30
                     4                   58                   42                   28
                     8                   57                   46                   28
                    16                   56                   41                   28
                    32                   55                   44                   27
                    64                   56                   48                   27
                   128                   58                   43                   27
                   256                   60                   43                   30
                   512                   61                   45                   31
                  1024                   62                   44                   31
                  2048                   64                   42                   38
                  4096                   66                   47                   38
                  8192                   70                   48                   38
                 16384                   72                   48                   42
                 32768                   77                   54                   47
                 65536                   96                   75                   61
                131072                  115                  119                  130
                262144                  137                  162                  185
  Unit: ns/row
  */
}
1154+
// Manual micro-benchmark (disabled via `ignore`): groups `N` generated rows by a single
// key column (k0) and sums a single value column (k1), while the number of distinct keys
// grows as powers of two. For every key count the query is run under each of the three
// `spark.sql.codegen.aggregate.map.enforce.impl` settings and the best observed cost in
// ns/row is printed, one table row per key count.
ignore("single key field, single value field, varying linear distinct keys") {
  val N = 20 << 21

  // Each configuration is executed `totalRuns` times; the first `warmupRuns` executions
  // are treated as warm-up and excluded from the reported minimum.
  val totalRuns = 5
  val warmupRuns = 2

  // Column aliases k0 (grouping key) and k1 (summed value); both hold the same expression.
  val columns = List.range(0, 2).map(x => s"k$x")

  sparkSession.conf.set("spark.sql.codegen.wholeStage", "true")
  sparkSession.conf.set("spark.sql.codegen.aggregate.map.columns.max", "30")

  // scalastyle:off
  println(Benchmark.getJVMOSInfo())
  println(Benchmark.getProcessorName())
  printf("%20s %20s %20s %20s\n", "Num. Distinct Keys", "No Fast Hashmap",
    "Vectorized", "Row-based")
  // scalastyle:on

  // Order matters: results are printed positionally under the three header columns above.
  val modes = List("skip", "vectorized", "rowbased")

  for (i <- 0 until 21) {
    // `id & mask` keeps the low `i` bits of the row id, giving 2^i distinct key values.
    val mask = (1 << i) - 1
    val results = modes.map { mode =>
      sparkSession.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", mode)
      val nsPerRowByRun = (0 until totalRuns).map { _ =>
        System.gc()
        sparkSession.range(N)
          .selectExpr(columns.map(c => s"id & $mask as $c"): _*)
          .createOrReplaceTempView("test")
        val timeStart = System.nanoTime
        sparkSession.sql("select sum(k1) from test group by k0").collect()
        val timeEnd = System.nanoTime
        (timeEnd - timeStart) / N
      }
      // True minimum over the measured runs. The previous implementation seeded the
      // minimum with 1000, which silently capped every reported value at 1000 ns/row.
      nsPerRowByRun.drop(warmupRuns).min
    }
    printf("%20s %20s %20s %20s\n", (1 << i), results(0), results(1), results(2))
  }
  printf("Unit: ns/row\n")

  /*
  Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
  Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz

  Partial results (only a subset of the key counts produced by the loop above is listed):

    Num. Distinct Keys      No Fast Hashmap           Vectorized            Row-based
                     1                   23                   14                   12
                     8                   25                   13                   14
                    64                   24                   13                   14
                   512                   27                   15                   14
                  4096                   29                   18                   15
                 32768                   39                   17                   16
                 65536                   46                   19                   15
                131072                   65                   38                   35
                262144                   91                   74                   86
                524288                  119                   93                   95
  Unit: ns/row
  */
}
1220+
10811221 ignore(" TPCDS mini-scale benchmark" ) {
10821222 /*
10831223 Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
@@ -1177,4 +1317,4 @@ class AggregateBenchmark extends BenchmarkBase {
11771317 q24b 962 959 (1.0x) 981 (0.9x)
11781318 */
11791319 }
1180- }
1320+ }
0 commit comments