Skip to content

Commit 0875fbc

Browse files
authored
Add results that show cache locality difference.
1 parent 4944b29 commit 0875fbc

File tree

1 file changed

+141
-1
lines changed

1 file changed

+141
-1
lines changed

sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/AggregateBenchmark.scala

Lines changed: 141 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1078,6 +1078,146 @@ class AggregateBenchmark extends BenchmarkBase {
10781078
*/
10791079
}
10801080

1081+
ignore("4 key fields, 4 value field, varying linear distinct keys") {
  // Number of input rows aggregated by every timed query.
  val N = 20 << 22
  // The number of distinct keys doubles at every step: 2^0, 2^1, ..., 2^(numSteps - 1).
  val numSteps = 17
  // Each (cardinality, mode) pair is executed numRuns times; the first numWarmupRuns
  // timings are discarded (presumably to let the JIT warm up) and the minimum of the
  // remaining ones is reported.
  val numRuns = 5
  val numWarmupRuns = 2
  // Number of key columns; each one is also summed as a value column.
  val numColumns = 4

  sparkSession.conf.set("spark.sql.codegen.wholeStage", "true")
  sparkSession.conf.set("spark.sql.codegen.aggregate.map.columns.max", "30")

  // scalastyle:off
  println(Benchmark.getJVMOSInfo())
  println(Benchmark.getProcessorName())
  printf("%20s %20s %20s %20s\n", "Num. Distinct Keys", "No Fast Hashmap",
    "Vectorized", "Row-based")
  // scalastyle:on

  // Values for "spark.sql.codegen.aggregate.map.enforce.impl", listed in the same order
  // as the result columns printed above.
  val modes = List("skip", "vectorized", "rowbased")

  // The query text does not depend on the cardinality or the mode, so build it once.
  val columns = List.range(0, numColumns)
  val query = "select " + columns.map(x => "sum(k" + x + ")").mkString(",") +
    " from test group by " + columns.map(x => "k" + x).mkString(",")

  for (i <- 0 until numSteps) {
    // Masking the row id with (2^i - 1) yields exactly 2^i distinct key values.
    val keyExpr = "id & " + ((1 << i) - 1) + " as k"
    // The view only depends on the cardinality, so register it once per step rather
    // than once per timed run.
    sparkSession.range(N)
      .selectExpr(columns.map(x => keyExpr + x): _*)
      .createOrReplaceTempView("test")

    val results = modes.map { mode =>
      sparkSession.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", mode)
      val nsPerRowByRun = (0 until numRuns).map { _ =>
        System.gc()
        val timeStart = System.nanoTime
        sparkSession.sql(query).collect()
        val timeEnd = System.nanoTime
        // Integer division: the result is truncated to whole nanoseconds per row.
        (timeEnd - timeStart) / N
      }
      // Take the true minimum of the measured runs. A fixed starting value (previously
      // 1000) would silently cap the reported time for slow configurations.
      nsPerRowByRun.drop(numWarmupRuns).min
    }
    printf("%20s %20s %20s %20s\n", 1 << i, results(0), results(1), results(2))
  }
  printf("Unit: ns/row\n")

  // NOTE(review): the table below has rows up to 262144 (2^18) distinct keys, while the
  // loop above stops at 2^(numSteps - 1) = 65536. The numbers were presumably recorded
  // with a larger bound -- confirm before relying on the last two rows.
  /*
  Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
  Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz

  Num. Distinct Keys    No Fast Hashmap    Vectorized    Row-based
                   1                 33            38           24
                   2                 58            43           30
                   4                 58            42           28
                   8                 57            46           28
                  16                 56            41           28
                  32                 55            44           27
                  64                 56            48           27
                 128                 58            43           27
                 256                 60            43           30
                 512                 61            45           31
                1024                 62            44           31
                2048                 64            42           38
                4096                 66            47           38
                8192                 70            48           38
               16384                 72            48           42
               32768                 77            54           47
               65536                 96            75           61
              131072                115           119          130
              262144                137           162          185
  Unit: ns/row
  */
}
1154+
1155+
ignore("single key field, single value field, varying linear distinct keys") {
  // Number of input rows aggregated by every timed query.
  val N = 20 << 21
  // The number of distinct keys doubles at every step: 2^0, 2^1, ..., 2^(numSteps - 1).
  val numSteps = 21
  // Each (cardinality, mode) pair is executed numRuns times; the first numWarmupRuns
  // timings are discarded (presumably to let the JIT warm up) and the minimum of the
  // remaining ones is reported.
  val numRuns = 5
  val numWarmupRuns = 2

  sparkSession.conf.set("spark.sql.codegen.wholeStage", "true")
  sparkSession.conf.set("spark.sql.codegen.aggregate.map.columns.max", "30")

  // scalastyle:off
  println(Benchmark.getJVMOSInfo())
  println(Benchmark.getProcessorName())
  printf("%20s %20s %20s %20s\n", "Num. Distinct Keys", "No Fast Hashmap",
    "Vectorized", "Row-based")
  // scalastyle:on

  // Values for "spark.sql.codegen.aggregate.map.enforce.impl", listed in the same order
  // as the result columns printed above.
  val modes = List("skip", "vectorized", "rowbased")

  // k0 is the grouping key and k1 the summed value; both are the same masked id.
  val query = "select sum(k1) from test group by k0"

  for (i <- 0 until numSteps) {
    // Masking the row id with (2^i - 1) yields exactly 2^i distinct key values.
    val keyExpr = "id & " + ((1 << i) - 1) + " as k"
    // The view only depends on the cardinality, so register it once per step rather
    // than once per timed run.
    sparkSession.range(N)
      .selectExpr(List.range(0, 2).map(x => keyExpr + x): _*)
      .createOrReplaceTempView("test")

    val results = modes.map { mode =>
      sparkSession.conf.set("spark.sql.codegen.aggregate.map.enforce.impl", mode)
      val nsPerRowByRun = (0 until numRuns).map { _ =>
        System.gc()
        val timeStart = System.nanoTime
        sparkSession.sql(query).collect()
        val timeEnd = System.nanoTime
        // Integer division: the result is truncated to whole nanoseconds per row.
        (timeEnd - timeStart) / N
      }
      // Take the true minimum of the measured runs. A fixed starting value (previously
      // 1000) would silently cap the reported time for slow configurations.
      nsPerRowByRun.drop(numWarmupRuns).min
    }
    printf("%20s %20s %20s %20s\n", 1 << i, results(0), results(1), results(2))
  }
  printf("Unit: ns/row\n")

  /*
  Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
  Intel(R) Core(TM) i7-4980HQ CPU @ 2.80GHz

  Partial results:

  Num. Distinct Keys    No Fast Hashmap    Vectorized    Row-based
                   1                 23            14           12
                   8                 25            13           14
                  64                 24            13           14
                 512                 27            15           14
                4096                 29            18           15
               32768                 39            17           16
               65536                 46            19           15
              131072                 65            38           35
              262144                 91            74           86
              524288                119            93           95
  Unit: ns/row
  */
}
1220+
10811221
ignore("TPCDS mini-scale benchmark") {
10821222
/*
10831223
Java HotSpot(TM) 64-Bit Server VM 1.8.0_91-b14 on Mac OS X 10.11.5
@@ -1177,4 +1317,4 @@ class AggregateBenchmark extends BenchmarkBase {
11771317
q24b 962 959 (1.0x) 981 (0.9x)
11781318
*/
11791319
}
1180-
}
1320+
}

0 commit comments

Comments
 (0)