Skip to content

Commit 441aafc

Browse files
lw-linueshin
authored andcommitted
[SPARK-16845][SQL] GeneratedClass$SpecificOrdering grows beyond 64 KB
Prior to this patch, we'll generate `compare(...)` for `GeneratedClass$SpecificOrdering` like below, leading to Janino exceptions saying the code grows beyond 64 KB. ``` scala /* 005 */ class SpecificOrdering extends o.a.s.sql.catalyst.expressions.codegen.BaseOrdering { /* ..... */ ... /* 10969 */ private int compare(InternalRow a, InternalRow b) { /* 10970 */ InternalRow i = null; // Holds current row being evaluated. /* 10971 */ /* 1.... */ code for comparing field0 /* 1.... */ code for comparing field1 /* 1.... */ ... /* 1.... */ code for comparing field449 /* 15012 */ /* 15013 */ return 0; /* 15014 */ } /* 15015 */ } ``` This patch would break `compare(...)` into smaller `compare_xxx(...)` methods when necessary; then we'll get generated `compare(...)` like: ``` scala /* 001 */ public SpecificOrdering generate(Object[] references) { /* 002 */ return new SpecificOrdering(references); /* 003 */ } /* 004 */ /* 005 */ class SpecificOrdering extends o.a.s.sql.catalyst.expressions.codegen.BaseOrdering { /* 006 */ /* 007 */ ... /* 1.... */ /* 11290 */ private int compare_0(InternalRow a, InternalRow b) { /* 11291 */ InternalRow i = null; // Holds current row being evaluated. /* 11292 */ /* 11293 */ i = a; /* 11294 */ boolean isNullA; /* 11295 */ UTF8String primitiveA; /* 11296 */ { /* 11297 */ /* 11298 */ Object obj = ((Expression) references[0]).eval(null); /* 11299 */ UTF8String value = (UTF8String) obj; /* 11300 */ isNullA = false; /* 11301 */ primitiveA = value; /* 11302 */ } /* 11303 */ i = b; /* 11304 */ boolean isNullB; /* 11305 */ UTF8String primitiveB; /* 11306 */ { /* 11307 */ /* 11308 */ Object obj = ((Expression) references[0]).eval(null); /* 11309 */ UTF8String value = (UTF8String) obj; /* 11310 */ isNullB = false; /* 11311 */ primitiveB = value; /* 11312 */ } /* 11313 */ if (isNullA && isNullB) { /* 11314 */ // Nothing /* 11315 */ } else if (isNullA) { /* 11316 */ return -1; /* 11317 */ } else if (isNullB) { /* 11318 */ return 1; /* 11319 */ } else { /* 11320 */ int comp = primitiveA.compare(primitiveB); /* 11321 */ if (comp != 0) { /* 11322 */ return comp; /* 11323 */ } /* 11324 */ } /* 11325 */ /* 11326 */ /* 11327 */ i = a; /* 11328 */ boolean isNullA1; /* 11329 */ UTF8String primitiveA1; /* 11330 */ { /* 11331 */ /* 11332 */ Object obj1 = ((Expression) references[1]).eval(null); /* 11333 */ UTF8String value1 = (UTF8String) obj1; /* 11334 */ isNullA1 = false; /* 11335 */ primitiveA1 = value1; /* 11336 */ } /* 11337 */ i = b; /* 11338 */ boolean isNullB1; /* 11339 */ UTF8String primitiveB1; /* 11340 */ { /* 11341 */ /* 11342 */ Object obj1 = ((Expression) references[1]).eval(null); /* 11343 */ UTF8String value1 = (UTF8String) obj1; /* 11344 */ isNullB1 = false; /* 11345 */ primitiveB1 = value1; /* 11346 */ } /* 11347 */ if (isNullA1 && isNullB1) { /* 11348 */ // Nothing /* 11349 */ } else if (isNullA1) { /* 11350 */ return -1; /* 11351 */ } else if (isNullB1) { /* 11352 */ return 1; /* 11353 */ } else { /* 11354 */ int comp = primitiveA1.compare(primitiveB1); /* 11355 */ if (comp != 0) { /* 11356 */ return comp; /* 11357 */ } /* 11358 */ } /* 1.... */ /* 1.... */ ... /* 1.... */ /* 12652 */ return 0; /* 12653 */ } /* 1.... */ /* 1.... */ ... /* 15387 */ /* 15388 */ public int compare(InternalRow a, InternalRow b) { /* 15389 */ /* 15390 */ int comp_0 = compare_0(a, b); /* 15391 */ if (comp_0 != 0) { /* 15392 */ return comp_0; /* 15393 */ } /* 15394 */ /* 15395 */ int comp_1 = compare_1(a, b); /* 15396 */ if (comp_1 != 0) { /* 15397 */ return comp_1; /* 15398 */ } /* 1.... */ /* 1.... */ ... /* 1.... */ /* 15450 */ return 0; /* 15451 */ } /* 15452 */ } ``` - a new added test case which - would fail prior to this patch - would pass with this patch - ordering correctness should already be covered by existing tests like those in `OrderingSuite` A major part of this PR - the refactoring work of `splitExpression()` - has been done by ueshin. Author: Liwei Lin <[email protected]> Author: Takuya UESHIN <[email protected]> Author: Takuya Ueshin <[email protected]> Closes #15480 from lw-lin/spec-ordering-64k-.
1 parent a50ef3d commit 441aafc

File tree

3 files changed

+62
-7
lines changed

3 files changed

+62
-7
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,27 @@ class CodeGenContext {
364364
* @param expressions the codes to evaluate expressions.
365365
*/
366366
def splitExpressions(row: String, expressions: Seq[String]): String = {
367+
splitExpressions(expressions, "apply", ("InternalRow", row) :: Nil)
368+
}
369+
370+
/**
371+
* Splits the generated code of expressions into multiple functions, because function has
372+
* 64kb code size limit in JVM
373+
*
374+
* @param expressions the codes to evaluate expressions.
375+
* @param funcName the split function name base.
376+
* @param arguments the list of (type, name) of the arguments of the split function.
377+
* @param returnType the return type of the split function.
378+
* @param makeSplitFunction makes split function body, e.g. add preparation or cleanup.
379+
* @param foldFunctions folds the split function calls.
380+
*/
381+
def splitExpressions(
382+
expressions: Seq[String],
383+
funcName: String,
384+
arguments: Seq[(String, String)],
385+
returnType: String = "void",
386+
makeSplitFunction: String => String = identity,
387+
foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = {
367388
val blocks = new ArrayBuffer[String]()
368389
val blockBuilder = new StringBuilder()
369390
for (code <- expressions) {
@@ -380,19 +401,20 @@ class CodeGenContext {
380401
// inline execution if only one block
381402
blocks.head
382403
} else {
383-
val apply = freshName("apply")
404+
val func = freshName(funcName)
405+
val argString = arguments.map { case (t, name) => s"$t $name" }.mkString(", ")
384406
val functions = blocks.zipWithIndex.map { case (body, i) =>
385-
val name = s"${apply}_$i"
407+
val name = s"${func}_$i"
386408
val code = s"""
387-
|private void $name(InternalRow $row) {
388-
| $body
409+
|private $returnType $name($argString) {
410+
| ${makeSplitFunction(body)}
389411
|}
390412
""".stripMargin
391413
addNewFunction(name, code)
392414
name
393415
}
394416

395-
functions.map(name => s"$name($row);").mkString("\n")
417+
foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})"))
396418
}
397419
}
398420

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,31 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR
103103
}
104104
}
105105
"""
106-
}.mkString("\n")
107-
comparisons
106+
}
107+
108+
ctx.splitExpressions(
109+
expressions = comparisons,
110+
funcName = "compare",
111+
arguments = Seq(("InternalRow", "a"), ("InternalRow", "b")),
112+
returnType = "int",
113+
makeSplitFunction = { body =>
114+
s"""
115+
InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated.
116+
$body
117+
return 0;
118+
"""
119+
},
120+
foldFunctions = { funCalls =>
121+
funCalls.zipWithIndex.map { case (funCall, i) =>
122+
val comp = ctx.freshName("comp")
123+
s"""
124+
int $comp = $funCall;
125+
if ($comp != 0) {
126+
return $comp;
127+
}
128+
"""
129+
}.mkString
130+
})
108131
}
109132

110133
protected def create(ordering: Seq[SortOrder]): BaseOrdering = {

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,4 +121,14 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper {
121121
}
122122
}
123123
}
124+
125+
test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KB") {
126+
val sortOrder = Literal("abc").asc
127+
128+
// this is passing prior to SPARK-16845, and it should also be passing after SPARK-16845
129+
GenerateOrdering.generate(Array.fill(40)(sortOrder))
130+
131+
// verify that we can support up to 5000 ordering comparisons, which should be sufficient
132+
GenerateOrdering.generate(Array.fill(5000)(sortOrder))
133+
}
124134
}

0 commit comments

Comments
 (0)