Skip to content

Commit 31914c0

Browse files
committed
address review comments
1 parent a9d40e9 commit 31914c0

File tree

4 files changed

+34
-23
lines changed

4 files changed

+34
-23
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ class CodegenContext {
137137
var currentVars: Seq[ExprCode] = null
138138

139139
/**
140-
* Holding expressions' mutable states like `MonotonicallyIncreasingID.count` as a
140+
* Holding expressions' inlined mutable states like `MonotonicallyIncreasingID.count` as a
141141
* 2-tuple: java type, variable name.
142142
* As an example, ("int", "count") will produce code:
143143
* {{{
@@ -150,15 +150,22 @@ class CodegenContext {
150150
val inlinedMutableStates: mutable.ArrayBuffer[(String, String)] =
151151
mutable.ArrayBuffer.empty[(String, String)]
152152

153-
// An map keyed by mutable states' types holds the status of mutableStateArray
153+
/**
154+
* The mapping between mutable state types and corrseponding compacted arrays.
155+
* The keys are java type string. The values are [[MutableStateArrays]] which encapsulates
156+
* the compacted arrays for the mutable states with the same java type.
157+
*/
154158
val arrayCompactedMutableStates: mutable.Map[String, MutableStateArrays] =
155159
mutable.Map.empty[String, MutableStateArrays]
156160

157161
// An array holds the code that will initialize each state
158162
val mutableStateInitCode: mutable.ArrayBuffer[String] =
159163
mutable.ArrayBuffer.empty[String]
160164

161-
// Holding names and current index of mutableStateArrays for a certain type
165+
/**
166+
* This class holds a set of names of mutableStateArrays that is used for compacting mutable
167+
* states for a certain type, and holds the next available slot of the current compacted array.
168+
*/
162169
class MutableStateArrays {
163170
val arrayNames = mutable.ListBuffer.empty[String]
164171
createNewArray()
@@ -169,6 +176,11 @@ class CodegenContext {
169176

170177
def getCurrentIndex: Int = currentIndex
171178

179+
/**
180+
* Returns the reference of next available slot in current compacted array. The size of each
181+
* compacted array is controlled by the config `CodeGenerator.MUTABLESTATEARRAY_SIZE_LIMIT`.
182+
* Once reaching the threshold, new compacted array is created.
183+
*/
172184
def getNextSlot(): String = {
173185
if (currentIndex < CodeGenerator.MUTABLESTATEARRAY_SIZE_LIMIT) {
174186
val res = s"${arrayNames.last}[$currentIndex]"
@@ -199,17 +211,19 @@ class CodegenContext {
199211
* compacted. Please set `true` into forceInline, if you want to access the
200212
* status fast (e.g. frequently accessed) or if you want to use the original
201213
* variable name
202-
* @param useFreshName If false and inline is true, the name is not changed
203-
* @return the name of the mutable state variable, which is either the original name if the
204-
* variable is inlined to the outer class, or an array access if the variable is to be
205-
* stored in an array of variables of the same type.
206-
* There are two use cases. One is to use the original name for global variable instead
207-
* of fresh name. Second is to use the original initialization statement since it is
208-
* complex (e.g. allocate multi-dimensional array or object constructor has varibles).
209-
* Primitive type variables will be inlined into outer class when the total number of
210-
* mutable variables is less than `CodeGenerator.OUTER_CLASS_VARIABLES_THRESHOLD`
211-
* the max size of an array for compaction is given by
212-
* `CodeGenerator.MUTABLESTATEARRAY_SIZE_LIMIT`.
214+
* @param useFreshName If this is false and forceInline is true, the name is not changed
215+
* @return the name of the mutable state variable, which is the original name or fresh name if
216+
* the variable is inlined to the outer class, or an array access if the variable is to
217+
* be stored in an array of variables of the same type.
218+
* A variable will be inlined into the outer class when one of the following conditions
219+
* are satisfied:
220+
* 1. forceInline is true
221+
* 2. its type is primitive type and the total number of the inlined mutable variables
222+
* is less than `CodeGenerator.OUTER_CLASS_VARIABLES_THRESHOLD`
223+
* 3. its type is multi-dimensional array
224+
* A primitive type variable will be inlined into outer class when the total number of
225+
* When a variable is compacted into an array, the max size of the array for compaction
226+
* is given by `CodeGenerator.MUTABLESTATEARRAY_SIZE_LIMIT`.
213227
*/
214228
def addMutableState(
215229
javaType: String,
@@ -1099,9 +1113,9 @@ class CodegenContext {
10991113
val commonExprs = equivalentExpressions.getAllEquivalentExprs.filter(_.size > 1)
11001114
commonExprs.foreach { e =>
11011115
val expr = e.head
1102-
val fnName = freshName("evalExpr")
1103-
val isNull = s"${fnName}IsNull"
1104-
val value = s"${fnName}Value"
1116+
val fnName = freshName("subExpr")
1117+
val isNull = addMutableState(JAVA_BOOLEAN, "subExprIsNull")
1118+
val value = addMutableState(javaType(expr.dataType), "subExprValue")
11051119

11061120
// Generate the code for this expression tree and wrap it in a function.
11071121
val eval = expr.genCode(this)
@@ -1127,8 +1141,6 @@ class CodegenContext {
11271141
// 2. Less code.
11281142
// Currently, we will do this for all non-leaf only expression trees (i.e. expr trees with
11291143
// at least two nodes) as the cost of doing it is expected to be low.
1130-
addMutableState(JAVA_BOOLEAN, isNull, forceInline = true, useFreshName = false)
1131-
addMutableState(javaType(expr.dataType), value, forceInline = true, useFreshName = false)
11321144

11331145
subexprFunctions += s"${addNewFunction(fnName, fn)}($INPUT_ROW);"
11341146
val state = SubExprEliminationState(isNull, value)

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -425,4 +425,3 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper {
425425
assert(ctx2.mutableStateInitCode.size == CodeGenerator.MUTABLESTATEARRAY_SIZE_LIMIT + 10)
426426
}
427427
}
428-

sql/core/src/main/scala/org/apache/spark/sql/execution/SortExec.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,9 +139,9 @@ case class SortExec(
139139
// the iterator to return sorted rows.
140140
val thisPlan = ctx.addReferenceObj("plan", this)
141141
sorterVariable = ctx.addMutableState(classOf[UnsafeExternalRowSorter].getName, "sorter",
142-
v => s"$v = $thisPlan.createSorter();")
142+
v => s"$v = $thisPlan.createSorter();", forceInline = true)
143143
val metrics = ctx.addMutableState(classOf[TaskMetrics].getName, "metrics",
144-
v => s"$v = org.apache.spark.TaskContext.get().taskMetrics();")
144+
v => s"$v = org.apache.spark.TaskContext.get().taskMetrics();", forceInline = true)
145145
val sortedIterator = ctx.addMutableState("scala.collection.Iterator<UnsafeRow>", "sortedIter",
146146
forceInline = true)
147147

sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ trait BaseLimitExec extends UnaryExecNode with CodegenSupport {
7171
}
7272

7373
override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = {
74-
val stopEarly = ctx.addMutableState(ctx.JAVA_BOOLEAN, "stopEarly") // init as stopEarly = 0
74+
val stopEarly = ctx.addMutableState(ctx.JAVA_BOOLEAN, "stopEarly") // init as stopEarly = false
7575

7676
ctx.addNewFunction("stopEarly", s"""
7777
@Override

0 commit comments

Comments
 (0)