Commit 73cde01

Move SQLConf back. Assign default sizeInBytes to SparkLogicalPlan.

Parent: 73412be

File tree: 4 files changed, +25 −19 lines

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala

Lines changed: 7 additions & 12 deletions
@@ -19,29 +19,24 @@ package org.apache.spark.sql.catalyst.plans.logical
 
 import org.apache.spark.sql.catalyst.errors.TreeNodeException
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.planning.SQLConf
 import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.types.StructType
 import org.apache.spark.sql.catalyst.trees
 
-abstract class LogicalPlan extends QueryPlan[LogicalPlan] with SQLConf {
+abstract class LogicalPlan extends QueryPlan[LogicalPlan] {
   self: Product =>
 
   // TODO: make a case class?
+  /**
+   * Estimates of various statistics. The default estimation logic simply sums up the corresponding
+   * statistic produced by the children. To override this behavior, override `statistics` and
+   * assign it an overridden version of `Statistics`.
+   */
   protected class Statistics {
     lazy val childrenStats = children.map(_.statistics)
-
     lazy val numTuples: Long = childrenStats.map(_.numTuples).sum
-
-    lazy val sizeInBytes: Long = {
-      val sum = childrenStats.map(_.sizeInBytes).sum
-      if (sum == 0) statsDefaultSizeInBytes else sum
-    }
+    lazy val sizeInBytes: Long = childrenStats.map(_.sizeInBytes).sum
   }
-
-  /**
-   * Estimates of various statistics.
-   */
   lazy val statistics: Statistics = new Statistics
 
   /**
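
The new scaladoc spells out the extension point: a node that can estimate its own size overrides `statistics` with a subclass of the inner `Statistics` class, while every other node inherits the child-summing default. Below is a minimal standalone sketch of that pattern; `Plan`, `KnownSizeLeaf`, and `Join` are illustrative stand-ins, not Spark classes (the access modifier on the inner class is also omitted for brevity).

// Standalone sketch of the statistics-override pattern described above.
abstract class Plan {
  def children: Seq[Plan]

  class Statistics {
    lazy val childrenStats = children.map(_.statistics)
    lazy val sizeInBytes: Long = childrenStats.map(_.sizeInBytes).sum
  }
  lazy val statistics: Statistics = new Statistics
}

// A leaf that knows its exact size overrides the child-summing default.
class KnownSizeLeaf(bytes: Long) extends Plan {
  def children = Nil
  override lazy val statistics = new Statistics {
    override lazy val sizeInBytes: Long = bytes
  }
}

// A binary node keeps the default: the sum of its children's estimates.
class Join(left: Plan, right: Plan) extends Plan {
  def children = Seq(left, right)
}

object StatsDemo extends App {
  val plan = new Join(new KnownSizeLeaf(1024), new KnownSizeLeaf(4096))
  println(plan.statistics.sizeInBytes)  // 5120
}

Because `statistics` is a `lazy val`, the estimate is computed at most once per node and only when something first asks for it.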

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/SQLConf.scala renamed to sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala

Lines changed: 3 additions & 3 deletions
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.spark.sql.catalyst.planning
+package org.apache.spark.sql
 
 import java.util.Properties
 
@@ -40,8 +40,8 @@ private object SQLConf {
 trait SQLConf {
   import SQLConf._
 
-  import SQLConf._
-  protected[spark] val settings = confSettings
+  import org.apache.spark.sql.SQLConf._
+  @transient protected[spark] val settings = confSettings
 
   /** ************************ Spark SQL Params/Hints ******************* */
   // TODO: refactor so that these hints accessors don't pollute the name space of SQLContext?
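
Besides the package move, `settings` is now marked `@transient`, presumably because `SQLConf` gets mixed into serializable plan nodes such as `SparkLogicalPlan` (last file below) and the shared configuration map should not travel with them. A self-contained sketch of what `@transient` buys here; the `Node` class and the property key are illustrative, not Spark code:

import java.io._

// @transient fields are skipped by Java serialization, so a mixed-in
// config map does not get shipped along with a serialized plan node.
class Node extends Serializable {
  @transient val settings = new java.util.Properties()
  settings.setProperty("spark.sql.shuffle.partitions", "200")
}

object TransientDemo extends App {
  val buf = new ByteArrayOutputStream()
  val out = new ObjectOutputStream(buf)
  out.writeObject(new Node)
  out.close()

  val in = new ObjectInputStream(new ByteArrayInputStream(buf.toByteArray))
  val copy = in.readObject().asInstanceOf[Node]
  println(copy.settings)  // null: the transient field was not serialized
}

On deserialization the field comes back `null`, so the configuration stays behind on the driver instead of being copied into every shipped plan.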

sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala

Lines changed: 0 additions & 2 deletions
@@ -17,7 +17,6 @@
 
 package org.apache.spark.sql
 
-
 import scala.language.implicitConversions
 import scala.reflect.runtime.universe.TypeTag
 
@@ -30,7 +29,6 @@ import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.dsl.ExpressionConversions
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.optimizer.Optimizer
-import org.apache.spark.sql.catalyst.planning.SQLConf
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
 import org.apache.spark.sql.catalyst.types._

sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala

Lines changed: 15 additions & 2 deletions
@@ -19,7 +19,7 @@ package org.apache.spark.sql.execution
 
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{Logging, Row}
+import org.apache.spark.sql.{Logging, Row, SQLConf}
 import org.apache.spark.sql.catalyst.trees
 import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation
 import org.apache.spark.sql.catalyst.expressions.GenericRow
@@ -67,7 +67,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging {
  */
 @DeveloperApi
 case class SparkLogicalPlan(alreadyPlanned: SparkPlan)
-  extends LogicalPlan with MultiInstanceRelation {
+  extends LogicalPlan with MultiInstanceRelation with SQLConf {
 
   def output = alreadyPlanned.output
   override def references = Set.empty
@@ -80,6 +80,19 @@ case class SparkLogicalPlan(alreadyPlanned: SparkPlan)
       case _ => sys.error("Multiple instance of the same relation detected.")
     }).asInstanceOf[this.type]
   }
+
+  override lazy val statistics = new Statistics {
+    // If this is wrapping around ExistingRdd and no reasonable estimation logic is implemented,
+    // return a default value.
+    override lazy val sizeInBytes: Long = {
+      val defaultSum = childrenStats.map(_.sizeInBytes).sum
+      alreadyPlanned match {
+        case e: ExistingRdd if defaultSum == 0 => statsDefaultSizeInBytes
+        case _ => defaultSum
+      }
+    }
+  }
+
 }
 
 private[sql] trait LeafNode extends SparkPlan with trees.LeafNode[SparkPlan] {
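
This override is the second half of the commit message: when `SparkLogicalPlan` wraps an `ExistingRdd`, no real statistics exist and the children sum to 0, so it returns the configured `statsDefaultSizeInBytes` (reachable because the class now mixes in `SQLConf`) instead, presumably so that size-based decisions downstream do not mistake an unknown size for an empty relation. A standalone sketch of just that fallback rule; the names are stand-ins, not Spark's:

object SizeFallbackDemo extends App {
  // Stand-ins: the wrapped physical plan is either an opaque RDD scan
  // with no statistics, or anything else with child estimates.
  sealed trait Wrapped
  case object AnExistingRdd extends Wrapped
  case object AnythingElse extends Wrapped

  val statsDefaultSizeInBytes = 10L * 1024 * 1024  // illustrative default

  // Mirrors the guarded match above: only an opaque scan whose children
  // sum to zero gets the default; everything else keeps the real sum.
  def sizeInBytes(wrapped: Wrapped, childrenSum: Long): Long = wrapped match {
    case AnExistingRdd if childrenSum == 0 => statsDefaultSizeInBytes
    case _ => childrenSum
  }

  println(sizeInBytes(AnExistingRdd, 0L))     // 10485760: default kicks in
  println(sizeInBytes(AnExistingRdd, 2048L))  // 2048
  println(sizeInBytes(AnythingElse, 0L))      // 0
}

Keeping the `childrenSum == 0` guard means a wrapped scan that does report real sizes is left alone.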
