Skip to content

Commit e8c4af1

Browse files
committed
Fix
1 parent 36fa198 commit e8c4af1

File tree

4 files changed

+156
-22
lines changed

4 files changed

+156
-22
lines changed
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.spark.sql;
18+
19+
import org.apache.spark.annotation.Unstable;
20+
21+
/**
22+
* ExplainMode is used to specify the expected output format of plans (logical and physical)
23+
* for debugging purpose.
24+
*
25+
* @since 3.0.0
26+
*/
27+
@Unstable
28+
public enum ExplainMode {
29+
/**
30+
* Simple mode means that when printing explain for a DataFrame, only a physical plan is
31+
* expected to be printed to the console.
32+
*
33+
* @since 3.0.0
34+
*/
35+
Simple,
36+
/**
37+
* Extended mode means that when printing explain for a DataFrame, both logical and physical
38+
* plans are expected to be printed to the console.
39+
*
40+
* @since 3.0.0
41+
*/
42+
Extended,
43+
/**
44+
* Extended mode means that when printing explain for a DataFrame, if generated codes are
45+
* available, a physical plan and the generated codes are expected to be printed to the console.
46+
*
47+
* @since 3.0.0
48+
*/
49+
Codegen,
50+
/**
51+
* Extended mode means that when printing explain for a DataFrame, if plan node statistics are
52+
* available, a logical plan and the statistics are expected to be printed to the console.
53+
*
54+
* @since 3.0.0
55+
*/
56+
Cost,
57+
/**
58+
* Formatted mode means that when printing explain for a DataFrame, explain output is
59+
* expected to be split into two sections: a physical plan outline and node details.
60+
*
61+
* @since 3.0.0
62+
*/
63+
Formatted
64+
}

sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -522,36 +522,58 @@ class Dataset[T] private[sql](
522522
// scalastyle:on println
523523

524524
/**
525-
* Prints the plans (logical and physical) to the console for debugging purposes.
525+
* Prints the plans (logical and physical) with a format specified by a given explain mode.
526526
*
527527
* @group basic
528-
* @since 1.6.0
528+
* @since 3.0.0
529529
*/
530-
def explain(extended: Boolean): Unit = {
530+
def explain(mode: ExplainMode): Unit = {
531531
// Because temporary views are resolved during analysis when we create a Dataset, and
532532
// `ExplainCommand` analyzes input query plan and resolves temporary views again. Using
533533
// `ExplainCommand` here will probably output different query plans, compared to the results
534534
// of evaluation of the Dataset. So just output QueryExecution's query plans here.
535535
val qe = ExplainCommandUtil.explainedQueryExecution(sparkSession, logicalPlan, queryExecution)
536536

537-
val outputString =
538-
if (extended) {
539-
qe.toString
540-
} else {
537+
val outputString = mode match {
538+
case ExplainMode.Simple =>
541539
qe.simpleString
542-
}
540+
case ExplainMode.Extended =>
541+
qe.toString
542+
case ExplainMode.Codegen =>
543+
try {
544+
org.apache.spark.sql.execution.debug.codegenString(queryExecution.executedPlan)
545+
} catch {
546+
case e: AnalysisException => e.toString
547+
}
548+
case ExplainMode.Cost =>
549+
qe.stringWithStats
550+
case ExplainMode.Formatted =>
551+
qe.simpleString(formatted = true)
552+
}
543553
// scalastyle:off println
544554
println(outputString)
545555
// scalastyle:on println
546556
}
547557

558+
/**
559+
* Prints the plans (logical and physical) to the console for debugging purposes.
560+
*
561+
* @group basic
562+
* @since 1.6.0
563+
*/
564+
def explain(extended: Boolean): Unit = if (extended) {
565+
explain(ExplainMode.Extended)
566+
} else {
567+
explain(ExplainMode.Simple)
568+
}
569+
548570
/**
549571
* Prints the physical plan to the console for debugging purposes.
550572
*
551573
* @group basic
552574
* @since 1.6.0
553575
*/
554-
def explain(): Unit = explain(extended = false)
576+
def explain(): Unit = explain(ExplainMode.Simple)
555577

556578
/**
557579
* Returns all column names and their data types as an array.

sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ package org.apache.spark.sql.execution.command
2020
import java.util.UUID
2121

2222
import org.apache.spark.rdd.RDD
23-
import org.apache.spark.sql.{AnalysisException, SparkSession}
24-
import org.apache.spark.sql.{Row, SparkSession}
23+
import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
2524
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
2625
import org.apache.spark.sql.catalyst.errors.TreeNodeException
2726
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
@@ -132,13 +131,15 @@ case class DataWritingCommandExec(cmd: DataWritingCommand, child: SparkPlan)
132131
* (but do NOT actually execute it).
133132
*
134133
* {{{
135-
* EXPLAIN (EXTENDED | CODEGEN) SELECT * FROM ...
134+
* EXPLAIN (EXTENDED | CODEGEN | COST | FORMATTED) SELECT * FROM ...
136135
* }}}
137136
*
138137
* @param logicalPlan plan to explain
139138
* @param extended whether to do extended explain or not
140139
* @param codegen whether to output generated code from whole-stage codegen or not
141140
* @param cost whether to show cost information for operators.
141+
* @param formatted whether to split explain output into two sections: a physical plan outline
142+
* and node details.
142143
*/
143144
case class ExplainCommand(
144145
logicalPlan: LogicalPlan,

sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,19 @@ import org.apache.spark.sql.types.StructType
2525
class ExplainSuite extends QueryTest with SharedSparkSession {
2626
import testImplicits._
2727

28-
/**
29-
* Get the explain from a DataFrame and run the specified action on it.
30-
*/
31-
private def withNormalizedExplain(df: DataFrame, extended: Boolean)(f: String => Unit) = {
28+
private def getNormalizedExplain(df: DataFrame, mode: ExplainMode): String = {
3229
val output = new java.io.ByteArrayOutputStream()
3330
Console.withOut(output) {
34-
df.explain(extended = extended)
31+
df.explain(mode)
3532
}
36-
val normalizedOutput = output.toString.replaceAll("#\\d+", "#x")
37-
f(normalizedOutput)
33+
output.toString.replaceAll("#\\d+", "#x")
34+
}
35+
36+
/**
37+
* Get the explain from a DataFrame and run the specified action on it.
38+
*/
39+
private def withNormalizedExplain(df: DataFrame, mode: ExplainMode)(f: String => Unit) = {
40+
f(getNormalizedExplain(df, mode))
3841
}
3942

4043
/**
@@ -53,14 +56,19 @@ class ExplainSuite extends QueryTest with SharedSparkSession {
5356
/**
5457
* Runs the plan and makes sure the plans contains all of the keywords.
5558
*/
56-
private def checkKeywordsExistsInExplain(df: DataFrame, keywords: String*): Unit = {
57-
withNormalizedExplain(df, extended = true) { normalizedOutput =>
59+
private def checkKeywordsExistsInExplain(
60+
df: DataFrame, mode: ExplainMode, keywords: String*): Unit = {
61+
withNormalizedExplain(df, mode) { normalizedOutput =>
5862
for (key <- keywords) {
5963
assert(normalizedOutput.contains(key))
6064
}
6165
}
6266
}
6367

68+
private def checkKeywordsExistsInExplain(df: DataFrame, keywords: String*): Unit = {
69+
checkKeywordsExistsInExplain(df, ExplainMode.Extended, keywords: _*)
70+
}
71+
6472
test("SPARK-23034 show rdd names in RDD scan nodes (Dataset)") {
6573
val rddWithName = spark.sparkContext.parallelize(Row(1, "abc") :: Nil).setName("testRdd")
6674
val df = spark.createDataFrame(rddWithName, StructType.fromDDL("c0 int, c1 string"))
@@ -209,7 +217,7 @@ class ExplainSuite extends QueryTest with SharedSparkSession {
209217
test("SPARK-26659: explain of DataWritingCommandExec should not contain duplicate cmd.nodeName") {
210218
withTable("temptable") {
211219
val df = sql("create table temptable using parquet as select * from range(2)")
212-
withNormalizedExplain(df, extended = false) { normalizedOutput =>
220+
withNormalizedExplain(df, ExplainMode.Simple) { normalizedOutput =>
213221
assert("Create\\w*?TableAsSelectCommand".r.findAllMatchIn(normalizedOutput).length == 1)
214222
}
215223
}
@@ -262,6 +270,45 @@ class ExplainSuite extends QueryTest with SharedSparkSession {
262270
}
263271
}
264272
}
273+
274+
test("Support ExplainMode in Dataset.explain") {
275+
val df1 = Seq((1, 2), (2, 3)).toDF("k", "v1")
276+
val df2 = Seq((2, 3), (1, 1)).toDF("k", "v2")
277+
val testDf = df1.join(df2, "k").groupBy("k").agg(count("v1"), sum("v1"), avg("v2"))
278+
279+
val simpleExplainOutput = getNormalizedExplain(testDf, ExplainMode.Simple)
280+
assert(simpleExplainOutput.startsWith("== Physical Plan =="))
281+
Seq("== Parsed Logical Plan ==",
282+
"== Analyzed Logical Plan ==",
283+
"== Optimized Logical Plan ==").foreach { planType =>
284+
assert(!simpleExplainOutput.contains(planType))
285+
}
286+
checkKeywordsExistsInExplain(
287+
testDf,
288+
ExplainMode.Extended,
289+
"== Parsed Logical Plan ==" ::
290+
"== Analyzed Logical Plan ==" ::
291+
"== Optimized Logical Plan ==" ::
292+
"== Physical Plan ==" ::
293+
Nil: _*)
294+
checkKeywordsExistsInExplain(
295+
testDf,
296+
ExplainMode.Cost,
297+
"Statistics(sizeInBytes=" ::
298+
Nil: _*)
299+
checkKeywordsExistsInExplain(
300+
testDf,
301+
ExplainMode.Codegen,
302+
"WholeStageCodegen subtrees" ::
303+
"Generated code:" ::
304+
Nil: _*)
305+
checkKeywordsExistsInExplain(
306+
testDf,
307+
ExplainMode.Formatted,
308+
"* LocalTableScan (1)" ::
309+
"(1) LocalTableScan [codegen id :" ::
310+
Nil: _*)
311+
}
265312
}
266313

267314
case class ExplainSingleData(id: Int)

0 commit comments

Comments
 (0)