
Commit ecf9f34

Author: Salil Surendran (committed)
Commit message: Committing to fix code review issues.
1 parent 752125a · commit ecf9f34

File tree: 7 files changed (+53, -76 lines)


docs/sql-programming-guide.md

Lines changed: 20 additions & 13 deletions
@@ -1300,11 +1300,28 @@ Configuration of in-memory caching can be done using the `setConf` method on `Sp
 
 </table>
 
+## QueryExecutionListener Options
+Use this configuration option to attach query execution listeners
+
+<table class="table">
+<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
+<tr>
+  <td><code>spark.sql.queryExecutionListeners</code></td>
+  <td></td>
+  <td>
+    A comma-separated list of classes that implement QueryExecutionListener. When creating a SparkSession,
+    instances of these listeners will be added to it. These classes needs to have a zero-argument
+    constructor. If the specified class can't be found or the class specified doesn't have a valid
+    constructor the SparkSession creation will fail with an exception.
+  </td>
+</tr>
+</table>
+
 ## Other Configuration Options
 
-The following options can also be used to tune the performance of query execution and attaching
-query execution listeners. It is possible that these options will be deprecated in future release as
-more optimizations are performed automatically.
+The following options can also be used to tune the performance of query execution. It is possible
+that these options will be deprecated in future release as more optimizations are performed
+automatically.
 
 <table class="table">
 <tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>

@@ -1351,16 +1368,6 @@ more optimizations are performed automatically.
     Configures the number of partitions to use when shuffling data for joins or aggregations.
   </td>
 </tr>
-<tr>
-  <td><code>spark.sql.queryExecutionListeners</code></td>
-  <td></td>
-  <td>
-    A comma-separated list of classes that implement QueryExecutionListener. When creating a SparkSession,
-    instances of these listeners will be added to it. These classes needs to have a zero-argument
-    constructor. If the specified class can't be found or the class specified doesn't have a valid
-    constructor the SparkSession creation will fail with an exception.
-  </td>
-</tr>
 </table>
 
 # Distributed SQL Engine
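To make the new documentation concrete, here is a minimal sketch of a listener that could be attached through `spark.sql.queryExecutionListeners`. The class and package names are hypothetical, and the `onSuccess`/`onFailure` signatures are assumed from the scaladoc in this patch (the extra `outputParams` argument is taken to be an `Option[OutputParams]`); the stock upstream trait does not have that parameter.

```scala
package com.example.listeners

import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.sql.util.{OutputParams, QueryExecutionListener}

// Must have a zero-argument constructor so SparkSession can instantiate it reflectively.
class AuditQueryExecutionListener extends QueryExecutionListener {

  override def onSuccess(
      funcName: String,
      qe: QueryExecution,
      durationNs: Long,
      outputParams: Option[OutputParams]): Unit = {
    // outputParams is expected to be None for reads, so only writes are logged here.
    outputParams.foreach { p =>
      println(s"$funcName wrote ${p.datasourceType} to " +
        s"${p.destination.getOrElse("<unknown>")} in ${durationNs / 1e6} ms")
    }
  }

  override def onFailure(
      funcName: String,
      qe: QueryExecution,
      exception: Exception,
      outputParams: Option[OutputParams]): Unit = {
    println(s"$funcName failed: ${exception.getMessage}")
  }
}
```

Such a class would then be referenced by its fully qualified name, for example with `--conf spark.sql.queryExecutionListeners=com.example.listeners.AuditQueryExecutionListener` on spark-submit, or via `SparkSession.builder().config(...)` before the session is created.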

sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala

Lines changed: 10 additions & 45 deletions
@@ -192,16 +192,16 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
   }
 
   /**
-   * Executes the query and calls the {@link org.apache.spark.sql.util.QueryExecutionListener}
-   * methods.
+   * Wrap a DataFrameWriter action to track the query execution and time cost, then report to the
+   * user-registered callback functions.
    *
    * @param funcName A identifier for the method executing the query
-   * @param qe the @see [[QueryExecution]] object associated with the query
+   * @param qe the @see `QueryExecution` object associated with the query
    * @param outputParams The output parameters useful for query analysis
    * @param action the function that executes the query after which the listener methods gets
   *                called.
   */
-  private def executeAndCallQEListener(
+  private def withAction(
      funcName: String,
      qe: QueryExecution,
      outputParams: OutputParams)(action: => Unit) = {

@@ -250,11 +250,8 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
      case "jdbc" => extraOptions.get("dbtable")
      case _ => extraOptions.get("path")
    }
-
-    executeAndCallQEListener(
-      "save",
-      df.queryExecution,
-      OutputParams(source, destination, extraOptions.toMap)) {
+    val outputParams = OutputParams(source, destination, extraOptions.toMap)
+    withAction("save", df.queryExecution, outputParams) {
      dataSource.write(mode, df)
    }
  }

@@ -282,11 +279,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
   *
   * Because it inserts data to an existing table, format or options will be ignored.
   *
-   * Calls the callback methods of @see[[QueryExecutionListener]] after query execution with
-   * @see[[OutputParams]] having datasourceType set as the string parameter passed to the
-   * @see[[DataFrameWriter#format]] method and destination set as the name of the table into which
-   * data is being inserted into.
-   *
   * @since 1.4.0
   */
  def insertInto(tableName: String): Unit = {

@@ -311,12 +303,8 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
        child = df.logicalPlan,
        overwrite = mode == SaveMode.Overwrite,
        ifNotExists = false))
-    executeAndCallQEListener(
-      "insertInto",
-      qe,
-      new OutputParams(source, Some(tableIdent.unquotedString), extraOptions.toMap)) {
-      qe.toRdd
-    }
+    val outputParams = OutputParams(source, Some(tableIdent.unquotedString), extraOptions.toMap)
+    withAction("insertInto", qe, outputParams)(qe.toRdd)
  }
 
  private def normalizedParCols: Option[Seq[String]] = partitioningColumns.map { cols =>

@@ -408,10 +396,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
   * like Hive will be able to read this table. Otherwise, the table is persisted in a Spark SQL
   * specific format.
   *
-   * Calls the callback methods of @see[[QueryExecutionListener]] after query execution with a
-   * @see[[OutputParams]] object having datasourceType set as the string parameter passed to the
-   * @see[[DataFrameWriter#format]] and destination set as the name of the table being
-   * written to
   * @since 1.4.0
   */
  def saveAsTable(tableName: String): Unit = {

@@ -483,12 +467,8 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
    )
    val qe = df.sparkSession.sessionState.executePlan(
      CreateTable(tableDesc, mode, Some(df.logicalPlan)))
-    executeAndCallQEListener(
-      "saveAsTable",
-      qe,
-      new OutputParams(source, Some(tableIdent.unquotedString), extraOptions.toMap)) {
-      qe.toRdd
-    }
+    val outputParams = new OutputParams(source, Some(tableIdent.unquotedString), extraOptions.toMap)
+    withAction("saveAsTable", qe, outputParams)(qe.toRdd)
  }
 
  /**

@@ -552,9 +532,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
   * indicates a timestamp format. Custom date formats follow the formats at
   * `java.text.SimpleDateFormat`. This applies to timestamp type.</li>
   * </ul>
-   * Calls the callback methods in @see[[QueryExecutionListener]] methods after query execution with
-   * @see[[OutputParams]] having datasourceType set as string constant "json" and
-   * destination set as the path to which the data is written
   *
   * @since 1.4.0
   */

@@ -576,9 +553,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
   * shorten names(none, `snappy`, `gzip`, and `lzo`). This will override
   * `spark.sql.parquet.compression.codec`.</li>
   * </ul>
-   * Calls the callback methods in @see[[QueryExecutionListener]] methods after query execution with
-   * @see[[OutputParams]] having datasourceType set as string constant "parquet" and
-   * destination set as the path to which the data is written
   *
   * @since 1.4.0
   */

@@ -599,9 +573,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
   * one of the known case-insensitive shorten names(`none`, `snappy`, `zlib`, and `lzo`).
   * This will override `orc.compress`.</li>
   * </ul>
-   * Calls the callback methods in @see[[QueryExecutionListener]] methods after query execution with
-   * @see[[OutputParams]] having datasourceType set as string constant "orc" and
-   * destination set as the path to which the data is written
   *
   * @since 1.5.0
   * @note Currently, this method can only be used after enabling Hive support

@@ -628,9 +599,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
   * one of the known case-insensitive shorten names (`none`, `bzip2`, `gzip`, `lz4`,
   * `snappy` and `deflate`). </li>
   * </ul>
-   * Calls the callback methods in e@see[[QueryExecutionListener]] methods after query execution
-   * with @see[[OutputParams]] having datasourceType set as string constant "text" and
-   * destination set as the path to which the data is written
   *
   * @since 1.6.0
   */

@@ -670,9 +638,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
   * indicates a timestamp format. Custom date formats follow the formats at
   * `java.text.SimpleDateFormat`. This applies to timestamp type.</li>
   * </ul>
-   * Calls the callback methods in @see[[QueryExecutionListener]] methods after query execution with
-   * @see[[OutputParams]] having datasourceType set as string constant "csv" and
-   * destination set as the path to which the data is written
   *
   * @since 2.0.0
   */
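The diff above shows only the signature of the renamed `withAction` helper, not its body. The following standalone sketch illustrates the pattern the new name describes: run the wrapped write action, measure its duration, and report success or failure to registered callbacks. The `Callbacks` trait and the `report*` method names are illustrative and are not Spark APIs.

```scala
// Standalone sketch of a withAction-style wrapper: time the action and notify callbacks.
object WithActionSketch {

  // Stand-in for whatever callback registry the caller provides (not a Spark API).
  trait Callbacks {
    def reportSuccess(funcName: String, durationNs: Long): Unit
    def reportFailure(funcName: String, error: Exception): Unit
  }

  def withAction[T](funcName: String, callbacks: Callbacks)(action: => T): T = {
    val start = System.nanoTime()
    try {
      val result = action                                   // execute the wrapped write action
      callbacks.reportSuccess(funcName, System.nanoTime() - start)
      result
    } catch {
      case e: Exception =>
        callbacks.reportFailure(funcName, e)                // surface the failure to listeners
        throw e                                             // then rethrow so the caller still sees it
    }
  }
}
```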

sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala

Lines changed: 2 additions & 2 deletions
@@ -40,7 +40,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Range}
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.datasources.LogicalRelation
 import org.apache.spark.sql.execution.ui.SQLListener
-import org.apache.spark.sql.internal.{CatalogImpl, SessionState, SharedState, SQLConf}
+import org.apache.spark.sql.internal._
 import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
 import org.apache.spark.sql.sources.BaseRelation
 import org.apache.spark.sql.streaming._

@@ -897,7 +897,7 @@ object SparkSession {
   }
 
   private def createQueryExecutionListeners(conf: SparkConf): Seq[QueryExecutionListener] = {
-    conf.get(SQLConf.QUERY_EXECUTION_LISTENERS)
+    conf.get(StaticSQLConf.QUERY_EXECUTION_LISTENERS)
      .map(Utils.classForName(_))
      .map(_.newInstance().asInstanceOf[QueryExecutionListener])
  }
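`createQueryExecutionListeners` resolves each configured class name and invokes its zero-argument constructor, which is why a missing class or constructor aborts SparkSession creation. Below is a self-contained sketch of that step using plain Java reflection; Spark's version goes through its internal `Utils.classForName` and the typed conf entry instead.

```scala
// Sketch only: instantiate each class named in a comma-separated property value.
object ListenerLoading {

  def instantiateListeners(commaSeparated: String): Seq[AnyRef] =
    commaSeparated.split(",").map(_.trim).filter(_.nonEmpty).toSeq.map { name =>
      val clazz = Class.forName(name)                 // throws ClassNotFoundException if the class is missing
      clazz.getDeclaredConstructor()                  // throws NoSuchMethodException without a zero-arg constructor
        .newInstance()
        .asInstanceOf[AnyRef]
    }
}
```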

sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 10 additions & 9 deletions
@@ -660,21 +660,12 @@ object SQLConf {
    .booleanConf
    .createWithDefault(false)
 
-
-  val QUERY_EXECUTION_LISTENERS =
-    ConfigBuilder("spark.sql.queryExecutionListeners")
-      .doc("QueryExecutionListeners to be attached to the SparkSession")
-      .stringConf
-      .toSequence
-      .createWithDefault(Nil)
-
  val SESSION_LOCAL_TIMEZONE =
    SQLConfigBuilder("spark.sql.session.timeZone")
      .doc("""The ID of session local timezone, e.g. "GMT", "America/Los_Angeles", etc.""")
      .stringConf
      .createWithDefault(TimeZone.getDefault().getID())
 
-
  object Deprecated {
    val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks"
  }

@@ -1056,4 +1047,14 @@ object StaticSQLConf {
      "SQL configuration and the current database.")
    .booleanConf
    .createWithDefault(false)
+
+  val QUERY_EXECUTION_LISTENERS = buildConf("spark.sql.queryExecutionListeners")
+    .doc("A comma-separated list of classes that implement QueryExecutionListener. When creating " +
+      "a SparkSession, instances of these listeners will be added to it. These classes " +
+      "needs to have a zero-argument constructor. If the specified class can't be found or" +
+      " the class specified doesn't have a valid constructor the SparkSession creation " +
+      "will fail with an exception.")
+    .stringConf
+    .toSequence
+    .createWithDefault(Nil)
 }
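Because the entry now lives in `StaticSQLConf`, it is read once when the SparkSession is created, so the value has to be in place before `getOrCreate()`. Here is a sketch of how the option might be supplied from application code; the listener class name is hypothetical, and the same value can also be passed with `--conf` on spark-submit.

```scala
import org.apache.spark.sql.SparkSession

object ListenerConfigDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("listener-demo")
      // Hypothetical listener class; comma-separate multiple implementations.
      .config("spark.sql.queryExecutionListeners",
        "com.example.listeners.AuditQueryExecutionListener")
      .getOrCreate()

    // A write action, which is what triggers the listener callbacks in this patch.
    spark.range(5).write.mode("overwrite").parquet("/tmp/listener-demo")
    spark.stop()
  }
}
```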

sql/core/src/main/scala/org/apache/spark/sql/util/QueryExecutionListener.scala

Lines changed: 5 additions & 4 deletions
@@ -45,7 +45,7 @@ trait QueryExecutionListener {
   * physical plan, etc.
   * @param durationNs the execution time for this query in nanoseconds.
   * @param outputParams The output parameters in case the method is invoked as a result of a
-   *                     write operation. In case of a read will be @see[[None]]
+   *                     write operation. In case of a read will be @see `None`
   */
  @DeveloperApi
  def onSuccess(

@@ -61,7 +61,7 @@ trait QueryExecutionListener {
   * physical plan, etc.
   * @param exception the exception that failed this query.
   * @param outputParams The output parameters in case the method is invoked as a result of a
-   *                     write operation. In case of a read will be @see[[None]]
+   *                     write operation. In case of a read will be @see `None`
   *
   * @note This can be invoked by multiple different threads.
   */

@@ -75,13 +75,14 @@
 
 /**
  * Contains extra information useful for query analysis passed on from the methods in
- * @see[[org.apache.spark.sql.DataFrameWriter]] while writing to a datasource
+ * @see `org.apache.spark.sql.DataFrameWriter` while writing to a datasource
  * @param datasourceType type of data source written to like csv, parquet, json, hive, jdbc etc.
  * @param destination path or table name written to
 * @param options the map containing the output options for the underlying datasource
- *                specified by using the @see [[org.apache.spark.sql.DataFrameWriter#option]] method
+ *                specified by using the @see `org.apache.spark.sql.DataFrameWriter#option` method
 * @param writeParams will contain any extra information that the write method wants to provide
 */
+@DeveloperApi
 case class OutputParams(
    datasourceType: String,
    destination: Option[String],
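For reference, the `DataFrameWriter` call sites above construct `OutputParams` with three arguments, which suggests the `writeParams` field is defaulted. A sketch of what such a value might look like, pasteable in a spark-shell built with this patch; the field names follow the scaladoc and the exact signature is an assumption.

```scala
import org.apache.spark.sql.util.OutputParams

// Illustrative values only: a parquet write to a hypothetical path.
val params = OutputParams(
  datasourceType = "parquet",                    // e.g. "json", "csv", "jdbc", "hive"
  destination = Some("/data/events/2017-01"),    // path or table name written to
  options = Map("compression" -> "snappy"))      // options supplied via DataFrameWriter.option

params.destination.foreach(dest => println(s"wrote ${params.datasourceType} to $dest"))
```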

sql/core/src/test/scala/org/apache/spark/sql/SparkSQLQueryExecutionListenerSuite.scala

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ import org.apache.spark.sql.util.{OutputParams, QueryExecutionListener}
 
 /**
  * Test cases for the property 'spark.sql.queryExecutionListeners' that adds the
- * @see[[QueryExecutionListener]] to a @see[[SparkSession]]
+ * @see `QueryExecutionListener` to a @see `SparkSession`
  */
 class SparkSQLQueryExecutionListenerSuite
   extends SparkFunSuite

sql/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala

Lines changed: 5 additions & 2 deletions
@@ -174,7 +174,6 @@ class DataFrameCallbackSuite extends QueryTest with SharedSQLContext {
      Seq(1 -> 100).toDF("x", "y").write.saveAsTable("bar")
    }
    assert(onWriteSuccessCalled)
-    spark.listenerManager.clear()
  }
 
  private def callSave(source: String, callSaveFunction: (DataFrame, String) => Unit): Unit = {

@@ -184,7 +183,6 @@ class DataFrameCallbackSuite extends QueryTest with SharedSQLContext {
      callSaveFunction(Seq(1 -> 100).toDF("x", "y"), path.getAbsolutePath)
    }
    assert(testQueryExecutionListener.onWriteSuccessCalled)
-    spark.listenerManager.clear()
  }
 
  // TODO: Currently some LongSQLMetric use -1 as initial value, so if the accumulator is never

@@ -265,4 +263,9 @@ class DataFrameCallbackSuite extends QueryTest with SharedSQLContext {
    }
  }
 
+  protected override def afterEach(): Unit = {
+    super.afterEach()
+    spark.listenerManager.clear()
+  }
+
 }
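Moving the cleanup into `afterEach` guarantees that listeners registered against the shared session are removed even when an assertion fails, so later tests never see stale callbacks. The per-test lifecycle looks roughly like the sketch below, with `AuditQueryExecutionListener` standing in for whatever listener a test registers (a hypothetical class from the earlier sketch).

```scala
// Register a listener on the shared session, run a write that triggers the callbacks,
// and always clear the listener manager afterwards, mirroring the new afterEach().
val listener = new com.example.listeners.AuditQueryExecutionListener()  // hypothetical listener
spark.listenerManager.register(listener)
try {
  spark.range(10).write.mode("overwrite").parquet("/tmp/callback-test")  // exercises onSuccess
} finally {
  spark.listenerManager.clear()  // drop every registered listener, as afterEach() now does
}
```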
