Commit 84d817c

Merge remote-tracking branch 'upstream/master' into spark-34152

2 parents: d4e958a + 2e31e2c
File tree: 129 files changed (+2616 / -824 lines)


.github/workflows/build_and_test.yml

Lines changed: 7 additions & 1 deletion
@@ -61,6 +61,12 @@ jobs:
           excluded-tags: org.apache.spark.tags.SlowHiveTest
           comment: "- other tests"
         # SQL tests
+        - modules: sql
+          java: 8
+          hadoop: hadoop3.2
+          hive: hive2.3
+          included-tags: org.apache.spark.tags.DedicatedJVMTest
+          comment: "- dedicated JVM tests"
         - modules: sql
           java: 8
           hadoop: hadoop3.2
@@ -71,7 +77,7 @@ jobs:
           java: 8
           hadoop: hadoop3.2
           hive: hive2.3
-          excluded-tags: org.apache.spark.tags.ExtendedSQLTest
+          excluded-tags: org.apache.spark.tags.DedicatedJVMTest,org.apache.spark.tags.ExtendedSQLTest
           comment: "- other tests"
     env:
       MODULES_TO_TEST: ${{ matrix.modules }}
.../org/apache/spark/tags/DedicatedJVMTest.java (new file)

Lines changed: 30 additions & 0 deletions

@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.tags;
+
+import org.scalatest.TagAnnotation;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+
+@TagAnnotation
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.METHOD, ElementType.TYPE})
+public @interface DedicatedJVMTest { }
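
Because DedicatedJVMTest has RUNTIME retention and is marked @TagAnnotation, ScalaTest treats it as a test tag when placed on a suite class, which is how the included-tags/excluded-tags entries above select it. A minimal sketch of tagging a suite (the suite name and test body are hypothetical, not from this commit):

package org.apache.spark.sql

import org.scalatest.funsuite.AnyFunSuite

import org.apache.spark.tags.DedicatedJVMTest

// Hypothetical suite used only for illustration; the class-level annotation
// lets the CI matrix include or exclude this suite by tag.
@DedicatedJVMTest
class ExampleIsolatedSuite extends AnyFunSuite {
  test("state that must not leak across suites") {
    assert(1 + 1 == 2)
  }
}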

core/src/main/scala/org/apache/spark/internal/config/package.scala

Lines changed: 3 additions & 2 deletions
@@ -1726,9 +1726,10 @@ package object config {
     ConfigBuilder("spark.eventLog.compression.codec")
       .doc("The codec used to compress event log. By default, Spark provides four codecs: " +
         "lz4, lzf, snappy, and zstd. You can also use fully qualified class names to specify " +
-        "the codec. If this is not given, spark.io.compression.codec will be used.")
+        "the codec.")
       .version("3.0.0")
-      .fallbackConf(IO_COMPRESSION_CODEC)
+      .stringConf
+      .createWithDefault("zstd")

   private[spark] val BUFFER_SIZE =
     ConfigBuilder("spark.buffer.size")

core/src/main/scala/org/apache/spark/resource/ExecutorResourceRequest.scala

Lines changed: 2 additions & 2 deletions
@@ -20,7 +20,7 @@ package org.apache.spark.resource
 import org.apache.spark.annotation.{Evolving, Since}

 /**
- * An Executor resource request. This is used in conjunction with the ResourceProfile to
+ * An Executor resource request. This is used in conjunction with the [[ResourceProfile]] to
  * programmatically specify the resources needed for an RDD that will be applied at the
  * stage level.
  *
@@ -39,7 +39,7 @@ import org.apache.spark.annotation.{Evolving, Since}
  *
  * See the configuration and cluster specific docs for more details.
  *
- * Use ExecutorResourceRequests class as a convenience API.
+ * Use [[ExecutorResourceRequests]] class as a convenience API.
  *
  * @param resourceName Name of the resource
  * @param amount Amount requesting
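
A hedged sketch of constructing the request directly with the resourceName/amount parameters documented above; the discovery script path and vendor string are placeholder values:

import org.apache.spark.resource.ExecutorResourceRequest

// Request 2 GPUs per executor; the script path and vendor are hypothetical.
val gpuRequest = new ExecutorResourceRequest(
  "gpu",                    // resourceName
  2,                        // amount per executor
  "/opt/spark/getGpus.sh",  // discoveryScript (placeholder)
  "nvidia.com")             // vendor (placeholder)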

core/src/main/scala/org/apache/spark/resource/ExecutorResourceRequests.scala

Lines changed: 12 additions & 0 deletions
@@ -37,12 +37,19 @@ class ExecutorResourceRequests() extends Serializable {

   private val _executorResources = new ConcurrentHashMap[String, ExecutorResourceRequest]()

+  /**
+   * Returns all the resource requests for the executor.
+   */
   def requests: Map[String, ExecutorResourceRequest] = _executorResources.asScala.toMap

+  /**
+   * (Java-specific) Returns all the resource requests for the executor.
+   */
   def requestsJMap: JMap[String, ExecutorResourceRequest] = requests.asJava

   /**
    * Specify heap memory. The value specified will be converted to MiB.
+   * This is a convenient API to add [[ExecutorResourceRequest]] for "memory" resource.
    *
    * @param amount Amount of memory. In the same format as JVM memory strings (e.g. 512m, 2g).
    *               Default unit is MiB if not specified.
@@ -57,6 +64,7 @@ class ExecutorResourceRequests() extends Serializable {
   /**
    * Specify off heap memory. The value specified will be converted to MiB.
    * This value only take effect when MEMORY_OFFHEAP_ENABLED is true.
+   * This is a convenient API to add [[ExecutorResourceRequest]] for "offHeap" resource.
    *
    * @param amount Amount of memory. In the same format as JVM memory strings (e.g. 512m, 2g).
    *               Default unit is MiB if not specified.
@@ -70,6 +78,7 @@ class ExecutorResourceRequests() extends Serializable {

   /**
    * Specify overhead memory. The value specified will be converted to MiB.
+   * This is a convenient API to add [[ExecutorResourceRequest]] for "memoryOverhead" resource.
    *
    * @param amount Amount of memory. In the same format as JVM memory strings (e.g. 512m, 2g).
    *               Default unit is MiB if not specified.
@@ -83,6 +92,7 @@ class ExecutorResourceRequests() extends Serializable {

   /**
    * Specify pyspark memory. The value specified will be converted to MiB.
+   * This is a convenient API to add [[ExecutorResourceRequest]] for "pyspark.memory" resource.
    *
    * @param amount Amount of memory. In the same format as JVM memory strings (e.g. 512m, 2g).
    *               Default unit is MiB if not specified.
@@ -96,6 +106,7 @@ class ExecutorResourceRequests() extends Serializable {

   /**
    * Specify number of cores per Executor.
+   * This is a convenient API to add [[ExecutorResourceRequest]] for "cores" resource.
    *
    * @param amount Number of cores to allocate per Executor.
    */
@@ -111,6 +122,7 @@ class ExecutorResourceRequests() extends Serializable {
    * like GPUs are gpu (spark configs spark.executor.resource.gpu.*). If you pass in a resource
    * that the cluster manager doesn't support the result is undefined, it may error or may just
    * be ignored.
+   * This is a convenient API to add [[ExecutorResourceRequest]] for custom resources.
    *
    * @param resourceName Name of the resource.
    * @param amount amount of that resource per executor to use.
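
A short sketch of the convenience API these new comments describe (amounts and the discovery script path are illustrative, not from the commit):

import org.apache.spark.resource.ExecutorResourceRequests

val execReqs = new ExecutorResourceRequests()
  .cores(4)
  .memory("6g")
  .memoryOverhead("1g")
  .resource("gpu", 1, "/opt/spark/getGpus.sh", "nvidia.com") // placeholder script path

// The requests map is keyed by resource name: "cores", "memory", "memoryOverhead", "gpu".
execReqs.requests.keys.foreach(println)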

core/src/main/scala/org/apache/spark/resource/ResourceAllocator.scala

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ import org.apache.spark.SparkException
  * Trait used to help executor/worker allocate resources.
  * Please note that this is intended to be used in a single thread.
  */
-trait ResourceAllocator {
+private[spark] trait ResourceAllocator {

   protected def resourceName: String
   protected def resourceAddresses: Seq[String]

core/src/main/scala/org/apache/spark/resource/ResourceProfile.scala

Lines changed: 34 additions & 3 deletions
@@ -35,7 +35,13 @@ import org.apache.spark.util.Utils
  * Resource profile to associate with an RDD. A ResourceProfile allows the user to
  * specify executor and task requirements for an RDD that will get applied during a
  * stage. This allows the user to change the resource requirements between stages.
- * This is meant to be immutable so user can't change it after building.
+ * This is meant to be immutable so user can't change it after building. Users
+ * should use [[ResourceProfileBuilder]] to build it.
+ *
+ * @param executorResources Resource requests for executors. Mapped from the resource
+ *                          name (e.g., cores, memory, CPU) to its specific request.
+ * @param taskResources Resource requests for tasks. Mapped from the resource
+ *                      name (e.g., cores, memory, CPU) to its specific request.
  */
 @Evolving
 @Since("3.1.0")
@@ -53,6 +59,9 @@ class ResourceProfile(
   private var _maxTasksPerExecutor: Option[Int] = None
   private var _coresLimitKnown: Boolean = false

+  /**
+   * A unique id of this ResourceProfile
+   */
   def id: Int = _id

   /**
@@ -242,17 +251,39 @@

 object ResourceProfile extends Logging {
   // task resources
+  /**
+   * built-in task resource: cpus
+   */
   val CPUS = "cpus"
   // Executor resources
   // Make sure add new executor resource in below allSupportedExecutorResources
+  /**
+   * built-in executor resource: cores
+   */
   val CORES = "cores"
+  /**
+   * built-in executor resource: memory
+   */
   val MEMORY = "memory"
+  /**
+   * built-in executor resource: offHeap
+   */
   val OFFHEAP_MEM = "offHeap"
+  /**
+   * built-in executor resource: memoryOverhead
+   */
   val OVERHEAD_MEM = "memoryOverhead"
+  /**
+   * built-in executor resource: pyspark.memory
+   */
   val PYSPARK_MEM = "pyspark.memory"

-  // all supported spark executor resources (minus the custom resources like GPUs/FPGAs)
-  val allSupportedExecutorResources = Seq(CORES, MEMORY, OVERHEAD_MEM, PYSPARK_MEM, OFFHEAP_MEM)
+  /**
+   * Return all supported Spark built-in executor resources, custom resources like GPUs/FPGAs
+   * are excluded.
+   */
+  def allSupportedExecutorResources: Array[String] =
+    Array(CORES, MEMORY, OVERHEAD_MEM, PYSPARK_MEM, OFFHEAP_MEM)

   val UNKNOWN_RESOURCE_PROFILE_ID = -1
   val DEFAULT_RESOURCE_PROFILE_ID = 0
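
allSupportedExecutorResources is now a method returning Array[String] rather than a public val, so callers can only read the list. A small sketch, assuming the object and these constants remain publicly visible:

import org.apache.spark.resource.ResourceProfile

val builtIns = ResourceProfile.allSupportedExecutorResources
assert(builtIns.contains(ResourceProfile.MEMORY))
assert(!builtIns.contains("gpu")) // custom resources such as GPUs/FPGAs are excluded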

core/src/main/scala/org/apache/spark/resource/ResourceProfileBuilder.scala

Lines changed: 16 additions & 4 deletions
@@ -26,17 +26,19 @@ import org.apache.spark.annotation.{Evolving, Since}


 /**
- * Resource profile builder to build a Resource profile to associate with an RDD.
- * A ResourceProfile allows the user to specify executor and task requirements for an RDD
- * that will get applied during a stage. This allows the user to change the resource
+ * Resource profile builder to build a [[ResourceProfile]] to associate with an RDD.
+ * A [[ResourceProfile]] allows the user to specify executor and task resource requirements
+ * for an RDD that will get applied during a stage. This allows the user to change the resource
  * requirements between stages.
  *
  */
 @Evolving
 @Since("3.1.0")
 class ResourceProfileBuilder() {

+  // Task resource requests specified by users, mapped from resource name to the request.
   private val _taskResources = new ConcurrentHashMap[String, TaskResourceRequest]()
+  // Executor resource requests specified by users, mapped from resource name to the request.
   private val _executorResources = new ConcurrentHashMap[String, ExecutorResourceRequest]()

   def taskResources: Map[String, TaskResourceRequest] = _taskResources.asScala.toMap
@@ -54,11 +56,21 @@ class ResourceProfileBuilder() {
     _executorResources.asScala.asJava
   }

+  /**
+   * Add executor resource requests
+   * @param requests The detailed executor resource requests, see [[ExecutorResourceRequests]]
+   * @return This ResourceProfileBuilder
+   */
   def require(requests: ExecutorResourceRequests): this.type = {
     _executorResources.putAll(requests.requests.asJava)
     this
   }

+  /**
+   * Add task resource requests
+   * @param requests The detailed task resource requests, see [[TaskResourceRequest]]
+   * @return This ResourceProfileBuilder
+   */
   def require(requests: TaskResourceRequests): this.type = {
     _taskResources.putAll(requests.requests.asJava)
     this
@@ -80,7 +92,7 @@ class ResourceProfileBuilder() {
     s"task resources: ${_taskResources.asScala.map(pair => s"${pair._1}=${pair._2.toString()}")}"
   }

-  def build: ResourceProfile = {
+  def build(): ResourceProfile = {
     new ResourceProfile(executorResources, taskResources)
   }
 }
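
An end-to-end sketch of the builder flow documented above, combining executor and task requests and attaching the profile to an RDD; sc, the amounts, and a cluster that supports stage-level scheduling are assumed:

import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfileBuilder, TaskResourceRequests}

val execReqs = new ExecutorResourceRequests().cores(4).memory("6g")
val taskReqs = new TaskResourceRequests().cpus(1)

// build() now takes parentheses, matching the diff above.
val profile = new ResourceProfileBuilder()
  .require(execReqs)
  .require(taskReqs)
  .build()

// sc is an existing SparkContext; the profile applies to the stages computing this RDD.
val rdd = sc.parallelize(1 to 100, 4).map(_ * 2).withResources(profile)
rdd.count()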

core/src/main/scala/org/apache/spark/resource/TaskResourceRequest.scala

Lines changed: 8 additions & 2 deletions
@@ -20,11 +20,17 @@ package org.apache.spark.resource
 import org.apache.spark.annotation.{Evolving, Since}

 /**
- * A task resource request. This is used in conjunction with the ResourceProfile to
+ * A task resource request. This is used in conjunction with the [[ResourceProfile]] to
  * programmatically specify the resources needed for an RDD that will be applied at the
  * stage level.
  *
- * Use TaskResourceRequests class as a convenience API.
+ * Use [[TaskResourceRequests]] class as a convenience API.
+ *
+ * @param resourceName Resource name
+ * @param amount Amount requesting as a Double to support fractional resource requests.
+ *               Valid values are less than or equal to 0.5 or whole numbers. This essentially
+ *               lets you configure X number of tasks to run on a single resource,
+ *               ie amount equals 0.5 translates into 2 tasks per resource address.
  */
 @Evolving
 @Since("3.1.0")
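
A worked sketch of the fractional semantics in the new @param amount doc (resource names and values are illustrative): 0.5 means two tasks share one resource address, while whole numbers request that many addresses per task.

import org.apache.spark.resource.TaskResourceRequest

val halfGpuPerTask = new TaskResourceRequest("gpu", 0.5) // two tasks per GPU address
val twoGpusPerTask = new TaskResourceRequest("gpu", 2.0) // two GPU addresses per task
// A value like 0.7 is invalid: fractions must be <= 0.5, otherwise use whole numbers.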

core/src/main/scala/org/apache/spark/resource/TaskResourceRequests.scala

Lines changed: 12 additions & 1 deletion
@@ -36,12 +36,19 @@ class TaskResourceRequests() extends Serializable {

   private val _taskResources = new ConcurrentHashMap[String, TaskResourceRequest]()

+  /**
+   * Returns all the resource requests for the task.
+   */
   def requests: Map[String, TaskResourceRequest] = _taskResources.asScala.toMap

+  /**
+   * (Java-specific) Returns all the resource requests for the task.
+   */
   def requestsJMap: JMap[String, TaskResourceRequest] = requests.asJava

   /**
    * Specify number of cpus per Task.
+   * This is a convenient API to add [[TaskResourceRequest]] for cpus.
    *
    * @param amount Number of cpus to allocate per Task.
    */
@@ -52,7 +59,8 @@ class TaskResourceRequests() extends Serializable {
   }

   /**
-   * Amount of a particular custom resource(GPU, FPGA, etc) to use.
+   * Amount of a particular custom resource(GPU, FPGA, etc) to use.
+   * This is a convenient API to add [[TaskResourceRequest]] for custom resources.
    *
    * @param resourceName Name of the resource.
    * @param amount Amount requesting as a Double to support fractional resource requests.
@@ -66,6 +74,9 @@ class TaskResourceRequests() extends Serializable {
     this
   }

+  /**
+   * Add a certain [[TaskResourceRequest]] to the request set.
+   */
   def addRequest(treq: TaskResourceRequest): this.type = {
     _taskResources.put(treq.resourceName, treq)
     this
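
A brief sketch of the convenience methods and addRequest documented above (resource names and amounts are illustrative):

import org.apache.spark.resource.{TaskResourceRequest, TaskResourceRequests}

val taskReqs = new TaskResourceRequests()
  .cpus(2)
  .resource("gpu", 0.25)                          // four tasks share one GPU address
  .addRequest(new TaskResourceRequest("fpga", 1)) // pre-built request, keyed by name

// requests is keyed by resource name: "cpus", "gpu", "fpga".
taskReqs.requests.foreach { case (name, req) => println(s"$name -> ${req.amount}") }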
