
Commit 64bc339

Merge remote-tracking branch 'upstream/master' into svclbfgs

2 parents: 0bb5afe + 57accf6

130 files changed: +3014 −885 lines


common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleClient.java

Lines changed: 2 additions & 2 deletions
@@ -91,15 +91,15 @@ public void fetchBlocks(
       String execId,
       String[] blockIds,
       BlockFetchingListener listener,
-      TempShuffleFileManager tempShuffleFileManager) {
+      TempFileManager tempFileManager) {
     checkInit();
     logger.debug("External shuffle fetch from {}:{} (executor id {})", host, port, execId);
     try {
       RetryingBlockFetcher.BlockFetchStarter blockFetchStarter =
           (blockIds1, listener1) -> {
             TransportClient client = clientFactory.createClient(host, port);
             new OneForOneBlockFetcher(client, appId, execId,
-              blockIds1, listener1, conf, tempShuffleFileManager).start();
+              blockIds1, listener1, conf, tempFileManager).start();
           };

       int maxRetries = conf.maxIORetries();

common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java

Lines changed: 6 additions & 6 deletions
@@ -58,7 +58,7 @@ public class OneForOneBlockFetcher {
   private final BlockFetchingListener listener;
   private final ChunkReceivedCallback chunkCallback;
   private final TransportConf transportConf;
-  private final TempShuffleFileManager tempShuffleFileManager;
+  private final TempFileManager tempFileManager;

   private StreamHandle streamHandle = null;

@@ -79,14 +79,14 @@ public OneForOneBlockFetcher(
       String[] blockIds,
       BlockFetchingListener listener,
       TransportConf transportConf,
-      TempShuffleFileManager tempShuffleFileManager) {
+      TempFileManager tempFileManager) {
     this.client = client;
     this.openMessage = new OpenBlocks(appId, execId, blockIds);
     this.blockIds = blockIds;
     this.listener = listener;
     this.chunkCallback = new ChunkCallback();
     this.transportConf = transportConf;
-    this.tempShuffleFileManager = tempShuffleFileManager;
+    this.tempFileManager = tempFileManager;
   }

   /** Callback invoked on receipt of each chunk. We equate a single chunk to a single block. */
@@ -125,7 +125,7 @@ public void onSuccess(ByteBuffer response) {
       // Immediately request all chunks -- we expect that the total size of the request is
       // reasonable due to higher level chunking in [[ShuffleBlockFetcherIterator]].
       for (int i = 0; i < streamHandle.numChunks; i++) {
-        if (tempShuffleFileManager != null) {
+        if (tempFileManager != null) {
           client.stream(OneForOneStreamManager.genStreamChunkId(streamHandle.streamId, i),
             new DownloadCallback(i));
         } else {
@@ -164,7 +164,7 @@ private class DownloadCallback implements StreamCallback {
     private int chunkIndex;

     DownloadCallback(int chunkIndex) throws IOException {
-      this.targetFile = tempShuffleFileManager.createTempShuffleFile();
+      this.targetFile = tempFileManager.createTempFile();
       this.channel = Channels.newChannel(Files.newOutputStream(targetFile.toPath()));
       this.chunkIndex = chunkIndex;
     }
@@ -180,7 +180,7 @@ public void onComplete(String streamId) throws IOException {
       ManagedBuffer buffer = new FileSegmentManagedBuffer(transportConf, targetFile, 0,
         targetFile.length());
       listener.onBlockFetchSuccess(blockIds[chunkIndex], buffer);
-      if (!tempShuffleFileManager.registerTempShuffleFileToClean(targetFile)) {
+      if (!tempFileManager.registerTempFileToClean(targetFile)) {
         targetFile.delete();
       }
     }

common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleClient.java

Lines changed: 5 additions & 5 deletions
@@ -43,18 +43,18 @@ public void init(String appId) { }
    * @param execId the executor id.
    * @param blockIds block ids to fetch.
    * @param listener the listener to receive block fetching status.
-   * @param tempShuffleFileManager TempShuffleFileManager to create and clean temp shuffle files.
-   *                               If it's not <code>null</code>, the remote blocks will be streamed
-   *                               into temp shuffle files to reduce the memory usage, otherwise,
-   *                               they will be kept in memory.
+   * @param tempFileManager TempFileManager to create and clean temp files.
+   *                        If it's not <code>null</code>, the remote blocks will be streamed
+   *                        into temp shuffle files to reduce the memory usage, otherwise,
+   *                        they will be kept in memory.
    */
   public abstract void fetchBlocks(
       String host,
       int port,
       String execId,
       String[] blockIds,
       BlockFetchingListener listener,
-      TempShuffleFileManager tempShuffleFileManager);
+      TempFileManager tempFileManager);

   /**
    * Get the shuffle MetricsSet from ShuffleClient, this will be used in MetricsSystem to

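To make the new contract concrete, here is a minimal calling sketch against the updated fetchBlocks signature. The host, port, executor id, and block id are illustrative, and `client`/`tempFileManager` are assumed to be initialized elsewhere; passing a non-null TempFileManager streams remote blocks to temp files, while passing null keeps them in memory, as the javadoc above states.

import org.apache.spark.network.buffer.ManagedBuffer
import org.apache.spark.network.shuffle.{BlockFetchingListener, ExternalShuffleClient, TempFileManager}

def fetchExample(client: ExternalShuffleClient, tempFileManager: TempFileManager): Unit = {
  // With a non-null TempFileManager, each remote block is streamed into a
  // temp file instead of being buffered in memory.
  client.fetchBlocks("shuffle-host", 7337, "exec-1", Array("shuffle_0_0_0"),
    new BlockFetchingListener {
      override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit =
        println(s"fetched $blockId (${data.size} bytes)")
      override def onBlockFetchFailure(blockId: String, exception: Throwable): Unit =
        exception.printStackTrace()
    },
    tempFileManager)
}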
common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/TempShuffleFileManager.java renamed to common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/TempFileManager.java

Lines changed: 6 additions & 6 deletions
@@ -20,17 +20,17 @@
 import java.io.File;

 /**
- * A manager to create temp shuffle block files to reduce the memory usage and also clean temp
+ * A manager to create temp block files to reduce the memory usage and also clean temp
  * files when they won't be used any more.
  */
-public interface TempShuffleFileManager {
+public interface TempFileManager {

-  /** Create a temp shuffle block file. */
-  File createTempShuffleFile();
+  /** Create a temp block file. */
+  File createTempFile();

   /**
-   * Register a temp shuffle file to clean up when it won't be used any more. Return whether the
+   * Register a temp file to clean up when it won't be used any more. Return whether the
    * file is registered successfully. If `false`, the caller should clean up the file by itself.
    */
-  boolean registerTempShuffleFileToClean(File file);
+  boolean registerTempFileToClean(File file);
 }

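As a rough sketch of how the renamed interface could be satisfied: the class below is illustrative only, not Spark's block-manager-backed implementation; the scratch-directory scheme and the `stop()` method are assumptions made for the example.

import java.io.File
import java.nio.file.Files
import java.util.concurrent.ConcurrentLinkedQueue

import org.apache.spark.network.shuffle.TempFileManager

// Creates temp files in a scratch directory and remembers registered files
// for bulk cleanup; mirrors only the contract documented above.
class ScratchDirTempFileManager(scratchDir: File) extends TempFileManager {
  private val registered = new ConcurrentLinkedQueue[File]()
  @volatile private var stopped = false

  override def createTempFile(): File =
    Files.createTempFile(scratchDir.toPath, "fetch-", ".tmp").toFile

  // Per the contract: return false once the manager no longer accepts
  // registrations, so the caller deletes the file itself.
  override def registerTempFileToClean(file: File): Boolean =
    if (stopped) false else { registered.add(file); true }

  def stop(): Unit = {
    stopped = true
    var f = registered.poll()
    while (f != null) { f.delete(); f = registered.poll() }
  }
}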
core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java

Lines changed: 5 additions & 4 deletions
@@ -172,10 +172,11 @@ public void free() {
   public void reset() {
     if (consumer != null) {
       consumer.freeArray(array);
-      // the call to consumer.allocateArray may trigger a spill
-      // which in turn access this instance and eventually re-enter this method and try to free the array again.
-      // by setting the array to null and its length to 0 we effectively make the spill code-path a no-op.
-      // setting the array to null also indicates that it has already been de-allocated which prevents a double de-allocation in free().
+      // the call to consumer.allocateArray may trigger a spill which in turn access this instance
+      // and eventually re-enter this method and try to free the array again. by setting the array
+      // to null and its length to 0 we effectively make the spill code-path a no-op. setting the
+      // array to null also indicates that it has already been de-allocated which prevents a double
+      // de-allocation in free().
       array = null;
       usableCapacity = 0;
       pos = 0;

core/src/main/scala/org/apache/spark/SparkConf.scala

Lines changed: 3 additions & 1 deletion
@@ -662,7 +662,9 @@ private[spark] object SparkConf extends Logging {
     "spark.yarn.jars" -> Seq(
       AlternateConfig("spark.yarn.jar", "2.0")),
     "spark.yarn.access.hadoopFileSystems" -> Seq(
-      AlternateConfig("spark.yarn.access.namenodes", "2.2"))
+      AlternateConfig("spark.yarn.access.namenodes", "2.2")),
+    "spark.maxRemoteBlockSizeFetchToMem" -> Seq(
+      AlternateConfig("spark.reducer.maxReqSizeShuffleToMem", "2.3"))
   )

   /**

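The AlternateConfig entry added above wires the old key into SparkConf's deprecated-alias lookup, so reading the new key falls back to a value set under the old one. A small usage sketch, assuming this commit's alias map (the value is arbitrary):

import org.apache.spark.SparkConf

val conf = new SparkConf(false)
  .set("spark.reducer.maxReqSizeShuffleToMem", "200m") // deprecated alias as of 2.3

// The deprecated-config lookup resolves the alternate key registered above.
assert(conf.get("spark.maxRemoteBlockSizeFetchToMem") == "200m")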
core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala

Lines changed: 1 addition & 0 deletions
@@ -36,6 +36,7 @@ private[spark] object PythonEvalType {
   val NON_UDF = 0
   val SQL_BATCHED_UDF = 1
   val SQL_PANDAS_UDF = 2
+  val SQL_PANDAS_GROUPED_UDF = 3
 }

 /**

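A hedged sketch of how such an eval-type tag might be branched on; `describeEvalType` is hypothetical (the real handling lives in PySpark's worker protocol), and since PythonEvalType is private[spark], this assumes code inside an org.apache.spark package.

import org.apache.spark.api.python.PythonEvalType

def describeEvalType(evalType: Int): String = evalType match {
  case PythonEvalType.NON_UDF => "plain Python function"
  case PythonEvalType.SQL_BATCHED_UDF => "row-batched SQL UDF"
  case PythonEvalType.SQL_PANDAS_UDF => "vectorized pandas UDF"
  case PythonEvalType.SQL_PANDAS_GROUPED_UDF => "grouped pandas UDF (the new constant)"
  case other => s"unknown eval type $other"
}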
core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala

Lines changed: 13 additions & 4 deletions
@@ -61,13 +61,17 @@ class SparkHadoopUtil extends Logging {
    * do a FileSystem.closeAllForUGI in order to avoid leaking Filesystems
    */
   def runAsSparkUser(func: () => Unit) {
+    createSparkUser().doAs(new PrivilegedExceptionAction[Unit] {
+      def run: Unit = func()
+    })
+  }
+
+  def createSparkUser(): UserGroupInformation = {
     val user = Utils.getCurrentUserName()
-    logDebug("running as user: " + user)
+    logDebug("creating UGI for user: " + user)
     val ugi = UserGroupInformation.createRemoteUser(user)
     transferCredentials(UserGroupInformation.getCurrentUser(), ugi)
-    ugi.doAs(new PrivilegedExceptionAction[Unit] {
-      def run: Unit = func()
-    })
+    ugi
   }

   def transferCredentials(source: UserGroupInformation, dest: UserGroupInformation) {
@@ -417,6 +421,11 @@ class SparkHadoopUtil extends Logging {
     creds.readTokenStorageStream(new DataInputStream(tokensBuf))
     creds
   }
+
+  def isProxyUser(ugi: UserGroupInformation): Boolean = {
+    ugi.getAuthenticationMethod() == UserGroupInformation.AuthenticationMethod.PROXY
+  }
+
 }

 object SparkHadoopUtil {

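The split above lets callers obtain the UGI directly, e.g. to check isProxyUser before deciding how to authenticate. A usage sketch: the filesystem work is a placeholder, and access assumes code living inside Spark's own packages, since SparkHadoopUtil is not public API.

import java.security.PrivilegedExceptionAction
import org.apache.hadoop.security.UserGroupInformation
import org.apache.spark.deploy.SparkHadoopUtil

val util = SparkHadoopUtil.get
val ugi: UserGroupInformation = util.createSparkUser()
if (util.isProxyUser(ugi)) {
  // a proxy-user login cannot re-login from a keytab; token-based auth applies
}
ugi.doAs(new PrivilegedExceptionAction[Unit] {
  override def run(): Unit = { /* placeholder: filesystem work as the Spark user */ }
})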
core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 16 additions & 16 deletions
@@ -342,6 +342,22 @@ object SparkSubmit extends CommandLineUtils with Logging {
     val hadoopConf = conf.getOrElse(SparkHadoopUtil.newConfiguration(sparkConf))
     val targetDir = Utils.createTempDir()

+    // assure a keytab is available from any place in a JVM
+    if (clusterManager == YARN || clusterManager == LOCAL || clusterManager == MESOS) {
+      if (args.principal != null) {
+        if (args.keytab != null) {
+          require(new File(args.keytab).exists(), s"Keytab file: ${args.keytab} does not exist")
+          // Add keytab and principal configurations in sysProps to make them available
+          // for later use; e.g. in spark sql, the isolated class loader used to talk
+          // to HiveMetastore will use these settings. They will be set as Java system
+          // properties and then loaded by SparkConf
+          sysProps.put("spark.yarn.keytab", args.keytab)
+          sysProps.put("spark.yarn.principal", args.principal)
+          UserGroupInformation.loginUserFromKeytab(args.principal, args.keytab)
+        }
+      }
+    }
+
     // Resolve glob path for different resources.
     args.jars = Option(args.jars).map(resolveGlobPaths(_, hadoopConf)).orNull
     args.files = Option(args.files).map(resolveGlobPaths(_, hadoopConf)).orNull
@@ -641,22 +657,6 @@ object SparkSubmit extends CommandLineUtils with Logging {
       }
     }

-    // assure a keytab is available from any place in a JVM
-    if (clusterManager == YARN || clusterManager == LOCAL || clusterManager == MESOS) {
-      if (args.principal != null) {
-        if (args.keytab != null) {
-          require(new File(args.keytab).exists(), s"Keytab file: ${args.keytab} does not exist")
-          // Add keytab and principal configurations in sysProps to make them available
-          // for later use; e.g. in spark sql, the isolated class loader used to talk
-          // to HiveMetastore will use these settings. They will be set as Java system
-          // properties and then loaded by SparkConf
-          sysProps.put("spark.yarn.keytab", args.keytab)
-          sysProps.put("spark.yarn.principal", args.principal)
-          UserGroupInformation.loginUserFromKeytab(args.principal, args.keytab)
-        }
-      }
-    }
-
     if (clusterManager == MESOS && UserGroupInformation.isSecurityEnabled) {
       setRMPrincipal(sysProps)
     }

core/src/main/scala/org/apache/spark/deploy/security/HBaseDelegationTokenProvider.scala

Lines changed: 3 additions & 1 deletion
@@ -56,7 +56,9 @@ private[security] class HBaseDelegationTokenProvider
       None
     }

-  override def delegationTokensRequired(hadoopConf: Configuration): Boolean = {
+  override def delegationTokensRequired(
+      sparkConf: SparkConf,
+      hadoopConf: Configuration): Boolean = {
     hbaseConf(hadoopConf).get("hbase.security.authentication") == "kerberos"
   }

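To illustrate why the signature gained a SparkConf parameter: a provider can now consult Spark settings as well as Hadoop ones when deciding whether tokens are needed. The provider class and both config keys below are hypothetical, not part of Spark.

import org.apache.hadoop.conf.Configuration
import org.apache.spark.SparkConf

class MyServiceDelegationTokenProvider {
  // Mirrors the widened delegationTokensRequired signature above; the Spark
  // and Hadoop keys checked here are made up for illustration.
  def delegationTokensRequired(sparkConf: SparkConf, hadoopConf: Configuration): Boolean = {
    sparkConf.getBoolean("spark.myservice.security.enabled", defaultValue = false) &&
      hadoopConf.get("myservice.security.authentication") == "kerberos"
  }
}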