Skip to content

Commit 6e30931

Browse files
committed
Fix some potential issues when fetching remote resources from an http(s) server in YARN mode
Change-Id: I6317a464c4fd526a8057c578a05a60420d975a47
1 parent c2cb5f7 commit 6e30931

File tree

1 file changed

+26
-4
lines changed

1 file changed

+26
-4
lines changed

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,31 @@ object SparkSubmit extends CommandLineUtils {
345345
}.orNull
346346
}
347347

348+
if (clusterManager == YARN) {
349+
/**
 * Returns true if any entry of the comma-separated `paths` list has an http, https or ftp
 * scheme after being resolved through `Utils.resolveURI`. A null `paths`, or a list that
 * contains only blank entries, yields false.
 */
def isNoneFsFileExist(paths: String): Boolean = {
  // URI schemes are case-insensitive (RFC 3986), so "HTTP://..." must be caught as well.
  val remoteSchemes = Seq("http", "https", "ftp")
  Option(paths).exists { p =>
    p.split(",").map(_.trim).filter(_.nonEmpty).exists { path =>
      val scheme = Utils.resolveURI(path).getScheme
      // equalsIgnoreCase(null) is false, so a missing scheme is treated as non-remote.
      remoteSchemes.exists(_.equalsIgnoreCase(scheme))
    }
  }
}
360+
361+
// Spark on YARN doesn't support uploading remote resources from http, https or ftp servers
362+
// directly to the distributed cache, so print an error and exit the process.
363+
if (isNoneFsFileExist(args.jars) ||
364+
isNoneFsFileExist(args.files) ||
365+
isNoneFsFileExist(args.primaryResource) ||
366+
isNoneFsFileExist(args.pyFiles) ||
367+
isNoneFsFileExist(args.archives)) {
368+
printErrorAndExit(
369+
"Spark on YARN doesn't support resources on remote http, https or ftp server.")
370+
}
371+
}
372+
348373
// If we're running a python app, set the main class to our specific python runner
349374
if (args.isPython && deployMode == CLIENT) {
350375
if (args.primaryResource == PYSPARK_SHELL) {
@@ -463,6 +488,7 @@ object SparkSubmit extends CommandLineUtils {
463488
OptionAssigner(args.queue, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.queue"),
464489
OptionAssigner(args.numExecutors, YARN, ALL_DEPLOY_MODES,
465490
sysProp = "spark.executor.instances"),
491+
OptionAssigner(args.pyFiles, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.pyFiles"),
466492
OptionAssigner(args.jars, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.jars"),
467493
OptionAssigner(args.files, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.files"),
468494
OptionAssigner(args.archives, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.dist.archives"),
@@ -564,10 +590,6 @@ object SparkSubmit extends CommandLineUtils {
564590
if (args.isPython) {
565591
sysProps.put("spark.yarn.isPython", "true")
566592
}
567-
568-
if (args.pyFiles != null) {
569-
sysProps("spark.yarn.dist.pyFiles") = args.pyFiles
570-
}
571593
}
572594

573595
// assure a keytab is available from any place in a JVM

0 commit comments

Comments
 (0)