Skip to content

Commit 17e7949

Browse files
committed
fixing SPARK-14736 Deadlock in registering applications while the Master is in the RECOVERING mode
1 parent 5cb2e33 commit 17e7949

File tree

1 file changed

+11
-1
lines changed
  • core/src/main/scala/org/apache/spark/deploy/master

1 file changed

+11
-1
lines changed

core/src/main/scala/org/apache/spark/deploy/master/Master.scala

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ private[deploy] class Master(
7777
val idToApp = new HashMap[String, ApplicationInfo]
7878
private val waitingApps = new ArrayBuffer[ApplicationInfo]
7979
val apps = new HashSet[ApplicationInfo]
80+
private val waitingAppsWhileRecovering = new ArrayBuffer[ApplicationInfo]
8081

8182
private val idToWorker = new HashMap[String, WorkerInfo]
8283
private val addressToWorker = new HashMap[RpcAddress, WorkerInfo]
@@ -571,6 +572,9 @@ private[deploy] class Master(
571572
}
572573

573574
state = RecoveryState.ALIVE
575+
// Re-register the apps which were omitted during the Recovering phase.
576+
waitingAppsWhileRecovering.foreach(registerApplication)
577+
waitingAppsWhileRecovering.clear()
574578
schedule()
575579
logInfo("Recovery complete - resuming operations!")
576580
}
@@ -818,7 +822,13 @@ private[deploy] class Master(
818822
private def registerApplication(app: ApplicationInfo): Unit = {
819823
val appAddress = app.driver.address
820824
if (addressToApp.contains(appAddress)) {
821-
logInfo("Attempted to re-register application at same address: " + appAddress)
825+
if (state == RecoveryState.RECOVERING) {
826+
logInfo("Attempted to re-register application at same address: " + appAddress + " in the " +
827+
"Recovering Mode")
828+
waitingAppsWhileRecovering += app
829+
} else {
830+
logInfo("Attempted to re-register application at same address: " + appAddress)
831+
}
822832
return
823833
}
824834

0 commit comments

Comments
 (0)