Commit 5dbfbb4

Merge branch 'master' of github.com:apache/spark
Conflicts:
    core/src/main/scala/org/apache/spark/deploy/DeployWebUI.scala
    core/src/main/scala/org/apache/spark/deploy/WebUI.scala
    core/src/main/scala/org/apache/spark/deploy/master/Master.scala
    core/src/main/scala/org/apache/spark/ui/WebUI.scala
2 parents: 60bc6d5 + d780983

11,433 files changed (+247,907 / -176 lines)

assembly/pom.xml

Lines changed: 5 additions & 0 deletions
@@ -79,6 +79,11 @@
       <artifactId>spark-graphx_${scala.binary.version}</artifactId>
       <version>${project.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
     <dependency>
       <groupId>net.sf.py4j</groupId>
       <artifactId>py4j</artifactId>

bin/compute-classpath.sh

Lines changed: 27 additions & 4 deletions
@@ -33,23 +33,43 @@ fi
 # Build up classpath
 CLASSPATH="$SPARK_CLASSPATH:$FWDIR/conf"
 
+# Support for interacting with Hive. Since hive pulls in a lot of dependencies that might break
+# existing Spark applications, it is not included in the standard spark assembly. Instead, we only
+# include it in the classpath if the user has explicitly requested it by running "sbt hive/assembly"
+# Hopefully we will find a way to avoid uber-jars entirely and deploy only the needed packages in
+# the future.
+if [ -f "$FWDIR"/sql/hive/target/scala-$SCALA_VERSION/spark-hive-assembly-*.jar ]; then
+  echo "Hive assembly found, including hive support. If this isn't desired run sbt hive/clean."
+
+  # Datanucleus jars do not work if only included in the uberjar as plugin.xml metadata is lost.
+  DATANUCLEUSJARS=$(JARS=("$FWDIR/lib_managed/jars"/datanucleus-*.jar); IFS=:; echo "${JARS[*]}")
+  CLASSPATH=$CLASSPATH:$DATANUCLEUSJARS
+
+  ASSEMBLY_DIR="$FWDIR/sql/hive/target/scala-$SCALA_VERSION/"
+else
+  ASSEMBLY_DIR="$FWDIR/assembly/target/scala-$SCALA_VERSION/"
+fi
+
 # First check if we have a dependencies jar. If so, include binary classes with the deps jar
-if [ -f "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*-deps.jar ]; then
+if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
   CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"
 
-  DEPS_ASSEMBLY_JAR=`ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*-deps.jar`
+  DEPS_ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark*-assembly*hadoop*-deps.jar`
   CLASSPATH="$CLASSPATH:$DEPS_ASSEMBLY_JAR"
 else
   # Else use spark-assembly jar from either RELEASE or assembly directory
   if [ -f "$FWDIR/RELEASE" ]; then
-    ASSEMBLY_JAR=`ls "$FWDIR"/jars/spark-assembly*.jar`
+    ASSEMBLY_JAR=`ls "$FWDIR"/jars/spark*-assembly*.jar`
   else
-    ASSEMBLY_JAR=`ls "$FWDIR"/assembly/target/scala-$SCALA_VERSION/spark-assembly*hadoop*.jar`
+    ASSEMBLY_JAR=`ls "$ASSEMBLY_DIR"/spark*-assembly*hadoop*.jar`
   fi
   CLASSPATH="$CLASSPATH:$ASSEMBLY_JAR"
 fi

@@ -62,6 +82,9 @@ if [[ $SPARK_TESTING == 1 ]]; then
   CLASSPATH="$CLASSPATH:$FWDIR/bagel/target/scala-$SCALA_VERSION/test-classes"
   CLASSPATH="$CLASSPATH:$FWDIR/graphx/target/scala-$SCALA_VERSION/test-classes"
   CLASSPATH="$CLASSPATH:$FWDIR/streaming/target/scala-$SCALA_VERSION/test-classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/catalyst/target/scala-$SCALA_VERSION/test-classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/test-classes"
+  CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/test-classes"
 fi
 
 # Add hadoop conf dir if given -- otherwise FileSystem.*, etc fail !

core/src/main/scala/org/apache/spark/deploy/history/IndexPage.scala

Lines changed: 2 additions & 3 deletions
@@ -23,8 +23,7 @@ import javax.servlet.http.HttpServletRequest
 
 import scala.xml.Node
 
-import org.apache.spark.deploy.DeployWebUI
-import org.apache.spark.ui.UIUtils
+import org.apache.spark.ui.{UIUtils, WebUI}
 
 private[spark] class IndexPage(parent: HistoryServer) {
   private val dateFmt = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")

@@ -63,7 +62,7 @@ private[spark] class IndexPage(parent: HistoryServer) {
     val startTime = if (info.started) dateFmt.format(new Date(info.startTime)) else "Not started"
     val endTime = if (info.finished) dateFmt.format(new Date(info.endTime)) else "Not finished"
     val difference = if (info.started && info.finished) info.endTime - info.startTime else -1L
-    val duration = if (difference > 0) DeployWebUI.formatDuration(difference) else "---"
+    val duration = if (difference > 0) WebUI.formatDuration(difference) else "---"
     val logDirectory = parent.getAppId(info.logPath)
     val lastUpdated = dateFmt.format(new Date(info.lastUpdated))
     <tr>

core/src/main/scala/org/apache/spark/deploy/master/Master.scala

Lines changed: 3 additions & 3 deletions
@@ -51,7 +51,7 @@ private[spark] class Master(
 
   val conf = new SparkConf
 
-  val DATE_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss") // For application IDs
+  def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss") // For application IDs
   val WORKER_TIMEOUT = conf.getLong("spark.worker.timeout", 60) * 1000
   val RETAINED_APPLICATIONS = conf.getInt("spark.deploy.retainedApplications", 200)
   val REAPER_ITERATIONS = conf.getInt("spark.dead.worker.persistence", 15)

@@ -671,7 +671,7 @@
 
   /** Generate a new app ID given a app's submission date */
   def newApplicationId(submitDate: Date): String = {
-    val appId = "app-%s-%04d".format(DATE_FORMAT.format(submitDate), nextAppNumber)
+    val appId = "app-%s-%04d".format(createDateFormat.format(submitDate), nextAppNumber)
     nextAppNumber += 1
     appId
   }

@@ -695,7 +695,7 @@
   }
 
   def newDriverId(submitDate: Date): String = {
-    val appId = "driver-%s-%04d".format(DATE_FORMAT.format(submitDate), nextDriverNumber)
+    val appId = "driver-%s-%04d".format(createDateFormat.format(submitDate), nextDriverNumber)
     nextDriverNumber += 1
     appId
   }
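
Why this change: java.text.SimpleDateFormat is not thread-safe, and a val holds a single instance shared by every thread that touches the Master, so concurrent format() calls can corrupt each other's output. Declaring createDateFormat as a def instead hands each caller a fresh formatter. A minimal sketch of the two variants (object and val names here are illustrative, not from the Spark sources):

    import java.text.SimpleDateFormat
    import java.util.Date

    object DateFormatSafety {
      // Unsafe under concurrency: a single shared instance whose internal
      // Calendar is mutated by every format() call.
      val sharedFormat = new SimpleDateFormat("yyyyMMddHHmmss")

      // Safe: a def evaluates its body on every call, so each caller gets
      // its own formatter. Costs one small allocation per call, which is
      // negligible for rarely generated IDs.
      def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss")

      def main(args: Array[String]): Unit = {
        println(createDateFormat.format(new Date))  // e.g. 20140402143015
      }
    }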

core/src/main/scala/org/apache/spark/deploy/master/ui/IndexPage.scala

Lines changed: 4 additions & 4 deletions
@@ -25,10 +25,10 @@ import scala.xml.Node
 import akka.pattern.ask
 import org.json4s.JValue
 
-import org.apache.spark.deploy.{DeployWebUI, JsonProtocol}
+import org.apache.spark.deploy.{JsonProtocol}
 import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState}
 import org.apache.spark.deploy.master.{ApplicationInfo, DriverInfo, WorkerInfo}
-import org.apache.spark.ui.UIUtils
+import org.apache.spark.ui.{WebUI, UIUtils}
 import org.apache.spark.util.Utils
 
 private[spark] class IndexPage(parent: MasterWebUI) {

@@ -169,10 +169,10 @@ private[spark] class IndexPage(parent: MasterWebUI) {
       <td sorttable_customkey={app.desc.memoryPerSlave.toString}>
         {Utils.megabytesToString(app.desc.memoryPerSlave)}
       </td>
-      <td>{DeployWebUI.formatDate(app.submitDate)}</td>
+      <td>{WebUI.formatDate(app.submitDate)}</td>
       <td>{app.desc.user}</td>
       <td>{app.state.toString}</td>
-      <td>{DeployWebUI.formatDuration(app.duration)}</td>
+      <td>{WebUI.formatDuration(app.duration)}</td>
     </tr>
   }

core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala

Lines changed: 2 additions & 2 deletions
@@ -56,7 +56,7 @@ private[spark] class Worker(
   Utils.checkHost(host, "Expected hostname")
   assert (port > 0)
 
-  val DATE_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss") // For worker and executor IDs
+  def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss") // For worker and executor IDs
 
   // Send a heartbeat every (heartbeat timeout) / 4 milliseconds
   val HEARTBEAT_MILLIS = conf.getLong("spark.worker.timeout", 60) * 1000 / 4

@@ -319,7 +319,7 @@
   }
 
   def generateWorkerId(): String = {
-    "worker-%s-%s-%d".format(DATE_FORMAT.format(new Date), host, port)
+    "worker-%s-%s-%d".format(createDateFormat.format(new Date), host, port)
   }
 
   override def postStop() {

core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala

Lines changed: 4 additions & 2 deletions
@@ -55,7 +55,9 @@ class JobLogger(val user: String, val logDirName: String) extends SparkListener
   private val jobIdToPrintWriter = new HashMap[Int, PrintWriter]
   private val stageIdToJobId = new HashMap[Int, Int]
   private val jobIdToStageIds = new HashMap[Int, Seq[Int]]
-  private val DATE_FORMAT = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
+  private val dateFormat = new ThreadLocal[SimpleDateFormat]() {
+    override def initialValue() = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
+  }
   private val eventQueue = new LinkedBlockingQueue[SparkListenerEvent]
 
   createLogDir()

@@ -128,7 +130,7 @@ class JobLogger(val user: String, val logDirName: String) extends SparkListener
     var writeInfo = info
     if (withTime) {
       val date = new Date(System.currentTimeMillis())
-      writeInfo = DATE_FORMAT.format(date) + ": " + info
+      writeInfo = dateFormat.get.format(date) + ": " + info
     }
     jobIdToPrintWriter.get(jobId).foreach(_.println(writeInfo))
   }
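
JobLogger formats a timestamp on every log line, so instead of allocating a formatter per call it keeps one SimpleDateFormat per thread via ThreadLocal: initialValue() runs once per thread on its first get(), and the instance is never shared after that. A self-contained sketch of the pattern (object and method names are illustrative):

    import java.text.SimpleDateFormat
    import java.util.Date

    object ThreadLocalFormatting {
      // One formatter per thread; no locking and no per-call allocation.
      private val dateFormat = new ThreadLocal[SimpleDateFormat]() {
        override def initialValue() = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
      }

      def timestamped(info: String): String =
        dateFormat.get.format(new Date) + ": " + info

      def main(args: Array[String]): Unit = {
        // Each thread's first get() creates its own formatter, so the
        // timestamps are never corrupted even when output interleaves.
        val threads = (1 to 4).map { i =>
          new Thread(new Runnable {
            def run(): Unit = println(timestamped("message from thread " + i))
          })
        }
        threads.foreach(_.start())
        threads.foreach(_.join())
      }
    }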

core/src/main/scala/org/apache/spark/ui/WebUI.scala

Lines changed: 32 additions & 0 deletions
@@ -17,6 +17,9 @@
 
 package org.apache.spark.ui
 
+import java.text.SimpleDateFormat
+import java.util.Date
+
 private[spark] abstract class WebUI(name: String) {
   protected var serverInfo: Option[ServerInfo] = None
 

@@ -35,3 +38,32 @@ private[spark] abstract class WebUI(name: String) {
     serverInfo.get.server.stop()
   }
 }
+
+/**
+ * Utilities used throughout the web UI.
+ */
+private[spark] object WebUI {
+  // SimpleDateFormat is not thread-safe. Don't expose it to avoid improper use.
+  private val dateFormat = new ThreadLocal[SimpleDateFormat]() {
+    override def initialValue(): SimpleDateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
+  }
+
+  def formatDate(date: Date): String = dateFormat.get.format(date)
+
+  def formatDate(timestamp: Long): String = dateFormat.get.format(new Date(timestamp))
+
+  def formatDuration(milliseconds: Long): String = {
+    val seconds = milliseconds.toDouble / 1000
+    if (seconds < 60) {
+      return "%.0f s".format(seconds)
+    }
+    val minutes = seconds / 60
+    if (minutes < 10) {
+      return "%.1f min".format(minutes)
+    } else if (minutes < 60) {
+      return "%.0f min".format(minutes)
+    }
+    val hours = minutes / 60
+    return "%.1f h".format(hours)
+  }
+}
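
Given the thresholds above, formatDuration renders whole seconds under a minute, one-decimal minutes under ten minutes, whole minutes under an hour, and one-decimal hours beyond that. A usage sketch with expected outputs derived from the code shown (note the object is private[spark], so it is only callable from within Spark's own packages):

    import org.apache.spark.ui.WebUI

    WebUI.formatDuration(42 * 1000L)              // "42 s"
    WebUI.formatDuration(5 * 60 * 1000L)          // "5.0 min"
    WebUI.formatDuration(30 * 60 * 1000L)         // "30 min"
    WebUI.formatDuration(2 * 3600 * 1000L)        // "2.0 h"
    WebUI.formatDate(System.currentTimeMillis())  // e.g. "2014/04/02 14:30:15"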

core/src/main/scala/org/apache/spark/ui/jobs/JobProgressUI.scala

Lines changed: 0 additions & 2 deletions
@@ -17,7 +17,6 @@
 
 package org.apache.spark.ui.jobs
 
-import java.text.SimpleDateFormat
 import javax.servlet.http.HttpServletRequest
 
 import org.eclipse.jetty.servlet.ServletContextHandler

@@ -31,7 +30,6 @@ import org.apache.spark.util.Utils
 /** Web UI showing progress status of all jobs in the given SparkContext. */
 private[ui] class JobProgressUI(parent: SparkUI) {
   val basePath = parent.basePath
-  val dateFmt = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
   val live = parent.live
   val sc = parent.sc

core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala

Lines changed: 2 additions & 3 deletions
@@ -23,13 +23,12 @@ import javax.servlet.http.HttpServletRequest
 import scala.xml.Node
 
 import org.apache.spark.ui.Page._
-import org.apache.spark.ui.UIUtils
+import org.apache.spark.ui.{WebUI, UIUtils}
 import org.apache.spark.util.{Utils, Distribution}
 
 /** Page showing statistics and task list for a given stage */
 private[ui] class StagePage(parent: JobProgressUI) {
   private val basePath = parent.basePath
-  private val dateFmt = parent.dateFmt
   private lazy val listener = parent.listener
 
   private def appName = parent.appName

@@ -254,7 +253,7 @@ private[ui] class StagePage(parent: JobProgressUI) {
       <td>{info.status}</td>
       <td>{info.taskLocality}</td>
       <td>{info.host}</td>
-      <td>{dateFmt.format(new Date(info.launchTime))}</td>
+      <td>{WebUI.formatDate(new Date(info.launchTime))}</td>
       <td sorttable_customkey={duration.toString}>
         {formatDuration}
       </td>
