
Commit f235227

Narine Kokhlikyan committed: merge with master
2 parents (8db1d08 + 579fbcf), commit f235227

441 files changed: +6247 / -3464 lines


.gitignore

Lines changed: 4 additions & 0 deletions

@@ -17,6 +17,7 @@
 .idea/
 .idea_modules/
 .project
+.pydevproject
 .scala_dependencies
 .settings
 /lib/

@@ -78,3 +79,6 @@ spark-warehouse/
 .RData
 .RHistory
 .Rhistory
+*.Rproj
+*.Rproj.*
+

R/pkg/R/DataFrame.R

Lines changed: 1 addition & 1 deletion

@@ -35,7 +35,7 @@ setOldClass("structType")
 #' @slot env An R environment that stores bookkeeping states of the SparkDataFrame
 #' @slot sdf A Java object reference to the backing Scala DataFrame
 #' @seealso \link{createDataFrame}, \link{read.json}, \link{table}
-#' @seealso \url{https://spark.apache.org/docs/latest/sparkr.html#sparkdataframe}
+#' @seealso \url{https://spark.apache.org/docs/latest/sparkr.html#sparkr-dataframes}
 #' @export
 #' @examples
 #'\dontrun{

R/pkg/R/sparkR.R

Lines changed: 8 additions & 8 deletions

@@ -28,14 +28,6 @@ connExists <- function(env) {
   })
 }

-#' @rdname sparkR.session.stop
-#' @name sparkR.stop
-#' @export
-#' @note sparkR.stop since 1.4.0
-sparkR.stop <- function() {
-  sparkR.session.stop()
-}
-
 #' Stop the Spark Session and Spark Context
 #'
 #' Stop the Spark Session and Spark Context.

@@ -90,6 +82,14 @@ sparkR.session.stop <- function() {
   clearJobjs()
 }

+#' @rdname sparkR.session.stop
+#' @name sparkR.stop
+#' @export
+#' @note sparkR.stop since 1.4.0
+sparkR.stop <- function() {
+  sparkR.session.stop()
+}
+
 #' (Deprecated) Initialize a new Spark Context
 #'
 #' This function initializes a new SparkContext.

core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java

Lines changed: 7 additions & 3 deletions

@@ -88,6 +88,7 @@ final class BypassMergeSortShuffleWriter<K, V> extends ShuffleWriter<K, V> {

   /** Array of file writers, one for each partition */
   private DiskBlockObjectWriter[] partitionWriters;
+  private FileSegment[] partitionWriterSegments;
   @Nullable private MapStatus mapStatus;
   private long[] partitionLengths;

@@ -131,6 +132,7 @@ public void write(Iterator<Product2<K, V>> records) throws IOException {
     final SerializerInstance serInstance = serializer.newInstance();
     final long openStartTime = System.nanoTime();
     partitionWriters = new DiskBlockObjectWriter[numPartitions];
+    partitionWriterSegments = new FileSegment[numPartitions];
     for (int i = 0; i < numPartitions; i++) {
       final Tuple2<TempShuffleBlockId, File> tempShuffleBlockIdPlusFile =
         blockManager.diskBlockManager().createTempShuffleBlock();

@@ -150,8 +152,10 @@ public void write(Iterator<Product2<K, V>> records) throws IOException {
       partitionWriters[partitioner.getPartition(key)].write(key, record._2());
     }

-    for (DiskBlockObjectWriter writer : partitionWriters) {
-      writer.commitAndClose();
+    for (int i = 0; i < numPartitions; i++) {
+      final DiskBlockObjectWriter writer = partitionWriters[i];
+      partitionWriterSegments[i] = writer.commitAndGet();
+      writer.close();
     }

     File output = shuffleBlockResolver.getDataFile(shuffleId, mapId);

@@ -184,7 +188,7 @@ private long[] writePartitionedFile(File outputFile) throws IOException {
     boolean threwException = true;
     try {
       for (int i = 0; i < numPartitions; i++) {
-        final File file = partitionWriters[i].fileSegment().file();
+        final File file = partitionWriterSegments[i].file();
         if (file.exists()) {
           final FileInputStream in = new FileInputStream(file);
           boolean copyThrewException = true;
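
The common thread in this and the next two files is the retirement of DiskBlockObjectWriter.commitAndClose() in favour of commitAndGet(), which returns the FileSegment just committed, followed by an explicit close(). A minimal Scala sketch of the new calling convention, not standalone code: blockManager, blockId, file, serInstance, fileBufferSizeBytes, writeMetrics and records are assumed to be in scope just as in the surrounding class.

  // Sketch only: the calls used here (getDiskWriter, write, commitAndGet, close,
  // FileSegment.file/length) are the ones this commit exercises.
  val writer = blockManager.getDiskWriter(blockId, file, serInstance, fileBufferSizeBytes, writeMetrics)
  records.foreach { case (key, value) => writer.write(key, value) }
  val segment: FileSegment = writer.commitAndGet()  // flush and return the committed segment
  writer.close()                                    // closing is now a separate, explicit step
  val partitionFile = segment.file()                // segment metadata stays usable after close()

Storing the returned segments (partitionWriterSegments above) is what lets writePartitionedFile() find each partition's file and length after its writer has already been closed.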

core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java

Lines changed: 13 additions & 18 deletions

@@ -37,6 +37,7 @@
 import org.apache.spark.serializer.SerializerInstance;
 import org.apache.spark.storage.BlockManager;
 import org.apache.spark.storage.DiskBlockObjectWriter;
+import org.apache.spark.storage.FileSegment;
 import org.apache.spark.storage.TempShuffleBlockId;
 import org.apache.spark.unsafe.Platform;
 import org.apache.spark.unsafe.array.LongArray;

@@ -150,10 +151,6 @@ private void writeSortedFile(boolean isLastFile) throws IOException {
     final ShuffleInMemorySorter.ShuffleSorterIterator sortedRecords =
       inMemSorter.getSortedIterator();

-    // Currently, we need to open a new DiskBlockObjectWriter for each partition; we can avoid this
-    // after SPARK-5581 is fixed.
-    DiskBlockObjectWriter writer;
-
     // Small writes to DiskBlockObjectWriter will be fairly inefficient. Since there doesn't seem to
     // be an API to directly transfer bytes from managed memory to the disk writer, we buffer
     // data through a byte array. This array does not need to be large enough to hold a single

@@ -175,7 +172,8 @@
     // around this, we pass a dummy no-op serializer.
     final SerializerInstance ser = DummySerializerInstance.INSTANCE;

-    writer = blockManager.getDiskWriter(blockId, file, ser, fileBufferSizeBytes, writeMetricsToUse);
+    final DiskBlockObjectWriter writer =
+      blockManager.getDiskWriter(blockId, file, ser, fileBufferSizeBytes, writeMetricsToUse);

     int currentPartition = -1;
     while (sortedRecords.hasNext()) {

@@ -185,12 +183,10 @@
       if (partition != currentPartition) {
         // Switch to the new partition
         if (currentPartition != -1) {
-          writer.commitAndClose();
-          spillInfo.partitionLengths[currentPartition] = writer.fileSegment().length();
+          final FileSegment fileSegment = writer.commitAndGet();
+          spillInfo.partitionLengths[currentPartition] = fileSegment.length();
         }
         currentPartition = partition;
-        writer =
-          blockManager.getDiskWriter(blockId, file, ser, fileBufferSizeBytes, writeMetricsToUse);
       }

       final long recordPointer = sortedRecords.packedRecordPointer.getRecordPointer();

@@ -209,15 +205,14 @@
       writer.recordWritten();
     }

-    if (writer != null) {
-      writer.commitAndClose();
-      // If `writeSortedFile()` was called from `closeAndGetSpills()` and no records were inserted,
-      // then the file might be empty. Note that it might be better to avoid calling
-      // writeSortedFile() in that case.
-      if (currentPartition != -1) {
-        spillInfo.partitionLengths[currentPartition] = writer.fileSegment().length();
-        spills.add(spillInfo);
-      }
+    final FileSegment committedSegment = writer.commitAndGet();
+    writer.close();
+    // If `writeSortedFile()` was called from `closeAndGetSpills()` and no records were inserted,
+    // then the file might be empty. Note that it might be better to avoid calling
+    // writeSortedFile() in that case.
+    if (currentPartition != -1) {
+      spillInfo.partitionLengths[currentPartition] = committedSegment.length();
+      spills.add(spillInfo);
     }

     if (!isLastFile) { // i.e. this is a spill file
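
Because commitAndGet() leaves the writer open, writeSortedFile() can now reuse one DiskBlockObjectWriter for the whole spill file and commit at each partition boundary, instead of opening a fresh writer per partition as the removed SPARK-5581 comment described. A heavily simplified Scala sketch of that loop; sortedRecords and partitionLengths stand in for the real record-pointer iteration and spillInfo bookkeeping.

  // Simplified: one writer per spill file, one commit per partition boundary.
  val writer = blockManager.getDiskWriter(blockId, file, ser, fileBufferSizeBytes, writeMetrics)
  var currentPartition = -1
  for ((partition, key, value) <- sortedRecords) {
    if (partition != currentPartition) {
      if (currentPartition != -1) {
        partitionLengths(currentPartition) = writer.commitAndGet().length()  // seal previous partition
      }
      currentPartition = partition
    }
    writer.write(key, value)
  }
  val lastSegment = writer.commitAndGet()  // always commit; the file may legitimately be empty
  writer.close()
  if (currentPartition != -1) {
    partitionLengths(currentPartition) = lastSegment.length()
  }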

core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillWriter.java

Lines changed: 2 additions & 1 deletion

@@ -136,7 +136,8 @@ public void write(
   }

   public void close() throws IOException {
-    writer.commitAndClose();
+    writer.commitAndGet();
+    writer.close();
     writer = null;
     writeBuffer = null;
   }

core/src/main/scala/org/apache/spark/MapOutputTracker.scala

Lines changed: 2 additions & 0 deletions

@@ -46,6 +46,8 @@ private[spark] class MapOutputTrackerMasterEndpoint(
     override val rpcEnv: RpcEnv, tracker: MapOutputTrackerMaster, conf: SparkConf)
   extends RpcEndpoint with Logging {

+  logDebug("init") // force eager creation of logger
+
   override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
     case GetMapOutputStatuses(shuffleId: Int) =>
       val hostPort = context.senderAddress.hostPort
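
The added logDebug("init") line (and its twin in SparkContext below) is there purely for its side effect: Spark's Logging trait creates its SLF4J logger lazily, so touching it once during construction forces initialization up front rather than inside an RPC handler or a shutdown hook. A simplified approximation of that lazy pattern, not Spark's exact code:

  import org.slf4j.{Logger, LoggerFactory}

  trait Logging {
    // The logger is only created on first access ...
    @transient private var log_ : Logger = null

    protected def log: Logger = {
      if (log_ == null) log_ = LoggerFactory.getLogger(this.getClass.getName)
      log_
    }

    // ... so even a throwaway logDebug("init") in a constructor materializes it eagerly.
    protected def logDebug(msg: => String): Unit = if (log.isDebugEnabled) log.debug(msg)
  }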

core/src/main/scala/org/apache/spark/SparkConf.scala

Lines changed: 9 additions & 4 deletions

@@ -47,7 +47,7 @@ import org.apache.spark.util.Utils
  *
  * @param loadDefaults whether to also load values from Java system properties
  */
-class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging {
+class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Serializable {

   import SparkConf._

@@ -370,6 +370,13 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging {
     settings.entrySet().asScala.map(x => (x.getKey, x.getValue)).toArray
   }

+  /** Get all parameters that start with `prefix` */
+  def getAllWithPrefix(prefix: String): Array[(String, String)] = {
+    getAll.filter { case (k, v) => k.startsWith(prefix) }
+      .map { case (k, v) => (k.substring(prefix.length), v) }
+  }
+
+
   /** Get a parameter as an integer, falling back to a default if not set */
   def getInt(key: String, defaultValue: Int): Int = {
     getOption(key).map(_.toInt).getOrElse(defaultValue)

@@ -392,9 +399,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging {

   /** Get all executor environment variables set on this SparkConf */
   def getExecutorEnv: Seq[(String, String)] = {
-    val prefix = "spark.executorEnv."
-    getAll.filter{case (k, v) => k.startsWith(prefix)}
-      .map{case (k, v) => (k.substring(prefix.length), v)}
+    getAllWithPrefix("spark.executorEnv.")
   }

   /**
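
The new getAllWithPrefix factors out what getExecutorEnv previously did inline: filter keys by a prefix and strip it from the result. A small usage sketch; the keys and values are invented for illustration.

  import org.apache.spark.SparkConf

  val conf = new SparkConf(loadDefaults = false)
    .set("spark.executorEnv.JAVA_HOME", "/opt/jdk")   // hypothetical entries
    .set("spark.executorEnv.PYTHONPATH", "/opt/py")

  // The prefix is stripped from the returned keys:
  // Array(("JAVA_HOME", "/opt/jdk"), ("PYTHONPATH", "/opt/py"))
  val executorEnv: Array[(String, String)] = conf.getAllWithPrefix("spark.executorEnv.")
  assert(executorEnv.toMap == conf.getExecutorEnv.toMap)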

core/src/main/scala/org/apache/spark/SparkContext.scala

Lines changed: 3 additions & 2 deletions

@@ -249,7 +249,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
   def isStopped: Boolean = stopped.get()

   // An asynchronous listener bus for Spark events
-  private[spark] val listenerBus = new LiveListenerBus
+  private[spark] val listenerBus = new LiveListenerBus(this)

   // This function allows components created by SparkEnv to be mocked in unit tests:
   private[spark] def createSparkEnv(

@@ -556,6 +556,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
     // Make sure the context is stopped if the user forgets about it. This avoids leaving
     // unfinished event logs around after the JVM exits cleanly. It doesn't help if the JVM
     // is killed, though.
+    logDebug("Adding shutdown hook") // force eager creation of logger
     _shutdownHookRef = ShutdownHookManager.addShutdownHook(
       ShutdownHookManager.SPARK_CONTEXT_SHUTDOWN_PRIORITY) { () =>
       logInfo("Invoking stop() from shutdown hook")

@@ -2147,7 +2148,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
       }
     }

-    listenerBus.start(this)
+    listenerBus.start()
     _listenerBusStarted = true
   }

core/src/main/scala/org/apache/spark/TaskState.scala

Lines changed: 3 additions & 5 deletions

@@ -41,13 +41,11 @@ private[spark] object TaskState extends Enumeration {
   }

   def fromMesos(mesosState: MesosTaskState): TaskState = mesosState match {
-    case MesosTaskState.TASK_STAGING => LAUNCHING
-    case MesosTaskState.TASK_STARTING => LAUNCHING
-    case MesosTaskState.TASK_RUNNING => RUNNING
+    case MesosTaskState.TASK_STAGING | MesosTaskState.TASK_STARTING => LAUNCHING
+    case MesosTaskState.TASK_RUNNING | MesosTaskState.TASK_KILLING => RUNNING
     case MesosTaskState.TASK_FINISHED => FINISHED
     case MesosTaskState.TASK_FAILED => FAILED
     case MesosTaskState.TASK_KILLED => KILLED
-    case MesosTaskState.TASK_LOST => LOST
-    case MesosTaskState.TASK_ERROR => LOST
+    case MesosTaskState.TASK_LOST | MesosTaskState.TASK_ERROR => LOST
   }
 }
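
Besides folding duplicate branches into alternative patterns, the new match handles MesosTaskState.TASK_KILLING, which the old one did not. A hedged sketch of the resulting behaviour, assuming MesosTaskState is the usual alias for org.apache.mesos.Protos.TaskState and that the code lives in the org.apache.spark package (TaskState is private[spark], e.g. in a test):

  import org.apache.mesos.Protos.{TaskState => MesosTaskState}

  // TASK_KILLING now maps to RUNNING instead of falling through the match;
  // the collapsed branches behave exactly as before.
  assert(TaskState.fromMesos(MesosTaskState.TASK_KILLING) == TaskState.RUNNING)
  assert(TaskState.fromMesos(MesosTaskState.TASK_STARTING) == TaskState.LAUNCHING)
  assert(TaskState.fromMesos(MesosTaskState.TASK_ERROR) == TaskState.LOST)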
