
Commit 6994827

Merge pull request #1 from rxin/openstack
Bring the branch up to date and fix some documentation typos.
2 parents: cca7192 + ac0679e

464 files changed: +16905, −5833 lines


.gitignore

Lines changed: 1 addition & 1 deletion

@@ -7,7 +7,7 @@
 sbt/*.jar
 .settings
 .cache
-.mima-excludes
+.generated-mima*
 /build/
 work/
 out/

.rat-excludes

Lines changed: 1 addition & 0 deletions

@@ -3,6 +3,7 @@ target
 .project
 .classpath
 .mima-excludes
+.generated-mima-excludes
 .rat-excludes
 .*md
 derby.log

README.md

Lines changed: 4 additions & 3 deletions

@@ -9,13 +9,14 @@ You can find the latest Spark documentation, including a programming
 guide, on the project webpage at <http://spark.apache.org/documentation.html>.
 This README file only contains basic setup instructions.
 
-
 ## Building Spark
 
 Spark is built on Scala 2.10. To build Spark and its example programs, run:
 
     ./sbt/sbt assembly
 
+(You do not need to do this if you downloaded a pre-built package.)
+
 ## Interactive Scala Shell
 
 The easiest way to start using Spark is through the Scala shell:

@@ -41,9 +42,9 @@ And run the following command, which should also return 1000:
 Spark also comes with several sample programs in the `examples` directory.
 To run one of them, use `./bin/run-example <class> [params]`. For example:
 
-    ./bin/run-example org.apache.spark.examples.SparkLR
+    ./bin/run-example SparkPi
 
-will run the Logistic Regression example locally.
+will run the Pi example locally.
 
 You can set the MASTER environment variable when running examples to submit
 examples to a cluster. This can be a mesos:// or spark:// URL,
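The README now points users at the lighter SparkPi example instead of the fully qualified Logistic Regression class. As a rough sketch of the flow it describes, assuming a source checkout built with sbt (the assembly step is unnecessary for a pre-built package, and the spark:// URL is a placeholder):

    # Build Spark and its examples, then run the Pi example locally.
    ./sbt/sbt assembly
    ./bin/run-example SparkPi

    # Or submit the same example to a cluster through the MASTER variable.
    MASTER=spark://host:7077 ./bin/run-example SparkPi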

assembly/pom.xml

Lines changed: 2 additions & 2 deletions

@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent</artifactId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

@@ -96,7 +96,7 @@
       <filter>
         <artifact>*:*</artifact>
         <excludes>
-          <exclude>org.datanucleus:*</exclude>
+          <exclude>org/datanucleus/**</exclude>
          <exclude>META-INF/*.SF</exclude>
          <exclude>META-INF/*.DSA</exclude>
          <exclude>META-INF/*.RSA</exclude>

bagel/pom.xml

Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent</artifactId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

bagel/src/test/scala/org/apache/spark/bagel/BagelSuite.scala

Lines changed: 2 additions & 4 deletions

@@ -38,8 +38,6 @@ class BagelSuite extends FunSuite with Assertions with BeforeAndAfter with Timeo
       sc.stop()
       sc = null
     }
-    // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown
-    System.clearProperty("spark.driver.port")
   }
 
   test("halting by voting") {

@@ -82,7 +80,7 @@ class BagelSuite extends FunSuite with Assertions with BeforeAndAfter with Timeo
   test("large number of iterations") {
     // This tests whether jobs with a large number of iterations finish in a reasonable time,
     // because non-memoized recursion in RDD or DAGScheduler used to cause them to hang
-    failAfter(10 seconds) {
+    failAfter(30 seconds) {
       sc = new SparkContext("local", "test")
       val verts = sc.parallelize((1 to 4).map(id => (id.toString, new TestVertex(true, 0))))
       val msgs = sc.parallelize(Array[(String, TestMessage)]())

@@ -103,7 +101,7 @@ class BagelSuite extends FunSuite with Assertions with BeforeAndAfter with Timeo
       sc = new SparkContext("local", "test")
       val verts = sc.parallelize((1 to 4).map(id => (id.toString, new TestVertex(true, 0))))
       val msgs = sc.parallelize(Array[(String, TestMessage)]())
-      val numSupersteps = 50
+      val numSupersteps = 20
       val result =
         Bagel.run(sc, verts, msgs, sc.defaultParallelism, StorageLevel.DISK_ONLY) {
           (self: TestVertex, msgs: Option[Array[TestMessage]], superstep: Int) =>

bin/compute-classpath.cmd

Lines changed: 23 additions & 1 deletion

@@ -20,6 +20,13 @@ rem
 rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
 rem script and the ExecutorRunner in standalone cluster mode.
 
+rem If we're called from spark-class2.cmd, it already set enabledelayedexpansion and setting
+rem it here would stop us from affecting its copy of the CLASSPATH variable; otherwise we
+rem need to set it here because we use !datanucleus_jars! below.
+if "%DONT_PRINT_CLASSPATH%"=="1" goto skip_delayed_expansion
+setlocal enabledelayedexpansion
+:skip_delayed_expansion
+
 set SCALA_VERSION=2.10
 
 rem Figure out where the Spark framework is installed

@@ -31,7 +38,7 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
 rem Build up classpath
 set CLASSPATH=%FWDIR%conf
 if exist "%FWDIR%RELEASE" (
-  for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
+  for %%d in ("%FWDIR%lib\spark-assembly*.jar") do (
     set ASSEMBLY_JAR=%%d
   )
 ) else (

@@ -42,6 +49,21 @@ if exist "%FWDIR%RELEASE" (
 
 set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
 
+rem When Hive support is needed, Datanucleus jars must be included on the classpath.
+rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
+rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
+rem built with Hive, so look for them there.
+if exist "%FWDIR%RELEASE" (
+  set datanucleus_dir=%FWDIR%lib
+) else (
+  set datanucleus_dir=%FWDIR%lib_managed\jars
+)
+set "datanucleus_jars="
+for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do (
+  set datanucleus_jars=!datanucleus_jars!;%%d
+)
+set CLASSPATH=%CLASSPATH%;%datanucleus_jars%
+
 set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes
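Together with the pom change above, the classpath scripts now expect the Datanucleus jars as separate files next to the assembly (lib\ in a release, lib_managed\jars\ in a source build) rather than shaded into the uber jar. A hedged way to sanity-check a Hive-enabled sbt build on the Unix side (the jar paths are assumptions about the build layout):

    # The assembly jar should contain no Datanucleus classes...
    jar tf assembly/target/scala-2.10/spark-assembly-*-hadoop*.jar | grep "^org/datanucleus" | head
    # ...while the standalone Datanucleus jars should be available separately.
    ls lib_managed/jars/datanucleus-*.jar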

bin/compute-classpath.sh

Lines changed: 25 additions & 9 deletions

@@ -38,8 +38,10 @@ else
   JAR_CMD="jar"
 fi
 
-# First check if we have a dependencies jar. If so, include binary classes with the deps jar
-if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
+# A developer option to prepend more recently compiled Spark classes
+if [ -n "$SPARK_PREPEND_CLASSES" ]; then
+  echo "NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark"\
+    "classes ahead of assembly." >&2
   CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/classes"

@@ -51,17 +53,31 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
   CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/yarn/stable/target/scala-$SCALA_VERSION/classes"
+fi
 
-  ASSEMBLY_JAR=$(ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar 2>/dev/null)
+# Use spark-assembly jar from either RELEASE or assembly directory
+if [ -f "$FWDIR/RELEASE" ]; then
+  assembly_folder="$FWDIR"/lib
 else
-  # Else use spark-assembly jar from either RELEASE or assembly directory
-  if [ -f "$FWDIR/RELEASE" ]; then
-    ASSEMBLY_JAR=$(ls "$FWDIR"/lib/spark-assembly*hadoop*.jar 2>/dev/null)
-  else
-    ASSEMBLY_JAR=$(ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*.jar 2>/dev/null)
-  fi
+  assembly_folder="$ASSEMBLY_DIR"
 fi
 
+num_jars=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar" | wc -l)
+if [ "$num_jars" -eq "0" ]; then
+  echo "Failed to find Spark assembly in $assembly_folder"
+  echo "You need to build Spark before running this program."
+  exit 1
+fi
+if [ "$num_jars" -gt "1" ]; then
+  jars_list=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*.jar")
+  echo "Found multiple Spark assembly jars in $assembly_folder:"
+  echo "$jars_list"
+  echo "Please remove all but one jar."
+  exit 1
+fi
+
+ASSEMBLY_JAR=$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)
+
 # Verify that versions of java used to build the jars and run Spark are compatible
 jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
 if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then
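The old deps-jar branch is replaced by an explicit developer switch: locally compiled classes are only prepended when SPARK_PREPEND_CLASSES is set, and the assembly lookup now fails loudly when zero or multiple assembly jars are found. A minimal sketch of the workflow this enables (the sbt invocation is an assumption; any task that refreshes target/scala-2.10/classes would do):

    # Recompile the classes you are iterating on...
    ./sbt/sbt compile
    # ...then let the scripts place them ahead of the assembly jar on the classpath.
    SPARK_PREPEND_CLASSES=1 ./bin/spark-shell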

bin/pyspark

Lines changed: 13 additions & 3 deletions

@@ -17,7 +17,7 @@
 # limitations under the License.
 #
 
-# Figure out where the Scala framework is installed
+# Figure out where Spark is installed
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
 # Export this as SPARK_HOME

@@ -45,7 +45,7 @@ fi
 . $FWDIR/bin/load-spark-env.sh
 
 # Figure out which Python executable to use
-if [ -z "$PYSPARK_PYTHON" ] ; then
+if [[ -z "$PYSPARK_PYTHON" ]]; then
   PYSPARK_PYTHON="python"
 fi
 export PYSPARK_PYTHON

@@ -59,7 +59,7 @@ export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
 export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py
 
 # If IPython options are specified, assume user wants to run IPython
-if [ -n "$IPYTHON_OPTS" ]; then
+if [[ -n "$IPYTHON_OPTS" ]]; then
   IPYTHON=1
 fi
 

@@ -76,6 +76,16 @@ for i in "$@"; do
 done
 export PYSPARK_SUBMIT_ARGS
 
+# For pyspark tests
+if [[ -n "$SPARK_TESTING" ]]; then
+  if [[ -n "$PYSPARK_DOC_TEST" ]]; then
+    exec "$PYSPARK_PYTHON" -m doctest $1
+  else
+    exec "$PYSPARK_PYTHON" $1
+  fi
+  exit
+fi
+
 # If a python file is provided, directly run spark-submit.
 if [[ "$1" =~ \.py$ ]]; then
   echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
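The new SPARK_TESTING branch lets the Python test harness reuse this launcher's environment setup and exec a test file directly instead of going through spark-submit. A hedged example of how it might be driven (the module paths are illustrative):

    # Run a module's doctests with the pyspark environment already configured.
    SPARK_TESTING=1 PYSPARK_DOC_TEST=1 ./bin/pyspark python/pyspark/rdd.py
    # Run an ordinary Python test script the same way.
    SPARK_TESTING=1 ./bin/pyspark python/pyspark/tests.py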

bin/run-example

Lines changed: 12 additions & 13 deletions

@@ -23,6 +23,16 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 export SPARK_HOME="$FWDIR"
 EXAMPLES_DIR="$FWDIR"/examples
 
+if [ -n "$1" ]; then
+  EXAMPLE_CLASS="$1"
+  shift
+else
+  echo "Usage: ./bin/run-example <example-class> [example-args]"
+  echo " - set MASTER=XX to use a specific master"
+  echo " - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)"
+  exit 1
+fi
+
 if [ -f "$FWDIR/RELEASE" ]; then
   export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`
 elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then

@@ -37,23 +47,12 @@ fi
 
 EXAMPLE_MASTER=${MASTER:-"local[*]"}
 
-if [ -n "$1" ]; then
-  EXAMPLE_CLASS="$1"
-  shift
-else
-  echo "usage: ./bin/run-example <example-class> [example-args]"
-  echo " - set MASTER=XX to use a specific master"
-  echo " - can use abbreviated example class name (e.g. SparkPi, mllib.MovieLensALS)"
-  echo
-  exit -1
-fi
-
 if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
   EXAMPLE_CLASS="org.apache.spark.examples.$EXAMPLE_CLASS"
 fi
 
-./bin/spark-submit \
+"$FWDIR"/bin/spark-submit \
   --master $EXAMPLE_MASTER \
   --class $EXAMPLE_CLASS \
-  $SPARK_EXAMPLES_JAR \
+  "$SPARK_EXAMPLES_JAR" \
   "$@"

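In bin/run-example, the argument check now runs before the examples jar is resolved, so a missing class name fails fast with the usage message, and the spark-submit invocation is anchored at $FWDIR and quoted so the script works from any directory. Illustrative invocations under those assumptions (the master URL is a placeholder):

    # Abbreviated names are expanded to org.apache.spark.examples.*:
    ./bin/run-example SparkPi 100
    # Point MASTER at a specific cluster instead of the local[*] default.
    MASTER=spark://host:7077 ./bin/run-example SparkPi 100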