
Commit 6994827

Merge pull request #1 from rxin/openstack
Bring the branch up to date and fix some documentation typos.
2 parents: cca7192 + ac0679e

464 files changed: +16905, −5833 lines


.gitignore

Lines changed: 1 addition & 1 deletion

@@ -7,7 +7,7 @@
 sbt/*.jar
 .settings
 .cache
-.mima-excludes
+.generated-mima*
 /build/
 work/
 out/

.rat-excludes

Lines changed: 1 addition & 0 deletions

@@ -3,6 +3,7 @@ target
 .project
 .classpath
 .mima-excludes
+.generated-mima-excludes
 .rat-excludes
 .*md
 derby.log

README.md

Lines changed: 4 additions & 3 deletions

@@ -9,13 +9,14 @@ You can find the latest Spark documentation, including a programming
 guide, on the project webpage at <http://spark.apache.org/documentation.html>.
 This README file only contains basic setup instructions.
 
-
 ## Building Spark
 
 Spark is built on Scala 2.10. To build Spark and its example programs, run:
 
     ./sbt/sbt assembly
 
+(You do not need to do this if you downloaded a pre-built package.)
+
 ## Interactive Scala Shell
 
 The easiest way to start using Spark is through the Scala shell:

@@ -41,9 +42,9 @@ And run the following command, which should also return 1000:
 Spark also comes with several sample programs in the `examples` directory.
 To run one of them, use `./bin/run-example <class> [params]`. For example:
 
-    ./bin/run-example org.apache.spark.examples.SparkLR
+    ./bin/run-example SparkPi
 
-will run the Logistic Regression example locally.
+will run the Pi example locally.
 
 You can set the MASTER environment variable when running examples to submit
 examples to a cluster. This can be a mesos:// or spark:// URL,
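The README now points users at the lighter SparkPi example instead of the fully qualified Logistic Regression class. As a rough sketch of the flow it describes, assuming a source checkout built with sbt (the assembly step is unnecessary for a pre-built package, and the spark:// URL is a placeholder):

    # Build Spark and its examples, then run the Pi example locally.
    ./sbt/sbt assembly
    ./bin/run-example SparkPi

    # Or submit the same example to a cluster through the MASTER variable.
    MASTER=spark://host:7077 ./bin/run-example SparkPi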

assembly/pom.xml

Lines changed: 2 additions & 2 deletions

@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent</artifactId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

@@ -96,7 +96,7 @@
       <filter>
         <artifact>*:*</artifact>
         <excludes>
-          <exclude>org.datanucleus:*</exclude>
+          <exclude>org/datanucleus/**</exclude>
          <exclude>META-INF/*.SF</exclude>
          <exclude>META-INF/*.DSA</exclude>
          <exclude>META-INF/*.RSA</exclude>

bagel/pom.xml

Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent</artifactId>
-    <version>1.0.0-SNAPSHOT</version>
+    <version>1.1.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>

bagel/src/test/scala/org/apache/spark/bagel/BagelSuite.scala

Lines changed: 2 additions & 4 deletions

@@ -38,8 +38,6 @@ class BagelSuite extends FunSuite with Assertions with BeforeAndAfter with Timeo
       sc.stop()
       sc = null
     }
-    // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown
-    System.clearProperty("spark.driver.port")
   }
 
   test("halting by voting") {

@@ -82,7 +80,7 @@ class BagelSuite extends FunSuite with Assertions with BeforeAndAfter with Timeo
   test("large number of iterations") {
     // This tests whether jobs with a large number of iterations finish in a reasonable time,
     // because non-memoized recursion in RDD or DAGScheduler used to cause them to hang
-    failAfter(10 seconds) {
+    failAfter(30 seconds) {
       sc = new SparkContext("local", "test")
       val verts = sc.parallelize((1 to 4).map(id => (id.toString, new TestVertex(true, 0))))
       val msgs = sc.parallelize(Array[(String, TestMessage)]())

@@ -103,7 +101,7 @@ class BagelSuite extends FunSuite with Assertions with BeforeAndAfter with Timeo
       sc = new SparkContext("local", "test")
       val verts = sc.parallelize((1 to 4).map(id => (id.toString, new TestVertex(true, 0))))
       val msgs = sc.parallelize(Array[(String, TestMessage)]())
-      val numSupersteps = 50
+      val numSupersteps = 20
       val result =
         Bagel.run(sc, verts, msgs, sc.defaultParallelism, StorageLevel.DISK_ONLY) {
           (self: TestVertex, msgs: Option[Array[TestMessage]], superstep: Int) =>

bin/compute-classpath.cmd

Lines changed: 23 additions & 1 deletion

@@ -20,6 +20,13 @@ rem
 rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run"
 rem script and the ExecutorRunner in standalone cluster mode.
 
+rem If we're called from spark-class2.cmd, it already set enabledelayedexpansion and setting
+rem it here would stop us from affecting its copy of the CLASSPATH variable; otherwise we
+rem need to set it here because we use !datanucleus_jars! below.
+if "%DONT_PRINT_CLASSPATH%"=="1" goto skip_delayed_expansion
+setlocal enabledelayedexpansion
+:skip_delayed_expansion
+
 set SCALA_VERSION=2.10
 
 rem Figure out where the Spark framework is installed

@@ -31,7 +38,7 @@ if exist "%FWDIR%conf\spark-env.cmd" call "%FWDIR%conf\spark-env.cmd"
 rem Build up classpath
 set CLASSPATH=%FWDIR%conf
 if exist "%FWDIR%RELEASE" (
-  for %%d in ("%FWDIR%jars\spark-assembly*.jar") do (
+  for %%d in ("%FWDIR%lib\spark-assembly*.jar") do (
     set ASSEMBLY_JAR=%%d
   )
 ) else (

@@ -42,6 +49,21 @@ if exist "%FWDIR%RELEASE" (
 
 set CLASSPATH=%CLASSPATH%;%ASSEMBLY_JAR%
 
+rem When Hive support is needed, Datanucleus jars must be included on the classpath.
+rem Datanucleus jars do not work if only included in the uber jar as plugin.xml metadata is lost.
+rem Both sbt and maven will populate "lib_managed/jars/" with the datanucleus jars when Spark is
+rem built with Hive, so look for them there.
+if exist "%FWDIR%RELEASE" (
+  set datanucleus_dir=%FWDIR%lib
+) else (
+  set datanucleus_dir=%FWDIR%lib_managed\jars
+)
+set "datanucleus_jars="
+for %%d in ("%datanucleus_dir%\datanucleus-*.jar") do (
+  set datanucleus_jars=!datanucleus_jars!;%%d
+)
+set CLASSPATH=%CLASSPATH%;%datanucleus_jars%
+
 set SPARK_CLASSES=%FWDIR%core\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%repl\target\scala-%SCALA_VERSION%\classes
 set SPARK_CLASSES=%SPARK_CLASSES%;%FWDIR%mllib\target\scala-%SCALA_VERSION%\classes
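Together with the pom change above, the classpath scripts now expect the Datanucleus jars as separate files next to the assembly (lib\ in a release, lib_managed\jars\ in a source build) rather than shaded into the uber jar. A hedged way to sanity-check a Hive-enabled sbt build on the Unix side (the jar paths are assumptions about the build layout):

    # The assembly jar should contain no Datanucleus classes...
    jar tf assembly/target/scala-2.10/spark-assembly-*-hadoop*.jar | grep "^org/datanucleus" | head
    # ...while the standalone Datanucleus jars should be available separately.
    ls lib_managed/jars/datanucleus-*.jar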

bin/compute-classpath.sh

Lines changed: 25 additions & 9 deletions

@@ -38,8 +38,10 @@ else
   JAR_CMD="jar"
 fi
 
-# First check if we have a dependencies jar. If so, include binary classes with the deps jar
-if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
+# A developer option to prepend more recently compiled Spark classes
+if [ -n "$SPARK_PREPEND_CLASSES" ]; then
+  echo "NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark"\
+    "classes ahead of assembly." >&2
   CLASSPATH="$CLASSPATH:$FWDIR/core/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/repl/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/mllib/target/scala-$SCALA_VERSION/classes"

@@ -51,17 +53,31 @@ if [ -f "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar ]; then
   CLASSPATH="$CLASSPATH:$FWDIR/sql/core/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/sql/hive/target/scala-$SCALA_VERSION/classes"
   CLASSPATH="$CLASSPATH:$FWDIR/yarn/stable/target/scala-$SCALA_VERSION/classes"
+fi
 
-  ASSEMBLY_JAR=$(ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*-deps.jar 2>/dev/null)
+# Use spark-assembly jar from either RELEASE or assembly directory
+if [ -f "$FWDIR/RELEASE" ]; then
+  assembly_folder="$FWDIR"/lib
 else
-  # Else use spark-assembly jar from either RELEASE or assembly directory
-  if [ -f "$FWDIR/RELEASE" ]; then
-    ASSEMBLY_JAR=$(ls "$FWDIR"/lib/spark-assembly*hadoop*.jar 2>/dev/null)
-  else
-    ASSEMBLY_JAR=$(ls "$ASSEMBLY_DIR"/spark-assembly*hadoop*.jar 2>/dev/null)
-  fi
+  assembly_folder="$ASSEMBLY_DIR"
 fi
 
+num_jars=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*\.jar" | wc -l)
+if [ "$num_jars" -eq "0" ]; then
+  echo "Failed to find Spark assembly in $assembly_folder"
+  echo "You need to build Spark before running this program."
+  exit 1
+fi
+if [ "$num_jars" -gt "1" ]; then
+  jars_list=$(ls "$assembly_folder" | grep "spark-assembly.*hadoop.*.jar")
+  echo "Found multiple Spark assembly jars in $assembly_folder:"
+  echo "$jars_list"
+  echo "Please remove all but one jar."
+  exit 1
+fi
+
+ASSEMBLY_JAR=$(ls "$assembly_folder"/spark-assembly*hadoop*.jar 2>/dev/null)
+
 # Verify that versions of java used to build the jars and run Spark are compatible
 jar_error_check=$("$JAR_CMD" -tf "$ASSEMBLY_JAR" nonexistent/class/path 2>&1)
 if [[ "$jar_error_check" =~ "invalid CEN header" ]]; then
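The old deps-jar branch is replaced by an explicit developer switch: locally compiled classes are only prepended when SPARK_PREPEND_CLASSES is set, and the assembly lookup now fails loudly when zero or multiple assembly jars are found. A minimal sketch of the workflow this enables (the sbt invocation is an assumption; any task that refreshes target/scala-2.10/classes would do):

    # Recompile the classes you are iterating on...
    ./sbt/sbt compile
    # ...then let the scripts place them ahead of the assembly jar on the classpath.
    SPARK_PREPEND_CLASSES=1 ./bin/spark-shell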

bin/pyspark

Lines changed: 13 additions & 3 deletions

@@ -17,7 +17,7 @@
 # limitations under the License.
 #
 
-# Figure out where the Scala framework is installed
+# Figure out where Spark is installed
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
 # Export this as SPARK_HOME

@@ -45,7 +45,7 @@ fi
 . $FWDIR/bin/load-spark-env.sh
 
 # Figure out which Python executable to use
-if [ -z "$PYSPARK_PYTHON" ] ; then
+if [[ -z "$PYSPARK_PYTHON" ]]; then
   PYSPARK_PYTHON="python"
 fi
 export PYSPARK_PYTHON

@@ -59,7 +59,7 @@ export OLD_PYTHONSTARTUP=$PYTHONSTARTUP
 export PYTHONSTARTUP=$FWDIR/python/pyspark/shell.py
 
 # If IPython options are specified, assume user wants to run IPython
-if [ -n "$IPYTHON_OPTS" ]; then
+if [[ -n "$IPYTHON_OPTS" ]]; then
   IPYTHON=1
 fi
 

@@ -76,6 +76,16 @@ for i in "$@"; do
 done
 export PYSPARK_SUBMIT_ARGS
 
+# For pyspark tests
+if [[ -n "$SPARK_TESTING" ]]; then
+  if [[ -n "$PYSPARK_DOC_TEST" ]]; then
+    exec "$PYSPARK_PYTHON" -m doctest $1
+  else
+    exec "$PYSPARK_PYTHON" $1
+  fi
+  exit
+fi
+
 # If a python file is provided, directly run spark-submit.
 if [[ "$1" =~ \.py$ ]]; then
   echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
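The new SPARK_TESTING branch lets the Python test harness reuse this launcher's environment setup and exec a test file directly instead of going through spark-submit. A hedged example of how it might be driven (the module paths are illustrative):

    # Run a module's doctests with the pyspark environment already configured.
    SPARK_TESTING=1 PYSPARK_DOC_TEST=1 ./bin/pyspark python/pyspark/rdd.py
    # Run an ordinary Python test script the same way.
    SPARK_TESTING=1 ./bin/pyspark python/pyspark/tests.py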

bin/run-example

Lines changed: 12 additions & 13 deletions

@@ -23,6 +23,16 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 export SPARK_HOME="$FWDIR"
 EXAMPLES_DIR="$FWDIR"/examples
 
+if [ -n "$1" ]; then
+  EXAMPLE_CLASS="$1"
+  shift
+else
+  echo "Usage: ./bin/run-example <example-class> [example-args]"
+  echo " - set MASTER=XX to use a specific master"
+  echo " - can use abbreviated example class name (e.g. SparkPi, mllib.LinearRegression)"
+  exit 1
+fi
+
 if [ -f "$FWDIR/RELEASE" ]; then
   export SPARK_EXAMPLES_JAR=`ls "$FWDIR"/lib/spark-examples-*hadoop*.jar`
 elif [ -e "$EXAMPLES_DIR"/target/scala-$SCALA_VERSION/spark-examples-*hadoop*.jar ]; then

@@ -37,23 +47,12 @@ fi
 
 EXAMPLE_MASTER=${MASTER:-"local[*]"}
 
-if [ -n "$1" ]; then
-  EXAMPLE_CLASS="$1"
-  shift
-else
-  echo "usage: ./bin/run-example <example-class> [example-args]"
-  echo " - set MASTER=XX to use a specific master"
-  echo " - can use abbreviated example class name (e.g. SparkPi, mllib.MovieLensALS)"
-  echo
-  exit -1
-fi
-
 if [[ ! $EXAMPLE_CLASS == org.apache.spark.examples* ]]; then
   EXAMPLE_CLASS="org.apache.spark.examples.$EXAMPLE_CLASS"
 fi
 
-./bin/spark-submit \
+"$FWDIR"/bin/spark-submit \
   --master $EXAMPLE_MASTER \
   --class $EXAMPLE_CLASS \
-  $SPARK_EXAMPLES_JAR \
+  "$SPARK_EXAMPLES_JAR" \
   "$@"

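In bin/run-example, the argument check now runs before the examples jar is resolved, so a missing class name fails fast with the usage message, and the spark-submit invocation is anchored at $FWDIR and quoted so the script works from any directory. Illustrative invocations under those assumptions (the master URL is a placeholder):

    # Abbreviated names are expanded to org.apache.spark.examples.*:
    ./bin/run-example SparkPi 100
    # Point MASTER at a specific cluster instead of the local[*] default.
    MASTER=spark://host:7077 ./bin/run-example SparkPi 100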