Skip to content

Commit 34a49d5

Browse files
committed
Merge remote-tracking branch 'upstream/master' into broadcast-on-executors
2 parents 1b499d1 + 3476390 commit 34a49d5

File tree

2,265 files changed

+110826
-37114
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,265 files changed

+110826
-37114
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ dependency-reduced-pom.xml
4242
derby.log
4343
dev/create-release/*final
4444
dev/create-release/*txt
45+
dev/pr-deps/
4546
dist/
4647
docs/_site
4748
docs/api

.travis.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ dist: trusty
2828
# 2. Choose language and target JDKs for parallel builds.
2929
language: java
3030
jdk:
31-
- oraclejdk7
3231
- oraclejdk8
3332

3433
# 3. Setup cache directory for SBT and Maven.
@@ -44,7 +43,7 @@ notifications:
4443
# 5. Run maven install before running lint-java.
4544
install:
4645
- export MAVEN_SKIP_RC=1
47-
- build/mvn -T 4 -q -DskipTests -Pmesos -Pyarn -Phadoop-2.3 -Pkinesis-asl -Phive -Phive-thriftserver install
46+
- build/mvn -T 4 -q -DskipTests -Pmesos -Pyarn -Pkinesis-asl -Phive -Phive-thriftserver install
4847

4948
# 6. Run lint-java.
5049
script:

LICENSE

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -249,11 +249,11 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
249249
(Interpreter classes (all .scala files in repl/src/main/scala
250250
except for Main.Scala, SparkHelper.scala and ExecutorClassLoader.scala),
251251
and for SerializableMapWrapper in JavaUtils.scala)
252-
(BSD-like) Scala Actors library (org.scala-lang:scala-actors:2.11.7 - http://www.scala-lang.org/)
253-
(BSD-like) Scala Compiler (org.scala-lang:scala-compiler:2.11.7 - http://www.scala-lang.org/)
254-
(BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.11.7 - http://www.scala-lang.org/)
255-
(BSD-like) Scala Library (org.scala-lang:scala-library:2.11.7 - http://www.scala-lang.org/)
256-
(BSD-like) Scalap (org.scala-lang:scalap:2.11.7 - http://www.scala-lang.org/)
252+
(BSD-like) Scala Actors library (org.scala-lang:scala-actors:2.11.8 - http://www.scala-lang.org/)
253+
(BSD-like) Scala Compiler (org.scala-lang:scala-compiler:2.11.8 - http://www.scala-lang.org/)
254+
(BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.11.8 - http://www.scala-lang.org/)
255+
(BSD-like) Scala Library (org.scala-lang:scala-library:2.11.8 - http://www.scala-lang.org/)
256+
(BSD-like) Scalap (org.scala-lang:scalap:2.11.8 - http://www.scala-lang.org/)
257257
(BSD-style) scalacheck (org.scalacheck:scalacheck_2.11:1.10.0 - http://www.scalacheck.org)
258258
(BSD-style) spire (org.spire-math:spire_2.11:0.7.1 - http://spire-math.org)
259259
(BSD-style) spire-macros (org.spire-math:spire-macros_2.11:0.7.1 - http://spire-math.org)
@@ -297,3 +297,4 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
297297
(MIT License) RowsGroup (http://datatables.net/license/mit)
298298
(MIT License) jsonFormatter (http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html)
299299
(MIT License) modernizr (https://github.com/Modernizr/Modernizr/blob/master/LICENSE)
300+
(MIT License) machinist (https://github.com/typelevel/machinist)

R/WINDOWS.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ To build SparkR on Windows, the following steps are required
66
include Rtools and R in `PATH`.
77

88
2. Install
9-
[JDK7](http://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html) and set
9+
[JDK8](http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html) and set
1010
`JAVA_HOME` in the system environment variables.
1111

1212
3. Download and install [Maven](http://maven.apache.org/download.html). Also include the `bin`
@@ -38,6 +38,6 @@ To run the SparkR unit tests on Windows, the following steps are required —ass
3838

3939
```
4040
R -e "install.packages('testthat', repos='http://cran.us.r-project.org')"
41-
.\bin\spark-submit2.cmd --conf spark.hadoop.fs.default.name="file:///" R\pkg\tests\run-all.R
41+
.\bin\spark-submit2.cmd --conf spark.hadoop.fs.defaultFS="file:///" R\pkg\tests\run-all.R
4242
```
4343

R/check-cran.sh

Lines changed: 10 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -20,29 +20,18 @@
2020
set -o pipefail
2121
set -e
2222

23-
FWDIR="$(cd `dirname $0`; pwd)"
24-
pushd $FWDIR > /dev/null
23+
FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
24+
pushd "$FWDIR" > /dev/null
2525

26-
if [ ! -z "$R_HOME" ]
27-
then
28-
R_SCRIPT_PATH="$R_HOME/bin"
29-
else
30-
# if system wide R_HOME is not found, then exit
31-
if [ ! `command -v R` ]; then
32-
echo "Cannot find 'R_HOME'. Please specify 'R_HOME' or make sure R is properly installed."
33-
exit 1
34-
fi
35-
R_SCRIPT_PATH="$(dirname $(which R))"
36-
fi
37-
echo "Using R_SCRIPT_PATH = ${R_SCRIPT_PATH}"
26+
. "$FWDIR/find-r.sh"
3827

3928
# Install the package (this is required for code in vignettes to run when building it later)
4029
# Build the latest docs, but not vignettes, which is built with the package next
41-
$FWDIR/create-docs.sh
30+
. "$FWDIR/install-dev.sh"
4231

4332
# Build source package with vignettes
4433
SPARK_HOME="$(cd "${FWDIR}"/..; pwd)"
45-
. "${SPARK_HOME}"/bin/load-spark-env.sh
34+
. "${SPARK_HOME}/bin/load-spark-env.sh"
4635
if [ -f "${SPARK_HOME}/RELEASE" ]; then
4736
SPARK_JARS_DIR="${SPARK_HOME}/jars"
4837
else
@@ -51,16 +40,16 @@ fi
5140

5241
if [ -d "$SPARK_JARS_DIR" ]; then
5342
# Build a zip file containing the source package with vignettes
54-
SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/"R CMD build $FWDIR/pkg
43+
SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/R" CMD build "$FWDIR/pkg"
5544

5645
find pkg/vignettes/. -not -name '.' -not -name '*.Rmd' -not -name '*.md' -not -name '*.pdf' -not -name '*.html' -delete
5746
else
58-
echo "Error Spark JARs not found in $SPARK_HOME"
47+
echo "Error Spark JARs not found in '$SPARK_HOME'"
5948
exit 1
6049
fi
6150

6251
# Run check as-cran.
63-
VERSION=`grep Version $FWDIR/pkg/DESCRIPTION | awk '{print $NF}'`
52+
VERSION=`grep Version "$FWDIR/pkg/DESCRIPTION" | awk '{print $NF}'`
6453

6554
CRAN_CHECK_OPTIONS="--as-cran"
6655

@@ -78,25 +67,10 @@ echo "Running CRAN check with $CRAN_CHECK_OPTIONS options"
7867

7968
if [ -n "$NO_TESTS" ] && [ -n "$NO_MANUAL" ]
8069
then
81-
"$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz
70+
"$R_SCRIPT_PATH/R" CMD check $CRAN_CHECK_OPTIONS "SparkR_$VERSION.tar.gz"
8271
else
8372
# This will run tests and/or build vignettes, and require SPARK_HOME
84-
SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/"R CMD check $CRAN_CHECK_OPTIONS SparkR_"$VERSION".tar.gz
85-
fi
86-
87-
# Install source package to get it to generate vignettes rds files, etc.
88-
if [ -n "$CLEAN_INSTALL" ]
89-
then
90-
echo "Removing lib path and installing from source package"
91-
LIB_DIR="$FWDIR/lib"
92-
rm -rf $LIB_DIR
93-
mkdir -p $LIB_DIR
94-
"$R_SCRIPT_PATH/"R CMD INSTALL SparkR_"$VERSION".tar.gz --library=$LIB_DIR
95-
96-
# Zip the SparkR package so that it can be distributed to worker nodes on YARN
97-
pushd $LIB_DIR > /dev/null
98-
jar cfM "$LIB_DIR/sparkr.zip" SparkR
99-
popd > /dev/null
73+
SPARK_HOME="${SPARK_HOME}" "$R_SCRIPT_PATH/R" CMD check $CRAN_CHECK_OPTIONS "SparkR_$VERSION.tar.gz"
10074
fi
10175

10276
popd > /dev/null

R/create-docs.sh

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,26 +29,27 @@ set -o pipefail
2929
set -e
3030

3131
# Figure out where the script is
32-
export FWDIR="$(cd "`dirname "$0"`"; pwd)"
33-
export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
32+
export FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
33+
export SPARK_HOME="$(cd "`dirname "${BASH_SOURCE[0]}"`"/..; pwd)"
3434

3535
# Required for setting SPARK_SCALA_VERSION
36-
. "${SPARK_HOME}"/bin/load-spark-env.sh
36+
. "${SPARK_HOME}/bin/load-spark-env.sh"
3737

3838
echo "Using Scala $SPARK_SCALA_VERSION"
3939

40-
pushd $FWDIR
40+
pushd "$FWDIR" > /dev/null
41+
. "$FWDIR/find-r.sh"
4142

4243
# Install the package (this will also generate the Rd files)
43-
./install-dev.sh
44+
. "$FWDIR/install-dev.sh"
4445

4546
# Now create HTML files
4647

4748
# knit_rd puts html in current working directory
4849
mkdir -p pkg/html
4950
pushd pkg/html
5051

51-
Rscript -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knitr); knit_rd("SparkR", links = tools::findHTMLlinks(paste(libDir, "SparkR", sep="/")))'
52+
"$R_SCRIPT_PATH/Rscript" -e 'libDir <- "../../lib"; library(SparkR, lib.loc=libDir); library(knitr); knit_rd("SparkR", links = tools::findHTMLlinks(paste(libDir, "SparkR", sep="/")))'
5253

5354
popd
5455

R/create-rd.sh

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/bin/bash
2+
3+
#
4+
# Licensed to the Apache Software Foundation (ASF) under one or more
5+
# contributor license agreements. See the NOTICE file distributed with
6+
# this work for additional information regarding copyright ownership.
7+
# The ASF licenses this file to You under the Apache License, Version 2.0
8+
# (the "License"); you may not use this file except in compliance with
9+
# the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
#
19+
20+
# This script packages the SparkR source files (R and C files) and
21+
# creates a package that can be loaded in R. The package is by default installed to
22+
# $FWDIR/lib and the package can be loaded by using the following command in R:
23+
#
24+
# library(SparkR, lib.loc="$FWDIR/lib")
25+
#
26+
# NOTE(shivaram): Right now we use $SPARK_HOME/R/lib to be the installation directory
27+
# to load the SparkR package on the worker nodes.
28+
29+
set -o pipefail
30+
set -e
31+
32+
FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
33+
pushd "$FWDIR" > /dev/null
34+
. "$FWDIR/find-r.sh"
35+
36+
# Generate Rd files if devtools is installed
37+
"$R_SCRIPT_PATH/Rscript" -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'

R/find-r.sh

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
#!/bin/bash
2+
3+
#
4+
# Licensed to the Apache Software Foundation (ASF) under one or more
5+
# contributor license agreements. See the NOTICE file distributed with
6+
# this work for additional information regarding copyright ownership.
7+
# The ASF licenses this file to You under the Apache License, Version 2.0
8+
# (the "License"); you may not use this file except in compliance with
9+
# the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
#
19+
20+
if [ -z "$R_SCRIPT_PATH" ]
21+
then
22+
if [ ! -z "$R_HOME" ]
23+
then
24+
R_SCRIPT_PATH="$R_HOME/bin"
25+
else
26+
# if system wide R_HOME is not found, then exit
27+
if [ ! `command -v R` ]; then
28+
echo "Cannot find 'R_HOME'. Please specify 'R_HOME' or make sure R is properly installed."
29+
exit 1
30+
fi
31+
R_SCRIPT_PATH="$(dirname $(which R))"
32+
fi
33+
echo "Using R_SCRIPT_PATH = ${R_SCRIPT_PATH}"
34+
fi

R/install-dev.sh

Lines changed: 9 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -29,33 +29,21 @@
2929
set -o pipefail
3030
set -e
3131

32-
FWDIR="$(cd `dirname $0`; pwd)"
32+
FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
3333
LIB_DIR="$FWDIR/lib"
3434

35-
mkdir -p $LIB_DIR
36-
37-
pushd $FWDIR > /dev/null
38-
if [ ! -z "$R_HOME" ]
39-
then
40-
R_SCRIPT_PATH="$R_HOME/bin"
41-
else
42-
# if system wide R_HOME is not found, then exit
43-
if [ ! `command -v R` ]; then
44-
echo "Cannot find 'R_HOME'. Please specify 'R_HOME' or make sure R is properly installed."
45-
exit 1
46-
fi
47-
R_SCRIPT_PATH="$(dirname $(which R))"
48-
fi
49-
echo "Using R_SCRIPT_PATH = ${R_SCRIPT_PATH}"
50-
51-
# Generate Rd files if devtools is installed
52-
"$R_SCRIPT_PATH/"Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'
35+
mkdir -p "$LIB_DIR"
36+
37+
pushd "$FWDIR" > /dev/null
38+
. "$FWDIR/find-r.sh"
39+
40+
. "$FWDIR/create-rd.sh"
5341

5442
# Install SparkR to $LIB_DIR
55-
"$R_SCRIPT_PATH/"R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/
43+
"$R_SCRIPT_PATH/R" CMD INSTALL --library="$LIB_DIR" "$FWDIR/pkg/"
5644

5745
# Zip the SparkR package so that it can be distributed to worker nodes on YARN
58-
cd $LIB_DIR
46+
cd "$LIB_DIR"
5947
jar cfM "$LIB_DIR/sparkr.zip" SparkR
6048

6149
popd > /dev/null

R/install-source-package.sh

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#!/bin/bash
2+
3+
#
4+
# Licensed to the Apache Software Foundation (ASF) under one or more
5+
# contributor license agreements. See the NOTICE file distributed with
6+
# this work for additional information regarding copyright ownership.
7+
# The ASF licenses this file to You under the Apache License, Version 2.0
8+
# (the "License"); you may not use this file except in compliance with
9+
# the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
#
19+
20+
# This script packages the SparkR source files (R and C files) and
21+
# creates a package that can be loaded in R. The package is by default installed to
22+
# $FWDIR/lib and the package can be loaded by using the following command in R:
23+
#
24+
# library(SparkR, lib.loc="$FWDIR/lib")
25+
#
26+
# NOTE(shivaram): Right now we use $SPARK_HOME/R/lib to be the installation directory
27+
# to load the SparkR package on the worker nodes.
28+
29+
set -o pipefail
30+
set -e
31+
32+
FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"
33+
pushd "$FWDIR" > /dev/null
34+
. "$FWDIR/find-r.sh"
35+
36+
if [ -z "$VERSION" ]; then
37+
VERSION=`grep Version "$FWDIR/pkg/DESCRIPTION" | awk '{print $NF}'`
38+
fi
39+
40+
if [ ! -f "$FWDIR/SparkR_$VERSION.tar.gz" ]; then
41+
echo -e "R source package file '$FWDIR/SparkR_$VERSION.tar.gz' is not found."
42+
echo -e "Please build R source package with check-cran.sh"
43+
exit -1;
44+
fi
45+
46+
echo "Removing lib path and installing from source package"
47+
LIB_DIR="$FWDIR/lib"
48+
rm -rf "$LIB_DIR"
49+
mkdir -p "$LIB_DIR"
50+
"$R_SCRIPT_PATH/R" CMD INSTALL "SparkR_$VERSION.tar.gz" --library="$LIB_DIR"
51+
52+
# Zip the SparkR package so that it can be distributed to worker nodes on YARN
53+
pushd "$LIB_DIR" > /dev/null
54+
jar cfM "$LIB_DIR/sparkr.zip" SparkR
55+
popd > /dev/null
56+
57+
popd

0 commit comments

Comments
 (0)