Commit b495b7c

Author: Marcelo Vanzin (committed)

Merge branch 'master' into bm-event-tstamp

Conflicts:
	core/src/test/scala/org/apache/spark/ui/storage/StorageTabSuite.scala
	core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala

2 parents: 7d2fe9e + c9f7439 · commit b495b7c

File tree

473 files changed: 16,174 additions, 6,310 deletions

.rat-excludes

Lines changed: 1 addition & 0 deletions
@@ -25,6 +25,7 @@ log4j-defaults.properties
 bootstrap-tooltip.js
 jquery-1.11.1.min.js
 sorttable.js
+.*avsc
 .*txt
 .*json
 .*data

README.md

Lines changed: 5 additions & 0 deletions
@@ -115,6 +115,11 @@ If your project is built with Maven, add this to your POM file's `<dependencies>
 </dependency>


+## A Note About Thrift JDBC server and CLI for Spark SQL
+
+Spark SQL supports Thrift JDBC server and CLI.
+See sql-programming-guide.md for more information about using the JDBC server.
+
 ## Configuration

 Please refer to the [Configuration guide](http://spark.apache.org/docs/latest/configuration.html)

assembly/pom.xml

Lines changed: 18 additions & 5 deletions
@@ -43,6 +43,12 @@
   </properties>

   <dependencies>
+    <!-- Promote Guava to compile scope in this module so it's included while shading. -->
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <scope>compile</scope>
+    </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-core_${scala.binary.version}</artifactId>
@@ -113,6 +119,18 @@
               <goal>shade</goal>
             </goals>
             <configuration>
+              <relocations>
+                <relocation>
+                  <pattern>com.google</pattern>
+                  <shadedPattern>org.spark-project.guava</shadedPattern>
+                  <includes>
+                    <include>com.google.common.**</include>
+                  </includes>
+                  <excludes>
+                    <exclude>com.google.common.base.Optional**</exclude>
+                  </excludes>
+                </relocation>
+              </relocations>
               <transformers>
                 <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
                 <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
@@ -163,11 +181,6 @@
           <artifactId>spark-hive_${scala.binary.version}</artifactId>
           <version>${project.version}</version>
         </dependency>
-      </dependencies>
-    </profile>
-    <profile>
-      <id>hive-thriftserver</id>
-      <dependencies>
         <dependency>
           <groupId>org.apache.spark</groupId>
           <artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId>
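
Reviewer note: a quick way to sanity-check the Guava relocation after building the assembly; the jar path below is illustrative and depends on the Scala/Hadoop profile used.

# Illustrative check only; adjust the assembly jar path to your build.
jar tf assembly/target/scala-2.10/spark-assembly-*.jar \
  | grep -E '^(com/google/common|org/spark-project/guava)' | head
# Expected: Guava classes appear under org/spark-project/guava/..., while
# com/google/common/base/Optional* keeps its original name, since Optional is
# exposed through Spark's Java API and is excluded from the relocation above.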

bin/beeline

Lines changed: 7 additions & 22 deletions
@@ -17,29 +17,14 @@
 # limitations under the License.
 #

-# Figure out where Spark is installed
-FWDIR="$(cd `dirname $0`/..; pwd)"
+#
+# Shell script for starting BeeLine

-# Find the java binary
-if [ -n "${JAVA_HOME}" ]; then
-  RUNNER="${JAVA_HOME}/bin/java"
-else
-  if [ `command -v java` ]; then
-    RUNNER="java"
-  else
-    echo "JAVA_HOME is not set" >&2
-    exit 1
-  fi
-fi
+# Enter posix mode for bash
+set -o posix

-# Compute classpath using external script
-classpath_output=$($FWDIR/bin/compute-classpath.sh)
-if [[ "$?" != "0" ]]; then
-  echo "$classpath_output"
-  exit 1
-else
-  CLASSPATH=$classpath_output
-fi
+# Figure out where Spark is installed
+FWDIR="$(cd `dirname $0`/..; pwd)"

 CLASS="org.apache.hive.beeline.BeeLine"
-exec "$RUNNER" -cp "$CLASSPATH" $CLASS "$@"
+exec "$FWDIR/bin/spark-class" $CLASS "$@"

bin/pyspark

Lines changed: 14 additions & 4 deletions
@@ -23,12 +23,18 @@ FWDIR="$(cd `dirname $0`/..; pwd)"
 # Export this as SPARK_HOME
 export SPARK_HOME="$FWDIR"

+source $FWDIR/bin/utils.sh
+
 SCALA_VERSION=2.10

-if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+function usage() {
   echo "Usage: ./bin/pyspark [options]" 1>&2
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
+}
+
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+  usage
 fi

 # Exit if the user hasn't compiled Spark
@@ -66,10 +72,11 @@ fi
 # Build up arguments list manually to preserve quotes and backslashes.
 # We export Spark submit arguments as an environment variable because shell.py must run as a
 # PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.
-
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"
 PYSPARK_SUBMIT_ARGS=""
 whitespace="[[:space:]]"
-for i in "$@"; do
+for i in "${SUBMISSION_OPTS[@]}"; do
   if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
   if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
   PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
@@ -90,7 +97,10 @@ fi
 if [[ "$1" =~ \.py$ ]]; then
   echo -e "\nWARNING: Running python applications through ./bin/pyspark is deprecated as of Spark 1.0." 1>&2
   echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
-  exec $FWDIR/bin/spark-submit "$@"
+  primary=$1
+  shift
+  gatherSparkSubmitOpts "$@"
+  exec $FWDIR/bin/spark-submit "${SUBMISSION_OPTS[@]}" $primary "${APPLICATION_OPTS[@]}"
 else
   # Only use ipython if no command line arguments were provided [SPARK-1134]
   if [[ "$IPYTHON" = "1" ]]; then

bin/spark-class

Lines changed: 38 additions & 11 deletions
@@ -17,6 +17,8 @@
 # limitations under the License.
 #

+# NOTE: Any changes to this file must be reflected in SparkSubmitDriverBootstrapper.scala!
+
 cygwin=false
 case "`uname`" in
     CYGWIN*) cygwin=true;;
@@ -39,7 +41,7 @@ fi

 if [ -n "$SPARK_MEM" ]; then
   echo -e "Warning: SPARK_MEM is deprecated, please use a more specific config option" 1>&2
-  echo -e "(e.g., spark.executor.memory or SPARK_DRIVER_MEMORY)." 1>&2
+  echo -e "(e.g., spark.executor.memory or spark.driver.memory)." 1>&2
 fi

 # Use SPARK_MEM or 512m as the default memory, to be overridden by specific options
@@ -73,11 +75,17 @@ case "$1" in
     OUR_JAVA_MEM=${SPARK_EXECUTOR_MEMORY:-$DEFAULT_MEM}
     ;;

-  # Spark submit uses SPARK_SUBMIT_OPTS and SPARK_JAVA_OPTS
-  'org.apache.spark.deploy.SparkSubmit')
-    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS \
-      -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
+  # Spark submit uses SPARK_JAVA_OPTS + SPARK_SUBMIT_OPTS +
+  # SPARK_DRIVER_MEMORY + SPARK_SUBMIT_DRIVER_MEMORY.
+  'org.apache.spark.deploy.SparkSubmit')
+    OUR_JAVA_OPTS="$SPARK_JAVA_OPTS $SPARK_SUBMIT_OPTS"
     OUR_JAVA_MEM=${SPARK_DRIVER_MEMORY:-$DEFAULT_MEM}
+    if [ -n "$SPARK_SUBMIT_LIBRARY_PATH" ]; then
+      OUR_JAVA_OPTS="$OUR_JAVA_OPTS -Djava.library.path=$SPARK_SUBMIT_LIBRARY_PATH"
+    fi
+    if [ -n "$SPARK_SUBMIT_DRIVER_MEMORY" ]; then
+      OUR_JAVA_MEM="$SPARK_SUBMIT_DRIVER_MEMORY"
+    fi
     ;;

   *)
@@ -101,11 +109,12 @@ fi
 # Set JAVA_OPTS to be able to load native libraries and to set heap size
 JAVA_OPTS="-XX:MaxPermSize=128m $OUR_JAVA_OPTS"
 JAVA_OPTS="$JAVA_OPTS -Xms$OUR_JAVA_MEM -Xmx$OUR_JAVA_MEM"
+
 # Load extra JAVA_OPTS from conf/java-opts, if it exists
 if [ -e "$FWDIR/conf/java-opts" ] ; then
   JAVA_OPTS="$JAVA_OPTS `cat $FWDIR/conf/java-opts`"
 fi
-export JAVA_OPTS
+
 # Attention: when changing the way the JAVA_OPTS are assembled, the change must be reflected in CommandUtils.scala!

 TOOLS_DIR="$FWDIR"/tools
@@ -146,10 +155,28 @@ if $cygwin; then
 fi
 export CLASSPATH

-if [ "$SPARK_PRINT_LAUNCH_COMMAND" == "1" ]; then
-  echo -n "Spark Command: " 1>&2
-  echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
-  echo -e "========================================\n" 1>&2
+# In Spark submit client mode, the driver is launched in the same JVM as Spark submit itself.
+# Here we must parse the properties file for relevant "spark.driver.*" configs before launching
+# the driver JVM itself. Instead of handling this complexity in Bash, we launch a separate JVM
+# to prepare the launch environment of this driver JVM.
+
+if [ -n "$SPARK_SUBMIT_BOOTSTRAP_DRIVER" ]; then
+  # This is used only if the properties file actually contains these special configs
+  # Export the environment variables needed by SparkSubmitDriverBootstrapper
+  export RUNNER
+  export CLASSPATH
+  export JAVA_OPTS
+  export OUR_JAVA_MEM
+  export SPARK_CLASS=1
+  shift # Ignore main class (org.apache.spark.deploy.SparkSubmit) and use our own
+  exec "$RUNNER" org.apache.spark.deploy.SparkSubmitDriverBootstrapper "$@"
+else
+  # Note: The format of this command is closely echoed in SparkSubmitDriverBootstrapper.scala
+  if [ -n "$SPARK_PRINT_LAUNCH_COMMAND" ]; then
+    echo -n "Spark Command: " 1>&2
+    echo "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@" 1>&2
+    echo -e "========================================\n" 1>&2
+  fi
+  exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"
 fi

-exec "$RUNNER" -cp "$CLASSPATH" $JAVA_OPTS "$@"

bin/spark-shell

Lines changed: 14 additions & 6 deletions
@@ -31,13 +31,21 @@ set -o posix
 ## Global script variables
 FWDIR="$(cd `dirname $0`/..; pwd)"

+function usage() {
+  echo "Usage: ./bin/spark-shell [options]"
+  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  exit 0
+}
+
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./bin/spark-shell [options]"
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
-  exit 0
+  usage
 fi

-function main(){
+source $FWDIR/bin/utils.sh
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"
+
+function main() {
   if $cygwin; then
     # Workaround for issue involving JLine and Cygwin
     # (see http://sourceforge.net/p/jline/bugs/40/).
@@ -46,11 +54,11 @@ function main(){
     # (see https://github.com/sbt/sbt/issues/562).
     stty -icanon min 1 -echo > /dev/null 2>&1
     export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main spark-shell "$@"
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
     stty icanon echo > /dev/null 2>&1
   else
     export SPARK_SUBMIT_OPTS
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main spark-shell "$@"
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
   fi
 }

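Reviewer note: this uses the same utils.sh splitting as bin/pyspark; submission options are placed before the spark-shell primary-resource token and application options after it. Roughly, and with illustrative flags:

# Hypothetical invocation.
./bin/spark-shell --master local[4] --conf spark.ui.port=4041
# expands to approximately:
#   spark-submit --class org.apache.spark.repl.Main \
#     --master local[4] --conf spark.ui.port=4041 spark-shell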

bin/spark-shell.cmd

Lines changed: 1 addition & 1 deletion
@@ -19,4 +19,4 @@ rem

 set SPARK_HOME=%~dp0..

-cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd spark-shell --class org.apache.spark.repl.Main %*
+cmd /V /E /C %SPARK_HOME%\bin\spark-submit.cmd --class org.apache.spark.repl.Main %* spark-shell

bin/spark-sql

Lines changed: 62 additions & 4 deletions
@@ -23,14 +23,72 @@
 # Enter posix mode for bash
 set -o posix

+CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
+
 # Figure out where Spark is installed
 FWDIR="$(cd `dirname $0`/..; pwd)"

-if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./sbin/spark-sql [options]"
+function usage {
+  echo "Usage: ./bin/spark-sql [options] [cli option]"
+  pattern="usage"
+  pattern+="\|Spark assembly has been built with Hive"
+  pattern+="\|NOTE: SPARK_PREPEND_CLASSES is set"
+  pattern+="\|Spark Command: "
+  pattern+="\|--help"
+  pattern+="\|======="
+
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  echo
+  echo "CLI options:"
+  $FWDIR/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
+}
+
+function ensure_arg_number {
+  arg_number=$1
+  at_least=$2
+
+  if [[ $arg_number -lt $at_least ]]; then
+    usage
+    exit 1
+  fi
+}
+
+if [[ "$@" = --help ]] || [[ "$@" = -h ]]; then
+  usage
   exit 0
 fi

-CLASS="org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver"
-exec "$FWDIR"/bin/spark-submit --class $CLASS spark-internal $@
+CLI_ARGS=()
+SUBMISSION_ARGS=()
+
+while (($#)); do
+  case $1 in
+    -d | --define | --database | -f | -h | --hiveconf | --hivevar | -i | -p)
+      ensure_arg_number $# 2
+      CLI_ARGS+=("$1"); shift
+      CLI_ARGS+=("$1"); shift
+      ;;
+
+    -e)
+      ensure_arg_number $# 2
+      CLI_ARGS+=("$1"); shift
+      CLI_ARGS+=("$1"); shift
+      ;;
+
+    -s | --silent)
+      CLI_ARGS+=("$1"); shift
+      ;;
+
+    -v | --verbose)
+      # Both SparkSubmit and SparkSQLCLIDriver recognizes -v | --verbose
+      CLI_ARGS+=("$1")
+      SUBMISSION_ARGS+=("$1"); shift
+      ;;
+
+    *)
+      SUBMISSION_ARGS+=("$1"); shift
+      ;;
+  esac
+done
+
+exec "$FWDIR"/bin/spark-submit --class $CLASS "${SUBMISSION_ARGS[@]}" spark-internal "${CLI_ARGS[@]}"
