
Commit 01e0813

Merge remote-tracking branch 'upstream/master' into pyspark-inputformats

2 parents: 15a7d07 + 862283e

37 files changed: +1492 -854 lines

.gitignore

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 sbt/*.jar
 .settings
 .cache
-.mima-excludes
+.generated-mima-excludes
 /build/
 work/
 out/

.rat-excludes

Lines changed: 1 addition & 0 deletions
@@ -3,6 +3,7 @@ target
 .project
 .classpath
 .mima-excludes
+.generated-mima-excludes
 .rat-excludes
 .*md
 derby.log

core/pom.xml

Lines changed: 0 additions & 29 deletions
@@ -258,35 +258,6 @@
     <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
     <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
     <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-antrun-plugin</artifactId>
-        <executions>
-          <execution>
-            <phase>test</phase>
-            <goals>
-              <goal>run</goal>
-            </goals>
-            <configuration>
-              <exportAntProperties>true</exportAntProperties>
-              <target>
-                <property name="spark.classpath" refid="maven.test.classpath" />
-                <property environment="env" />
-                <fail message="Please set the SCALA_HOME (or SCALA_LIBRARY_PATH if scala is on the path) environment variables and retry.">
-                  <condition>
-                    <not>
-                      <or>
-                        <isset property="env.SCALA_HOME" />
-                        <isset property="env.SCALA_LIBRARY_PATH" />
-                      </or>
-                    </not>
-                  </condition>
-                </fail>
-              </target>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
      <plugin>
        <groupId>org.scalatest</groupId>
        <artifactId>scalatest-maven-plugin</artifactId>

core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala

Lines changed: 20 additions & 0 deletions
@@ -272,6 +272,26 @@ private object SpecialLengths {
 private[spark] object PythonRDD extends Logging {
   val UTF8 = Charset.forName("UTF-8")

+  /**
+   * Adapter for calling SparkContext#runJob from Python.
+   *
+   * This method will return an iterator of an array that contains all elements in the RDD
+   * (effectively a collect()), but allows you to run on a certain subset of partitions,
+   * or to enable local execution.
+   */
+  def runJob(
+      sc: SparkContext,
+      rdd: JavaRDD[Array[Byte]],
+      partitions: JArrayList[Int],
+      allowLocal: Boolean): Iterator[Array[Byte]] = {
+    type ByteArray = Array[Byte]
+    type UnrolledPartition = Array[ByteArray]
+    val allPartitions: Array[UnrolledPartition] =
+      sc.runJob(rdd, (x: Iterator[ByteArray]) => x.toArray, partitions, allowLocal)
+    val flattenedPartition: UnrolledPartition = Array.concat(allPartitions: _*)
+    flattenedPartition.iterator
+  }
+
   def readRDDFromFile(sc: JavaSparkContext, filename: String, parallelism: Int):
   JavaRDD[Array[Byte]] = {
     val file = new DataInputStream(new FileInputStream(filename))
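
For context, a minimal sketch of how this adapter might be driven from the JVM side. It assumes an existing SparkContext named sc and a JavaRDD[Array[Byte]] of serialized records named byteRdd; the partition indices are illustrative and not part of the commit.

import java.util.{ArrayList => JArrayList}

// Hypothetical call site: unroll only partitions 0 and 3 of the RDD.
val partitions = new JArrayList[Int]()
partitions.add(0)
partitions.add(3)

// Each element of the iterator is one record's serialized byte payload,
// concatenated in the order the partitions were requested.
val unrolled: Iterator[Array[Byte]] =
  PythonRDD.runJob(sc, byteRdd, partitions, allowLocal = false)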

core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala

Lines changed: 7 additions & 4 deletions
@@ -381,16 +381,19 @@ private[spark] class SparkSubmitArguments(args: Seq[String]) {
 object SparkSubmitArguments {
   /** Load properties present in the given file. */
   def getPropertiesFromFile(file: File): Seq[(String, String)] = {
-    require(file.exists(), s"Properties file ${file.getName} does not exist")
+    require(file.exists(), s"Properties file $file does not exist")
+    require(file.isFile(), s"Properties file $file is not a normal file")
     val inputStream = new FileInputStream(file)
-    val properties = new Properties()
     try {
+      val properties = new Properties()
       properties.load(inputStream)
+      properties.stringPropertyNames().toSeq.map(k => (k, properties(k).trim))
     } catch {
       case e: IOException =>
-        val message = s"Failed when loading Spark properties file ${file.getName}"
+        val message = s"Failed when loading Spark properties file $file"
         throw new SparkException(message, e)
+    } finally {
+      inputStream.close()
     }
-    properties.stringPropertyNames().toSeq.map(k => (k, properties(k).trim))
   }
 }
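
A hedged usage sketch of the reworked loader (it is an internal spark-submit helper, so real callers live inside Spark itself); the file path here is only an example:

import java.io.File

// Hypothetical internal caller; "conf/spark-defaults.conf" is an illustrative path.
val defaults: Seq[(String, String)] =
  SparkSubmitArguments.getPropertiesFromFile(new File("conf/spark-defaults.conf"))

// Values come back with surrounding whitespace trimmed, and the input stream is
// now closed even when load() throws, thanks to the added finally block.
defaults.foreach { case (key, value) => println(s"$key -> $value") }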

dev/mima

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -o pipefail
+
+# Go to the Spark project root directory
+FWDIR="$(cd `dirname $0`/..; pwd)"
+cd $FWDIR
+
+./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore
+echo -e "q\n" | sbt/sbt mima-report-binary-issues | grep -v -e "info.*Resolving"
+ret_val=$?
+
+if [ $ret_val != 0 ]; then
+  echo "NOTE: Exceptions to binary compatibility can be added in project/MimaExcludes.scala"
+fi
+
+exit $ret_val

dev/run-tests

Lines changed: 1 addition & 2 deletions
@@ -81,5 +81,4 @@ fi
 echo "========================================================================="
 echo "Detecting binary incompatibilites with MiMa"
 echo "========================================================================="
-./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore
-echo -e "q\n" | sbt/sbt mima-report-binary-issues | grep -v -e "info.*Resolving"
+dev/mima

docs/sql-programming-guide.md

Lines changed: 3 additions & 1 deletion
@@ -170,7 +170,9 @@ A schema can be applied to an existing RDD by calling `applySchema` and providing the Class object
 for the JavaBean.

 {% highlight java %}
-JavaSQLContext ctx = new org.apache.spark.sql.api.java.JavaSQLContext(sc)
+
+JavaSparkContext ctx = ...; // An existing JavaSparkContext.
+JavaSQLContext sqlCtx = new org.apache.spark.sql.api.java.JavaSQLContext(ctx)

 // Load a text file and convert each line to a JavaBean.
 JavaRDD<Person> people = ctx.textFile("examples/src/main/resources/people.txt").map(
