Commit 78b66d8 (1 parent: 3ccabdf)

execute isort/pep8 on example files
+ manual editing (replace '\' by parentheses for multiline syntax)
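The manual edit replaces explicit backslash line continuations with PEP 8's preferred implicit continuation inside parentheses, which is immune to stray trailing whitespace after the line break. A minimal before/after sketch of the pattern (the appName string is a placeholder, not taken from the commit):

    from pyspark.sql import SparkSession

    # Before: explicit continuation; any character after the backslash,
    # even an invisible trailing space, is a SyntaxError.
    spark = SparkSession \
        .builder \
        .appName("Example") \
        .getOrCreate()

    # After: implicit continuation; newlines inside the parentheses are
    # ignored by the parser, so the chain can break anywhere.
    spark = (SparkSession
             .builder
             .appName("Example")
             .getOrCreate())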

File tree: 117 files changed, +646 and -503 lines. (Large commit; only a subset of the changed files is shown below.)

docs/streaming-programming-guide.md (4 additions, 4 deletions)

@@ -1534,10 +1534,10 @@ See the full [source code]({{site.SPARK_GITHUB_URL}}/blob/v{{site.SPARK_VERSION_
 # Lazily instantiated global instance of SparkSession
 def getSparkSessionInstance(sparkConf):
     if ("sparkSessionSingletonInstance" not in globals()):
-        globals()["sparkSessionSingletonInstance"] = SparkSession \
-            .builder \
-            .config(conf=sparkConf) \
-            .getOrCreate()
+        globals()["sparkSessionSingletonInstance"] = (SparkSession
+                                                      .builder
+                                                      .config(conf=sparkConf)
+                                                      .getOrCreate())
     return globals()["sparkSessionSingletonInstance"]
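For context, the streaming guide uses this lazily instantiated singleton inside a foreachRDD callback so that every micro-batch reuses one SparkSession instead of creating its own. A minimal sketch of that surrounding usage, reconstructed from the guide rather than from this hunk (the `words` DStream and the word-count query are assumptions):

    from pyspark.sql import Row

    def process(time, rdd):
        # Reuse the single SparkSession across micro-batches.
        spark = getSparkSessionInstance(rdd.context.getConf())
        rowRdd = rdd.map(lambda w: Row(word=w))
        wordsDataFrame = spark.createDataFrame(rowRdd)
        wordsDataFrame.createOrReplaceTempView("words")
        spark.sql("select word, count(*) as total from words group by word").show()

    words.foreachRDD(process)  # `words` is an assumed DStream of strings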
...

examples/src/main/python/als.py (13 additions, 11 deletions)

@@ -26,8 +26,8 @@
 import sys

 import numpy as np
-from numpy.random import rand
 from numpy import matrix
+from numpy.random import rand
 from pyspark.sql import SparkSession

 LAMBDA = 0.01   # regularization

@@ -62,10 +62,10 @@ def update(i, mat, ratings):
     example. Please use pyspark.ml.recommendation.ALS for more
     conventional use.""", file=sys.stderr)

-    spark = SparkSession\
-        .builder\
-        .appName("PythonALS")\
-        .getOrCreate()
+    spark = (SparkSession
+             .builder
+             .appName("PythonALS")
+             .getOrCreate())

     sc = spark.sparkContext

@@ -87,17 +87,19 @@ def update(i, mat, ratings):
     usb = sc.broadcast(us)

     for i in range(ITERATIONS):
-        ms = sc.parallelize(range(M), partitions) \
-            .map(lambda x: update(x, usb.value, Rb.value)) \
-            .collect()
+        ms = (sc
+              .parallelize(range(M), partitions)
+              .map(lambda x: update(x, usb.value, Rb.value))
+              .collect())
         # collect() returns a list, so array ends up being
         # a 3-d array, we take the first 2 dims for the matrix
         ms = matrix(np.array(ms)[:, :, 0])
         msb = sc.broadcast(ms)

-        us = sc.parallelize(range(U), partitions) \
-            .map(lambda x: update(x, msb.value, Rb.value.T)) \
-            .collect()
+        us = (sc
+              .parallelize(range(U), partitions)
+              .map(lambda x: update(x, msb.value, Rb.value.T))
+              .collect())
         us = matrix(np.array(us)[:, :, 0])
         usb = sc.broadcast(us)
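The import reshuffles in this commit (e.g. `from numpy import matrix` moving above `from numpy.random import rand`) are what isort's grouped, alphabetical ordering produces. A minimal sketch using isort's modern Python API; this is an assumption, since the commit most likely ran the isort CLI of its era rather than this call:

    import isort

    source = (
        "import sys\n"
        "\n"
        "import numpy as np\n"
        "from numpy.random import rand\n"
        "from numpy import matrix\n"
        "from pyspark.sql import SparkSession\n"
    )

    # isort groups imports and sorts alphabetically within each group;
    # 'numpy' sorts before 'numpy.random', matching the hunk above.
    print(isort.code(source))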

examples/src/main/python/avro_inputformat.py (6 additions, 5 deletions)

@@ -18,10 +18,11 @@
 from __future__ import print_function

 import sys
-
 from functools import reduce
+
 from pyspark.sql import SparkSession

+
 """
 Read data file users.avro in local Spark distro:

@@ -65,10 +66,10 @@
     path = sys.argv[1]

-    spark = SparkSession\
-        .builder\
-        .appName("AvroKeyInputFormat")\
-        .getOrCreate()
+    spark = (SparkSession
+             .builder
+             .appName("AvroKeyInputFormat")
+             .getOrCreate())

     sc = spark.sparkContext

examples/src/main/python/kmeans.py (4 additions, 4 deletions)

@@ -55,10 +55,10 @@ def closestPoint(p, centers):
     as an example! Please refer to examples/src/main/python/ml/kmeans_example.py for an
     example on how to use ML's KMeans implementation.""", file=sys.stderr)

-    spark = SparkSession\
-        .builder\
-        .appName("PythonKMeans")\
-        .getOrCreate()
+    spark = (SparkSession
+             .builder
+             .appName("PythonKMeans")
+             .getOrCreate())

     lines = spark.read.text(sys.argv[1]).rdd.map(lambda r: r[0])
     data = lines.map(parseVector).cache()

examples/src/main/python/logistic_regression.py (9 additions, 8 deletions)

@@ -29,7 +29,6 @@
 import numpy as np
 from pyspark.sql import SparkSession

-
 D = 10  # Number of dimensions

@@ -55,13 +54,15 @@ def readPointBatch(iterator):
     Please refer to examples/src/main/python/ml/logistic_regression_with_elastic_net.py
     to see how ML's implementation is used.""", file=sys.stderr)

-    spark = SparkSession\
-        .builder\
-        .appName("PythonLR")\
-        .getOrCreate()
-
-    points = spark.read.text(sys.argv[1]).rdd.map(lambda r: r[0])\
-        .mapPartitions(readPointBatch).cache()
+    spark = (SparkSession
+             .builder
+             .appName("PythonLR")
+             .getOrCreate())
+    points = (spark.read
+              .text(sys.argv[1])
+              .rdd.map(lambda r: r[0])
+              .mapPartitions(readPointBatch)
+              .cache())
     iterations = int(sys.argv[2])

     # Initialize w to a random value

examples/src/main/python/ml/aft_survival_regression.py (5 additions, 5 deletions)

@@ -18,8 +18,8 @@
 from __future__ import print_function

 # $example on$
-from pyspark.ml.regression import AFTSurvivalRegression
 from pyspark.ml.linalg import Vectors
+from pyspark.ml.regression import AFTSurvivalRegression
 # $example off$
 from pyspark.sql import SparkSession

@@ -30,10 +30,10 @@
 """

 if __name__ == "__main__":
-    spark = SparkSession \
-        .builder \
-        .appName("AFTSurvivalRegressionExample") \
-        .getOrCreate()
+    spark = (SparkSession
+             .builder
+             .appName("AFTSurvivalRegressionExample")
+             .getOrCreate())

     # $example on$
     training = spark.createDataFrame([

examples/src/main/python/ml/als_example.py (9 additions, 8 deletions)

@@ -18,22 +18,23 @@
 from __future__ import print_function

 import sys
-if sys.version >= '3':
-    long = int
-
-from pyspark.sql import SparkSession

 # $example on$
 from pyspark.ml.evaluation import RegressionEvaluator
 from pyspark.ml.recommendation import ALS
 from pyspark.sql import Row
 # $example off$
+from pyspark.sql import SparkSession
+
+if sys.version >= '3':
+    long = int
+

 if __name__ == "__main__":
-    spark = SparkSession\
-        .builder\
-        .appName("ALSExample")\
-        .getOrCreate()
+    spark = (SparkSession
+             .builder
+             .appName("ALSExample")
+             .getOrCreate())

     # $example on$
     lines = spark.read.text("data/mllib/als/sample_movielens_ratings.txt").rdd

examples/src/main/python/ml/binarizer_example.py (6 additions, 5 deletions)

@@ -17,16 +17,17 @@

 from __future__ import print_function

-from pyspark.sql import SparkSession
 # $example on$
 from pyspark.ml.feature import Binarizer
 # $example off$
+from pyspark.sql import SparkSession
+

 if __name__ == "__main__":
-    spark = SparkSession\
-        .builder\
-        .appName("BinarizerExample")\
-        .getOrCreate()
+    spark = (SparkSession
+             .builder
+             .appName("BinarizerExample")
+             .getOrCreate())

     # $example on$
     continuousDataFrame = spark.createDataFrame([

examples/src/main/python/ml/bisecting_k_means_example.py (4 additions, 4 deletions)

@@ -29,10 +29,10 @@
 """

 if __name__ == "__main__":
-    spark = SparkSession\
-        .builder\
-        .appName("BisectingKMeansExample")\
-        .getOrCreate()
+    spark = (SparkSession
+             .builder
+             .appName("BisectingKMeansExample")
+             .getOrCreate())

     # $example on$
     # Loads data.

examples/src/main/python/ml/bucketizer_example.py (7 additions, 6 deletions)

@@ -17,16 +17,17 @@

 from __future__ import print_function

-from pyspark.sql import SparkSession
 # $example on$
 from pyspark.ml.feature import Bucketizer
 # $example off$
+from pyspark.sql import SparkSession
+

 if __name__ == "__main__":
-    spark = SparkSession\
-        .builder\
-        .appName("BucketizerExample")\
-        .getOrCreate()
+    spark = (SparkSession
+             .builder
+             .appName("BucketizerExample")
+             .getOrCreate())

     # $example on$
     splits = [-float("inf"), -0.5, 0.0, 0.5, float("inf")]

@@ -39,7 +40,7 @@
     # Transform original data into its bucket index.
     bucketedData = bucketizer.transform(dataFrame)

-    print("Bucketizer output with %d buckets" % (len(bucketizer.getSplits())-1))
+    print("Bucketizer output with %d buckets" % (len(bucketizer.getSplits()) - 1))
     bucketedData.show()
     # $example off$
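The last hunk above is the pep8 side of the commit: adding the missing whitespace around the arithmetic '-' operator. A hedged sketch of reproducing that fix with autopep8's Python API (an assumption; the commit may equally have made this edit by hand):

    import autopep8

    bad = 'print("buckets: %d" % (len(splits)-1))\n'

    # E225/E226 cover missing whitespace around operators; E226
    # (arithmetic operators) is not enforced by default in pycodestyle,
    # so it is selected explicitly here.
    print(autopep8.fix_code(bad, options={"select": ["E225", "E226"]}))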
