Commit 78b66d8 (1 parent: 3ccabdf)

execute isort/pep8 on example files
+ manual editing (replace '\' by parentheses for multiline syntax)
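The manual edit replaces explicit backslash line continuations with PEP 8's preferred implicit continuation inside parentheses, which is immune to stray trailing whitespace after the line break. A minimal before/after sketch of the pattern (the appName string is a placeholder, not taken from the commit):

    from pyspark.sql import SparkSession

    # Before: explicit continuation; any character after the backslash,
    # even an invisible trailing space, is a SyntaxError.
    spark = SparkSession \
        .builder \
        .appName("Example") \
        .getOrCreate()

    # After: implicit continuation; newlines inside the parentheses are
    # ignored by the parser, so the chain can break anywhere.
    spark = (SparkSession
             .builder
             .appName("Example")
             .getOrCreate())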

File tree: 117 files changed, +646 and -503 lines. (Large commit; only a subset of the changed files is shown below.)

docs/streaming-programming-guide.md (4 additions, 4 deletions)

@@ -1534,10 +1534,10 @@ See the full [source code]({{site.SPARK_GITHUB_URL}}/blob/v{{site.SPARK_VERSION_
 # Lazily instantiated global instance of SparkSession
 def getSparkSessionInstance(sparkConf):
     if ("sparkSessionSingletonInstance" not in globals()):
-        globals()["sparkSessionSingletonInstance"] = SparkSession \
-            .builder \
-            .config(conf=sparkConf) \
-            .getOrCreate()
+        globals()["sparkSessionSingletonInstance"] = (SparkSession
+                                                      .builder
+                                                      .config(conf=sparkConf)
+                                                      .getOrCreate())
     return globals()["sparkSessionSingletonInstance"]
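For context, the streaming guide uses this lazily instantiated singleton inside a foreachRDD callback so that every micro-batch reuses one SparkSession instead of creating its own. A minimal sketch of that surrounding usage, reconstructed from the guide rather than from this hunk (the `words` DStream and the word-count query are assumptions):

    from pyspark.sql import Row

    def process(time, rdd):
        # Reuse the single SparkSession across micro-batches.
        spark = getSparkSessionInstance(rdd.context.getConf())
        rowRdd = rdd.map(lambda w: Row(word=w))
        wordsDataFrame = spark.createDataFrame(rowRdd)
        wordsDataFrame.createOrReplaceTempView("words")
        spark.sql("select word, count(*) as total from words group by word").show()

    words.foreachRDD(process)  # `words` is an assumed DStream of strings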
...

examples/src/main/python/als.py (13 additions, 11 deletions)

@@ -26,8 +26,8 @@
 import sys

 import numpy as np
-from numpy.random import rand
 from numpy import matrix
+from numpy.random import rand
 from pyspark.sql import SparkSession

 LAMBDA = 0.01   # regularization

@@ -62,10 +62,10 @@ def update(i, mat, ratings):
     example. Please use pyspark.ml.recommendation.ALS for more
     conventional use.""", file=sys.stderr)

-    spark = SparkSession\
-        .builder\
-        .appName("PythonALS")\
-        .getOrCreate()
+    spark = (SparkSession
+             .builder
+             .appName("PythonALS")
+             .getOrCreate())

     sc = spark.sparkContext

@@ -87,17 +87,19 @@ def update(i, mat, ratings):
     usb = sc.broadcast(us)

     for i in range(ITERATIONS):
-        ms = sc.parallelize(range(M), partitions) \
-            .map(lambda x: update(x, usb.value, Rb.value)) \
-            .collect()
+        ms = (sc
+              .parallelize(range(M), partitions)
+              .map(lambda x: update(x, usb.value, Rb.value))
+              .collect())
         # collect() returns a list, so array ends up being
         # a 3-d array, we take the first 2 dims for the matrix
         ms = matrix(np.array(ms)[:, :, 0])
         msb = sc.broadcast(ms)

-        us = sc.parallelize(range(U), partitions) \
-            .map(lambda x: update(x, msb.value, Rb.value.T)) \
-            .collect()
+        us = (sc
+              .parallelize(range(U), partitions)
+              .map(lambda x: update(x, msb.value, Rb.value.T))
+              .collect())
         us = matrix(np.array(us)[:, :, 0])
         usb = sc.broadcast(us)
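The import reshuffles in this commit (e.g. `from numpy import matrix` moving above `from numpy.random import rand`) are what isort's grouped, alphabetical ordering produces. A minimal sketch using isort's modern Python API; this is an assumption, since the commit most likely ran the isort CLI of its era rather than this call:

    import isort

    source = (
        "import sys\n"
        "\n"
        "import numpy as np\n"
        "from numpy.random import rand\n"
        "from numpy import matrix\n"
        "from pyspark.sql import SparkSession\n"
    )

    # isort groups imports and sorts alphabetically within each group;
    # 'numpy' sorts before 'numpy.random', matching the hunk above.
    print(isort.code(source))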

examples/src/main/python/avro_inputformat.py (6 additions, 5 deletions)

@@ -18,10 +18,11 @@
 from __future__ import print_function

 import sys
-
 from functools import reduce
+
 from pyspark.sql import SparkSession

+
 """
 Read data file users.avro in local Spark distro:

@@ -65,10 +66,10 @@
     path = sys.argv[1]

-    spark = SparkSession\
-        .builder\
-        .appName("AvroKeyInputFormat")\
-        .getOrCreate()
+    spark = (SparkSession
+             .builder
+             .appName("AvroKeyInputFormat")
+             .getOrCreate())

     sc = spark.sparkContext

examples/src/main/python/kmeans.py (4 additions, 4 deletions)

@@ -55,10 +55,10 @@ def closestPoint(p, centers):
     as an example! Please refer to examples/src/main/python/ml/kmeans_example.py for an
     example on how to use ML's KMeans implementation.""", file=sys.stderr)

-    spark = SparkSession\
-        .builder\
-        .appName("PythonKMeans")\
-        .getOrCreate()
+    spark = (SparkSession
+             .builder
+             .appName("PythonKMeans")
+             .getOrCreate())

     lines = spark.read.text(sys.argv[1]).rdd.map(lambda r: r[0])
     data = lines.map(parseVector).cache()

examples/src/main/python/logistic_regression.py (9 additions, 8 deletions)

@@ -29,7 +29,6 @@
 import numpy as np
 from pyspark.sql import SparkSession

-
 D = 10  # Number of dimensions

@@ -55,13 +54,15 @@ def readPointBatch(iterator):
     Please refer to examples/src/main/python/ml/logistic_regression_with_elastic_net.py
     to see how ML's implementation is used.""", file=sys.stderr)

-    spark = SparkSession\
-        .builder\
-        .appName("PythonLR")\
-        .getOrCreate()
-
-    points = spark.read.text(sys.argv[1]).rdd.map(lambda r: r[0])\
-        .mapPartitions(readPointBatch).cache()
+    spark = (SparkSession
+             .builder
+             .appName("PythonLR")
+             .getOrCreate())
+    points = (spark.read
+              .text(sys.argv[1])
+              .rdd.map(lambda r: r[0])
+              .mapPartitions(readPointBatch)
+              .cache())
     iterations = int(sys.argv[2])

     # Initialize w to a random value

examples/src/main/python/ml/aft_survival_regression.py (5 additions, 5 deletions)

@@ -18,8 +18,8 @@
 from __future__ import print_function

 # $example on$
-from pyspark.ml.regression import AFTSurvivalRegression
 from pyspark.ml.linalg import Vectors
+from pyspark.ml.regression import AFTSurvivalRegression
 # $example off$
 from pyspark.sql import SparkSession

@@ -30,10 +30,10 @@
 """

 if __name__ == "__main__":
-    spark = SparkSession \
-        .builder \
-        .appName("AFTSurvivalRegressionExample") \
-        .getOrCreate()
+    spark = (SparkSession
+             .builder
+             .appName("AFTSurvivalRegressionExample")
+             .getOrCreate())

     # $example on$
     training = spark.createDataFrame([

examples/src/main/python/ml/als_example.py (9 additions, 8 deletions)

@@ -18,22 +18,23 @@
 from __future__ import print_function

 import sys
-if sys.version >= '3':
-    long = int
-
-from pyspark.sql import SparkSession

 # $example on$
 from pyspark.ml.evaluation import RegressionEvaluator
 from pyspark.ml.recommendation import ALS
 from pyspark.sql import Row
 # $example off$
+from pyspark.sql import SparkSession
+
+if sys.version >= '3':
+    long = int
+

 if __name__ == "__main__":
-    spark = SparkSession\
-        .builder\
-        .appName("ALSExample")\
-        .getOrCreate()
+    spark = (SparkSession
+             .builder
+             .appName("ALSExample")
+             .getOrCreate())

     # $example on$
     lines = spark.read.text("data/mllib/als/sample_movielens_ratings.txt").rdd

examples/src/main/python/ml/binarizer_example.py (6 additions, 5 deletions)

@@ -17,16 +17,17 @@

 from __future__ import print_function

-from pyspark.sql import SparkSession
 # $example on$
 from pyspark.ml.feature import Binarizer
 # $example off$
+from pyspark.sql import SparkSession
+

 if __name__ == "__main__":
-    spark = SparkSession\
-        .builder\
-        .appName("BinarizerExample")\
-        .getOrCreate()
+    spark = (SparkSession
+             .builder
+             .appName("BinarizerExample")
+             .getOrCreate())

     # $example on$
     continuousDataFrame = spark.createDataFrame([

examples/src/main/python/ml/bisecting_k_means_example.py (4 additions, 4 deletions)

@@ -29,10 +29,10 @@
 """

 if __name__ == "__main__":
-    spark = SparkSession\
-        .builder\
-        .appName("BisectingKMeansExample")\
-        .getOrCreate()
+    spark = (SparkSession
+             .builder
+             .appName("BisectingKMeansExample")
+             .getOrCreate())

     # $example on$
     # Loads data.

examples/src/main/python/ml/bucketizer_example.py (7 additions, 6 deletions)

@@ -17,16 +17,17 @@

 from __future__ import print_function

-from pyspark.sql import SparkSession
 # $example on$
 from pyspark.ml.feature import Bucketizer
 # $example off$
+from pyspark.sql import SparkSession
+

 if __name__ == "__main__":
-    spark = SparkSession\
-        .builder\
-        .appName("BucketizerExample")\
-        .getOrCreate()
+    spark = (SparkSession
+             .builder
+             .appName("BucketizerExample")
+             .getOrCreate())

     # $example on$
     splits = [-float("inf"), -0.5, 0.0, 0.5, float("inf")]

@@ -39,7 +40,7 @@
     # Transform original data into its bucket index.
     bucketedData = bucketizer.transform(dataFrame)

-    print("Bucketizer output with %d buckets" % (len(bucketizer.getSplits())-1))
+    print("Bucketizer output with %d buckets" % (len(bucketizer.getSplits()) - 1))
     bucketedData.show()
     # $example off$
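The last hunk above is the pep8 side of the commit: adding the missing whitespace around the arithmetic '-' operator. A hedged sketch of reproducing that fix with autopep8's Python API (an assumption; the commit may equally have made this edit by hand):

    import autopep8

    bad = 'print("buckets: %d" % (len(splits)-1))\n'

    # E225/E226 cover missing whitespace around operators; E226
    # (arithmetic operators) is not enforced by default in pycodestyle,
    # so it is selected explicitly here.
    print(autopep8.fix_code(bad, options={"select": ["E225", "E226"]}))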
