Skip to content

Commit 95896b5

Browse files
committed
Add regression test for SPARK-5969.
1 parent 5757490 commit 95896b5

File tree

1 file changed

+11
-0
lines changed

1 file changed

+11
-0
lines changed

python/pyspark/tests.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -782,6 +782,17 @@ def test_narrow_dependency_in_join(self):
782782
jobId = tracker.getJobIdsForGroup("test4")[0]
783783
self.assertEqual(3, len(tracker.getJobInfo(jobId).stageIds))
784784

785+
def test_sortByKey_uses_all_partitions_not_only_first_and_last(self):
    """Regression test for SPARK-5969.

    Sorts an unsorted sequence of key/value pairs into five partitions,
    once ascending and once descending, and checks that the collected
    result is fully sorted and that no resulting partition is empty.
    """
    # Keys are i * 59 % 101 for i in range(101), so the pairs start out
    # unsorted.
    pairs = [(i * 59 % 101, i) for i in range(101)]
    rdd = self.sc.parallelize(pairs)
    for ascending in (True, False):
        expected = sorted(pairs, reverse=not ascending)
        sorted_rdd = rdd.sortByKey(ascending=ascending, numPartitions=5)
        self.assertEqual(sorted_rdd.collect(), expected)
        # glom() groups each partition into a list; every one of them
        # must contain at least one element.
        for partition_size in sorted_rdd.glom().map(len).collect():
            self.assertGreater(partition_size, 0)
795+
785796

786797
class ProfilerTests(PySparkTestCase):
787798

0 commit comments

Comments
 (0)