Commit 1f6e2f5

Revert "[SPARK-35721][PYTHON] Path level discover for python unittests"

This reverts commit 5db51ef.

1 parent a6088e5 · commit 1f6e2f5

10 files changed: +140 -117 lines changed

dev/sparktestsupport/modules.py

Lines changed: 140 additions & 72 deletions
@@ -15,72 +15,14 @@
 # limitations under the License.
 #

-from collections.abc import Iterable
 from functools import total_ordering
 import itertools
 import os
 import re
-import unittest
-import sys
-
-from sparktestsupport import SPARK_HOME

 all_modules = []


-def _get_module_from_name(name):
-    __import__(name)
-    return sys.modules[name]
-
-
-def _discover_python_unittests(*paths, discover_slow=False):
-    """Discover the python module which contains unittests under paths.
-
-    Such as:
-    ['pyspark/tests'], it will return the set of module name under the path of pyspark/tests, like
-    {'pyspark.tests.test_appsubmit', 'pyspark.tests.test_broadcast', ...}
-
-    Parameters
-    ----------
-    paths : str
-        Paths of modules to be discovered.
-    discover_slow : bool
-        If True, will only discover slow tests
-        If False, will discover all tests except slow tests
-
-    Returns
-    -------
-    A set of complete test module name discovered under specified paths
-    """
-
-    def add_test_module(testcases, modules, slow):
-        """Append the testcases module names to modules set"""
-        if isinstance(testcases, Iterable):
-            for test_case in testcases:
-                add_test_module(test_case, modules, slow)
-        else:
-            name = testcases.__module__
-            module = _get_module_from_name(name)
-            if slow and hasattr(module, 'is_slow_test'):
-                modules.add(name)
-            if not slow and not hasattr(module, 'is_slow_test'):
-                modules.add(name)
-
-    if not paths:
-        return []
-    modules = set()
-    pyspark_path = os.path.join(SPARK_HOME, "python")
-    for path in paths:
-        # Discover the unittest in every path
-        testcases = unittest.defaultTestLoader.discover(
-            os.path.join(pyspark_path, path),
-            top_level_dir=pyspark_path
-        )
-        add_test_module(testcases, modules, discover_slow)
-
-    return sorted(list(modules))
-
-
 @total_ordering
 class Module(object):
     """
@@ -446,7 +388,24 @@ def __hash__(self):
         "pyspark.profiler",
         "pyspark.shuffle",
         "pyspark.util",
-    ] + _discover_python_unittests("pyspark/tests"),
+        # unittests
+        "pyspark.tests.test_appsubmit",
+        "pyspark.tests.test_broadcast",
+        "pyspark.tests.test_conf",
+        "pyspark.tests.test_context",
+        "pyspark.tests.test_daemon",
+        "pyspark.tests.test_install_spark",
+        "pyspark.tests.test_join",
+        "pyspark.tests.test_profiler",
+        "pyspark.tests.test_rdd",
+        "pyspark.tests.test_rddbarrier",
+        "pyspark.tests.test_readwrite",
+        "pyspark.tests.test_serializers",
+        "pyspark.tests.test_shuffle",
+        "pyspark.tests.test_taskcontext",
+        "pyspark.tests.test_util",
+        "pyspark.tests.test_worker",
+    ]
 )

 pyspark_sql = Module(
@@ -478,7 +437,32 @@ def __hash__(self):
         "pyspark.sql.pandas.serializers",
         "pyspark.sql.pandas.typehints",
         "pyspark.sql.pandas.utils",
-    ] + _discover_python_unittests("pyspark/sql/tests"),
+        # unittests
+        "pyspark.sql.tests.test_arrow",
+        "pyspark.sql.tests.test_catalog",
+        "pyspark.sql.tests.test_column",
+        "pyspark.sql.tests.test_conf",
+        "pyspark.sql.tests.test_context",
+        "pyspark.sql.tests.test_dataframe",
+        "pyspark.sql.tests.test_datasources",
+        "pyspark.sql.tests.test_functions",
+        "pyspark.sql.tests.test_group",
+        "pyspark.sql.tests.test_pandas_cogrouped_map",
+        "pyspark.sql.tests.test_pandas_grouped_map",
+        "pyspark.sql.tests.test_pandas_map",
+        "pyspark.sql.tests.test_pandas_udf",
+        "pyspark.sql.tests.test_pandas_udf_grouped_agg",
+        "pyspark.sql.tests.test_pandas_udf_scalar",
+        "pyspark.sql.tests.test_pandas_udf_typehints",
+        "pyspark.sql.tests.test_pandas_udf_window",
+        "pyspark.sql.tests.test_readwriter",
+        "pyspark.sql.tests.test_serde",
+        "pyspark.sql.tests.test_session",
+        "pyspark.sql.tests.test_streaming",
+        "pyspark.sql.tests.test_types",
+        "pyspark.sql.tests.test_udf",
+        "pyspark.sql.tests.test_utils",
+    ]
 )

@@ -490,7 +474,10 @@ def __hash__(self):
     source_file_regexes=[
         "python/pyspark/resource"
     ],
-    python_test_goals=_discover_python_unittests("pyspark/resource/tests"),
+    python_test_goals=[
+        # unittests
+        "pyspark.resource.tests.test_resources",
+    ]
 )

@@ -507,7 +494,12 @@ def __hash__(self):
     python_test_goals=[
         # doctests
         "pyspark.streaming.util",
-    ] + _discover_python_unittests("pyspark/streaming/tests"),
+        # unittests
+        "pyspark.streaming.tests.test_context",
+        "pyspark.streaming.tests.test_dstream",
+        "pyspark.streaming.tests.test_kinesis",
+        "pyspark.streaming.tests.test_listener",
+    ]
 )

@@ -533,10 +525,17 @@ def __hash__(self):
         "pyspark.mllib.stat.KernelDensity",
         "pyspark.mllib.tree",
         "pyspark.mllib.util",
-    ] + _discover_python_unittests("pyspark/mllib/tests"),
+        # unittests
+        "pyspark.mllib.tests.test_algorithms",
+        "pyspark.mllib.tests.test_feature",
+        "pyspark.mllib.tests.test_linalg",
+        "pyspark.mllib.tests.test_stat",
+        "pyspark.mllib.tests.test_streaming_algorithms",
+        "pyspark.mllib.tests.test_util",
+    ],
     excluded_python_implementations=[
         "PyPy"  # Skip these tests under PyPy since they require numpy and it isn't available there
-    ],
+    ]
 )

@@ -560,13 +559,27 @@ def __hash__(self):
         "pyspark.ml.regression",
         "pyspark.ml.stat",
         "pyspark.ml.tuning",
-    ] + _discover_python_unittests("pyspark/ml/tests"),
+        # unittests
+        "pyspark.ml.tests.test_algorithms",
+        "pyspark.ml.tests.test_base",
+        "pyspark.ml.tests.test_evaluation",
+        "pyspark.ml.tests.test_feature",
+        "pyspark.ml.tests.test_image",
+        "pyspark.ml.tests.test_linalg",
+        "pyspark.ml.tests.test_param",
+        "pyspark.ml.tests.test_persistence",
+        "pyspark.ml.tests.test_pipeline",
+        "pyspark.ml.tests.test_stat",
+        "pyspark.ml.tests.test_training_summary",
+        "pyspark.ml.tests.test_tuning",
+        "pyspark.ml.tests.test_util",
+        "pyspark.ml.tests.test_wrapper",
+    ],
     excluded_python_implementations=[
         "PyPy"  # Skip these tests under PyPy since they require numpy and it isn't available there
-    ],
+    ]
 )

-
 pyspark_pandas = Module(
     name="pyspark-pandas",
     dependencies=[pyspark_core, pyspark_sql],
@@ -601,14 +614,59 @@ def __hash__(self):
         "pyspark.pandas.spark.accessors",
         "pyspark.pandas.spark.utils",
         "pyspark.pandas.typedef.typehints",
-    ] + _discover_python_unittests("pyspark/pandas/tests"),
+        # unittests
+        "pyspark.pandas.tests.data_type_ops.test_base",
+        "pyspark.pandas.tests.data_type_ops.test_binary_ops",
+        "pyspark.pandas.tests.data_type_ops.test_boolean_ops",
+        "pyspark.pandas.tests.data_type_ops.test_categorical_ops",
+        "pyspark.pandas.tests.data_type_ops.test_complex_ops",
+        "pyspark.pandas.tests.data_type_ops.test_date_ops",
+        "pyspark.pandas.tests.data_type_ops.test_datetime_ops",
+        "pyspark.pandas.tests.data_type_ops.test_decimal_ops",
+        "pyspark.pandas.tests.data_type_ops.test_null_ops",
+        "pyspark.pandas.tests.data_type_ops.test_num_ops",
+        "pyspark.pandas.tests.data_type_ops.test_string_ops",
+        "pyspark.pandas.tests.data_type_ops.test_udt_ops",
+        "pyspark.pandas.tests.indexes.test_category",
+        "pyspark.pandas.tests.plot.test_frame_plot",
+        "pyspark.pandas.tests.plot.test_frame_plot_matplotlib",
+        "pyspark.pandas.tests.plot.test_frame_plot_plotly",
+        "pyspark.pandas.tests.plot.test_series_plot",
+        "pyspark.pandas.tests.plot.test_series_plot_matplotlib",
+        "pyspark.pandas.tests.plot.test_series_plot_plotly",
+        "pyspark.pandas.tests.test_categorical",
+        "pyspark.pandas.tests.test_config",
+        "pyspark.pandas.tests.test_csv",
+        "pyspark.pandas.tests.test_dataframe_conversion",
+        "pyspark.pandas.tests.test_dataframe_spark_io",
+        "pyspark.pandas.tests.test_default_index",
+        "pyspark.pandas.tests.test_expanding",
+        "pyspark.pandas.tests.test_extension",
+        "pyspark.pandas.tests.test_frame_spark",
+        "pyspark.pandas.tests.test_indexops_spark",
+        "pyspark.pandas.tests.test_internal",
+        "pyspark.pandas.tests.test_namespace",
+        "pyspark.pandas.tests.test_numpy_compat",
+        "pyspark.pandas.tests.test_ops_on_diff_frames_groupby_expanding",
+        "pyspark.pandas.tests.test_ops_on_diff_frames_groupby_rolling",
+        "pyspark.pandas.tests.test_repr",
+        "pyspark.pandas.tests.test_reshape",
+        "pyspark.pandas.tests.test_rolling",
+        "pyspark.pandas.tests.test_series_conversion",
+        "pyspark.pandas.tests.test_series_datetime",
+        "pyspark.pandas.tests.test_series_string",
+        "pyspark.pandas.tests.test_spark_functions",
+        "pyspark.pandas.tests.test_sql",
+        "pyspark.pandas.tests.test_typedef",
+        "pyspark.pandas.tests.test_utils",
+        "pyspark.pandas.tests.test_window",
+    ],
     excluded_python_implementations=[
         "PyPy"  # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and
-        # they aren't available there
-    ],
+        # they aren't available there
+    ]
 )

-
 pyspark_pandas_slow = Module(
     name="pyspark-pandas-slow",
     dependencies=[pyspark_core, pyspark_sql],
@@ -620,7 +678,17 @@ def __hash__(self):
         "pyspark.pandas.frame",
         "pyspark.pandas.generic",
         "pyspark.pandas.series",
-    ] + _discover_python_unittests("pyspark/pandas/tests", discover_slow=True),
+        # unittests
+        "pyspark.pandas.tests.indexes.test_base",
+        "pyspark.pandas.tests.indexes.test_datetime",
+        "pyspark.pandas.tests.test_dataframe",
+        "pyspark.pandas.tests.test_groupby",
+        "pyspark.pandas.tests.test_indexing",
+        "pyspark.pandas.tests.test_ops_on_diff_frames",
+        "pyspark.pandas.tests.test_ops_on_diff_frames_groupby",
+        "pyspark.pandas.tests.test_series",
+        "pyspark.pandas.tests.test_stats",
+    ],
     excluded_python_implementations=[
         "PyPy"  # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and
         # they aren't available there

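With the discovery helper gone, the python_test_goals lists above are maintained by hand again, so a newly added test_*.py file must also be listed in modules.py. A hypothetical drift check (not part of this commit; paths and names are illustrative) could compare the hand-written lists against the files on disk:

# Hypothetical sanity check: hand-written goals vs. test_*.py files on disk.
import glob
import os


def goals_on_disk(python_root, package_path):
    """Map every test_*.py under package_path to a dotted module name."""
    pattern = os.path.join(python_root, package_path, "**", "test_*.py")
    modules = set()
    for path in glob.glob(pattern, recursive=True):
        rel = os.path.relpath(path, python_root)
        modules.add(rel[: -len(".py")].replace(os.sep, "."))
    return modules


def check_goals(listed_goals, python_root, package_path):
    """Return goals with no matching file and files with no matching goal."""
    on_disk = goals_on_disk(python_root, package_path)
    listed = set(listed_goals)
    return {"stale": listed - on_disk, "unlisted": on_disk - listed}


# e.g. check_goals(["pyspark.tests.test_rdd", "pyspark.tests.test_worker"], "python", "pyspark/tests")
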
python/pyspark/pandas/tests/indexes/test_base.py

Lines changed: 0 additions & 5 deletions
@@ -34,11 +34,6 @@
 from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils, SPARK_CONF_ARROW_ENABLED


-# This is used in run-tests.py to discover the slow test. See more in the doc of
-# _discover_python_unittests of dev/sparktestsupport/modules.py
-is_slow_test = True
-
-
 class IndexesTest(PandasOnSparkTestCase, TestUtils):
     @property
     def pdf(self):

python/pyspark/pandas/tests/indexes/test_datetime.py

Lines changed: 0 additions & 5 deletions
@@ -25,11 +25,6 @@
 from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils


-# This is used in run-tests.py to discover the slow test. See more in the doc of
-# _discover_python_unittests of dev/sparktestsupport/modules.py
-is_slow_test = True
-
-
 class DatetimeIndexTest(PandasOnSparkTestCase, TestUtils):
     @property
     def fixed_freqs(self):

python/pyspark/pandas/tests/test_dataframe.py

Lines changed: 0 additions & 5 deletions
@@ -50,11 +50,6 @@
 from pyspark.pandas.utils import name_like_string


-# This is used in run-tests.py to discover the slow test. See more in the doc of
-# _discover_python_unittests of dev/sparktestsupport/modules.py
-is_slow_test = True
-
-
 class DataFrameTest(PandasOnSparkTestCase, SQLTestUtils):
     @property
     def pdf(self):

python/pyspark/pandas/tests/test_groupby.py

Lines changed: 0 additions & 5 deletions
@@ -34,11 +34,6 @@
 from pyspark.testing.pandasutils import PandasOnSparkTestCase, TestUtils


-# This is used in run-tests.py to discover the slow test. See more in the doc of
-# _discover_python_unittests of dev/sparktestsupport/modules.py
-is_slow_test = True
-
-
 class GroupByTest(PandasOnSparkTestCase, TestUtils):
     def test_groupby_simple(self):
         pdf = pd.DataFrame(

python/pyspark/pandas/tests/test_indexing.py

Lines changed: 0 additions & 5 deletions
@@ -27,11 +27,6 @@
 from pyspark.testing.pandasutils import ComparisonTestBase, PandasOnSparkTestCase, compare_both


-# This is used in run-tests.py to discover the slow test. See more in the doc of
-# _discover_python_unittests of dev/sparktestsupport/modules.py
-is_slow_test = True
-
-
 class BasicIndexingTest(ComparisonTestBase):
     @property
     def pdf(self):

python/pyspark/pandas/tests/test_ops_on_diff_frames.py

Lines changed: 0 additions & 5 deletions
@@ -35,11 +35,6 @@
 )


-# This is used in run-tests.py to discover the slow test. See more in the doc of
-# _discover_python_unittests of dev/sparktestsupport/modules.py
-is_slow_test = True
-
-
 class OpsOnDiffFramesEnabledTest(PandasOnSparkTestCase, SQLTestUtils):
     @classmethod
     def setUpClass(cls):

python/pyspark/pandas/tests/test_ops_on_diff_frames_groupby.py

Lines changed: 0 additions & 5 deletions
@@ -25,11 +25,6 @@
 from pyspark.testing.sqlutils import SQLTestUtils


-# This is used in run-tests.py to discover the slow test. See more in the doc of
-# _discover_python_unittests of dev/sparktestsupport/modules.py
-is_slow_test = True
-
-
 class OpsOnDiffFramesGroupByTest(PandasOnSparkTestCase, SQLTestUtils):
     @classmethod
     def setUpClass(cls):

python/pyspark/pandas/tests/test_series.py

Lines changed: 0 additions & 5 deletions
@@ -44,11 +44,6 @@
 )


-# This is used in run-tests.py to discover the slow test. See more in the doc of
-# _discover_python_unittests of dev/sparktestsupport/modules.py
-is_slow_test = True
-
-
 class SeriesTest(PandasOnSparkTestCase, SQLTestUtils):
     @property
     def pser(self):
