 # limitations under the License.
 #
 
-from collections.abc import Iterable
 from functools import total_ordering
 import itertools
 import os
 import re
-import unittest
-import sys
-
-from sparktestsupport import SPARK_HOME
 
 all_modules = []
 
 
-def _get_module_from_name(name):
-    __import__(name)
-    return sys.modules[name]
-
-
-def _discover_python_unittests(*paths, discover_slow=False):
-    """Discover the Python modules that contain unittests under the given paths.
-
-    For example, given ['pyspark/tests'], it returns the module names under
-    that path, like
-    {'pyspark.tests.test_appsubmit', 'pyspark.tests.test_broadcast', ...}
-
-    Parameters
-    ----------
-    paths : str
-        Paths of modules to be discovered.
-    discover_slow : bool
-        If True, only slow tests are discovered.
-        If False, all tests except slow tests are discovered.
-
-    Returns
-    -------
-    A sorted list of complete test module names discovered under the specified paths.
-    """
-
-    def add_test_module(testcases, modules, slow):
-        """Add the test cases' module names to the modules set."""
-        if isinstance(testcases, Iterable):
-            for test_case in testcases:
-                add_test_module(test_case, modules, slow)
-        else:
-            name = testcases.__module__
-            module = _get_module_from_name(name)
-            if slow and hasattr(module, 'is_slow_test'):
-                modules.add(name)
-            if not slow and not hasattr(module, 'is_slow_test'):
-                modules.add(name)
-
-    if not paths:
-        return []
-    modules = set()
-    pyspark_path = os.path.join(SPARK_HOME, "python")
-    for path in paths:
-        # Discover the unittests under each path
-        testcases = unittest.defaultTestLoader.discover(
-            os.path.join(pyspark_path, path),
-            top_level_dir=pyspark_path
-        )
-        add_test_module(testcases, modules, discover_slow)
-
-    return sorted(list(modules))
-
-
 @total_ordering
 class Module(object):
     """
@@ -446,7 +388,24 @@ def __hash__(self):
         "pyspark.profiler",
         "pyspark.shuffle",
         "pyspark.util",
-    ] + _discover_python_unittests("pyspark/tests"),
+        # unittests
+        "pyspark.tests.test_appsubmit",
+        "pyspark.tests.test_broadcast",
+        "pyspark.tests.test_conf",
+        "pyspark.tests.test_context",
+        "pyspark.tests.test_daemon",
+        "pyspark.tests.test_install_spark",
+        "pyspark.tests.test_join",
+        "pyspark.tests.test_profiler",
+        "pyspark.tests.test_rdd",
+        "pyspark.tests.test_rddbarrier",
+        "pyspark.tests.test_readwrite",
+        "pyspark.tests.test_serializers",
+        "pyspark.tests.test_shuffle",
+        "pyspark.tests.test_taskcontext",
+        "pyspark.tests.test_util",
+        "pyspark.tests.test_worker",
+    ]
 )
 
 pyspark_sql = Module(
@@ -478,7 +437,32 @@ def __hash__(self):
         "pyspark.sql.pandas.serializers",
         "pyspark.sql.pandas.typehints",
         "pyspark.sql.pandas.utils",
-    ] + _discover_python_unittests("pyspark/sql/tests"),
+        # unittests
+        "pyspark.sql.tests.test_arrow",
+        "pyspark.sql.tests.test_catalog",
+        "pyspark.sql.tests.test_column",
+        "pyspark.sql.tests.test_conf",
+        "pyspark.sql.tests.test_context",
+        "pyspark.sql.tests.test_dataframe",
+        "pyspark.sql.tests.test_datasources",
+        "pyspark.sql.tests.test_functions",
+        "pyspark.sql.tests.test_group",
+        "pyspark.sql.tests.test_pandas_cogrouped_map",
+        "pyspark.sql.tests.test_pandas_grouped_map",
+        "pyspark.sql.tests.test_pandas_map",
+        "pyspark.sql.tests.test_pandas_udf",
+        "pyspark.sql.tests.test_pandas_udf_grouped_agg",
+        "pyspark.sql.tests.test_pandas_udf_scalar",
+        "pyspark.sql.tests.test_pandas_udf_typehints",
+        "pyspark.sql.tests.test_pandas_udf_window",
+        "pyspark.sql.tests.test_readwriter",
+        "pyspark.sql.tests.test_serde",
+        "pyspark.sql.tests.test_session",
+        "pyspark.sql.tests.test_streaming",
+        "pyspark.sql.tests.test_types",
+        "pyspark.sql.tests.test_udf",
+        "pyspark.sql.tests.test_utils",
+    ]
 )
 
 
@@ -490,7 +474,10 @@ def __hash__(self):
     source_file_regexes=[
         "python/pyspark/resource"
     ],
-    python_test_goals=_discover_python_unittests("pyspark/resource/tests"),
+    python_test_goals=[
+        # unittests
+        "pyspark.resource.tests.test_resources",
+    ]
 )
 
 
@@ -507,7 +494,12 @@ def __hash__(self):
     python_test_goals=[
         # doctests
         "pyspark.streaming.util",
-    ] + _discover_python_unittests("pyspark/streaming/tests"),
+        # unittests
+        "pyspark.streaming.tests.test_context",
+        "pyspark.streaming.tests.test_dstream",
+        "pyspark.streaming.tests.test_kinesis",
+        "pyspark.streaming.tests.test_listener",
+    ]
 )
 
 
@@ -533,10 +525,17 @@ def __hash__(self):
         "pyspark.mllib.stat.KernelDensity",
         "pyspark.mllib.tree",
         "pyspark.mllib.util",
-    ] + _discover_python_unittests("pyspark/mllib/tests"),
+        # unittests
+        "pyspark.mllib.tests.test_algorithms",
+        "pyspark.mllib.tests.test_feature",
+        "pyspark.mllib.tests.test_linalg",
+        "pyspark.mllib.tests.test_stat",
+        "pyspark.mllib.tests.test_streaming_algorithms",
+        "pyspark.mllib.tests.test_util",
+    ],
     excluded_python_implementations=[
         "PyPy"  # Skip these tests under PyPy since they require numpy and it isn't available there
-    ],
+    ]
 )
 
 
@@ -560,13 +559,27 @@ def __hash__(self):
         "pyspark.ml.regression",
         "pyspark.ml.stat",
         "pyspark.ml.tuning",
-    ] + _discover_python_unittests("pyspark/ml/tests"),
+        # unittests
+        "pyspark.ml.tests.test_algorithms",
+        "pyspark.ml.tests.test_base",
+        "pyspark.ml.tests.test_evaluation",
+        "pyspark.ml.tests.test_feature",
+        "pyspark.ml.tests.test_image",
+        "pyspark.ml.tests.test_linalg",
+        "pyspark.ml.tests.test_param",
+        "pyspark.ml.tests.test_persistence",
+        "pyspark.ml.tests.test_pipeline",
+        "pyspark.ml.tests.test_stat",
+        "pyspark.ml.tests.test_training_summary",
+        "pyspark.ml.tests.test_tuning",
+        "pyspark.ml.tests.test_util",
+        "pyspark.ml.tests.test_wrapper",
+    ],
     excluded_python_implementations=[
         "PyPy"  # Skip these tests under PyPy since they require numpy and it isn't available there
-    ],
+    ]
 )
 
-
 pyspark_pandas = Module(
     name="pyspark-pandas",
     dependencies=[pyspark_core, pyspark_sql],
@@ -601,14 +614,59 @@ def __hash__(self):
         "pyspark.pandas.spark.accessors",
         "pyspark.pandas.spark.utils",
         "pyspark.pandas.typedef.typehints",
-    ] + _discover_python_unittests("pyspark/pandas/tests"),
+        # unittests
+        "pyspark.pandas.tests.data_type_ops.test_base",
+        "pyspark.pandas.tests.data_type_ops.test_binary_ops",
+        "pyspark.pandas.tests.data_type_ops.test_boolean_ops",
+        "pyspark.pandas.tests.data_type_ops.test_categorical_ops",
+        "pyspark.pandas.tests.data_type_ops.test_complex_ops",
+        "pyspark.pandas.tests.data_type_ops.test_date_ops",
+        "pyspark.pandas.tests.data_type_ops.test_datetime_ops",
+        "pyspark.pandas.tests.data_type_ops.test_decimal_ops",
+        "pyspark.pandas.tests.data_type_ops.test_null_ops",
+        "pyspark.pandas.tests.data_type_ops.test_num_ops",
+        "pyspark.pandas.tests.data_type_ops.test_string_ops",
+        "pyspark.pandas.tests.data_type_ops.test_udt_ops",
+        "pyspark.pandas.tests.indexes.test_category",
+        "pyspark.pandas.tests.plot.test_frame_plot",
+        "pyspark.pandas.tests.plot.test_frame_plot_matplotlib",
+        "pyspark.pandas.tests.plot.test_frame_plot_plotly",
+        "pyspark.pandas.tests.plot.test_series_plot",
+        "pyspark.pandas.tests.plot.test_series_plot_matplotlib",
+        "pyspark.pandas.tests.plot.test_series_plot_plotly",
+        "pyspark.pandas.tests.test_categorical",
+        "pyspark.pandas.tests.test_config",
+        "pyspark.pandas.tests.test_csv",
+        "pyspark.pandas.tests.test_dataframe_conversion",
+        "pyspark.pandas.tests.test_dataframe_spark_io",
+        "pyspark.pandas.tests.test_default_index",
+        "pyspark.pandas.tests.test_expanding",
+        "pyspark.pandas.tests.test_extension",
+        "pyspark.pandas.tests.test_frame_spark",
+        "pyspark.pandas.tests.test_indexops_spark",
+        "pyspark.pandas.tests.test_internal",
+        "pyspark.pandas.tests.test_namespace",
+        "pyspark.pandas.tests.test_numpy_compat",
+        "pyspark.pandas.tests.test_ops_on_diff_frames_groupby_expanding",
+        "pyspark.pandas.tests.test_ops_on_diff_frames_groupby_rolling",
+        "pyspark.pandas.tests.test_repr",
+        "pyspark.pandas.tests.test_reshape",
+        "pyspark.pandas.tests.test_rolling",
+        "pyspark.pandas.tests.test_series_conversion",
+        "pyspark.pandas.tests.test_series_datetime",
+        "pyspark.pandas.tests.test_series_string",
+        "pyspark.pandas.tests.test_spark_functions",
+        "pyspark.pandas.tests.test_sql",
+        "pyspark.pandas.tests.test_typedef",
+        "pyspark.pandas.tests.test_utils",
+        "pyspark.pandas.tests.test_window",
+    ],
     excluded_python_implementations=[
         "PyPy"  # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and
-        # they aren't available there
-    ],
+                # they aren't available there
+    ]
 )
 
-
 pyspark_pandas_slow = Module(
     name="pyspark-pandas-slow",
     dependencies=[pyspark_core, pyspark_sql],
@@ -620,7 +678,17 @@ def __hash__(self):
         "pyspark.pandas.frame",
         "pyspark.pandas.generic",
         "pyspark.pandas.series",
-    ] + _discover_python_unittests("pyspark/pandas/tests", discover_slow=True),
+        # unittests
+        "pyspark.pandas.tests.indexes.test_base",
+        "pyspark.pandas.tests.indexes.test_datetime",
+        "pyspark.pandas.tests.test_dataframe",
+        "pyspark.pandas.tests.test_groupby",
+        "pyspark.pandas.tests.test_indexing",
+        "pyspark.pandas.tests.test_ops_on_diff_frames",
+        "pyspark.pandas.tests.test_ops_on_diff_frames_groupby",
+        "pyspark.pandas.tests.test_series",
+        "pyspark.pandas.tests.test_stats",
+    ],
     excluded_python_implementations=[
         "PyPy"  # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and
         # they aren't available there
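
For context, the reverted _discover_python_unittests helper can be reproduced as a small standalone function. The sketch below mirrors the logic visible in the removed lines above, under the same convention that a test module marks itself slow by defining a module-level is_slow_test attribute; the name discover_test_modules and the explicit root parameter are illustrative substitutes for the SPARK_HOME-based path, not part of the Spark codebase.

import os
import sys
import unittest
from collections.abc import Iterable


def discover_test_modules(root, *paths, slow=False):
    """Return the sorted dotted names of unittest modules found under paths.

    root is the top-level package directory, the equivalent of
    os.path.join(SPARK_HOME, "python") in the removed helper.
    """

    def collect(suite_or_case, found):
        # unittest discovery returns nested TestSuites; recurse down to the
        # individual TestCase instances and record their defining modules.
        if isinstance(suite_or_case, Iterable):
            for item in suite_or_case:
                collect(item, found)
        else:
            name = suite_or_case.__module__
            __import__(name)
            module = sys.modules[name]
            # A module counts as "slow" iff it defines is_slow_test; keep it
            # only when that matches the kind of tests the caller asked for.
            if hasattr(module, "is_slow_test") == slow:
                found.add(name)

    found = set()
    for path in paths:
        suite = unittest.defaultTestLoader.discover(
            os.path.join(root, path), top_level_dir=root
        )
        collect(suite, found)
    return sorted(found)


# Hypothetical usage, mirroring the removed call sites:
#   discover_test_modules("/path/to/spark/python", "pyspark/tests")
#   discover_test_modules("/path/to/spark/python", "pyspark/pandas/tests", slow=True)

Passing slow=True returns only the modules that define the marker, matching the discover_slow=True call that the pyspark-pandas-slow module used above.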