@@ -39,6 +39,7 @@
     assert_array_equal,
     assert_equal,
     assert_identical,
+    create_test_data,
     has_cftime,
     has_dask,
     requires_bottleneck,
@@ -62,33 +63,6 @@
 ]


-def create_test_data(seed=None, add_attrs=True):
-    rs = np.random.RandomState(seed)
-    _vars = {
-        "var1": ["dim1", "dim2"],
-        "var2": ["dim1", "dim2"],
-        "var3": ["dim3", "dim1"],
-    }
-    _dims = {"dim1": 8, "dim2": 9, "dim3": 10}
-
-    obj = Dataset()
-    obj["dim2"] = ("dim2", 0.5 * np.arange(_dims["dim2"]))
-    obj["dim3"] = ("dim3", list("abcdefghij"))
-    obj["time"] = ("time", pd.date_range("2000-01-01", periods=20))
-    for v, dims in sorted(_vars.items()):
-        data = rs.normal(size=tuple(_dims[d] for d in dims))
-        obj[v] = (dims, data)
-        if add_attrs:
-            obj[v].attrs = {"foo": "variable"}
-    obj.coords["numbers"] = (
-        "dim3",
-        np.array([0, 1, 2, 0, 0, 1, 1, 2, 2, 3], dtype="int64"),
-    )
-    obj.encoding = {"foo": "bar"}
-    assert all(obj.data.flags.writeable for obj in obj.variables.values())
-    return obj
-
-
 def create_append_test_data(seed=None):
     rs = np.random.RandomState(seed)

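Note: the module-level create_test_data fixture deleted above is not lost; the first hunk adds it to the shared-helper import block, so tests in this file keep calling it unchanged. A minimal usage sketch, assuming the shared helper matches the removed definition:

    ds = create_test_data(seed=0)  # now imported from the shared test helpers
    assert dict(ds.sizes) == {"dim1": 8, "dim2": 9, "dim3": 10, "time": 20}
    assert set(ds.data_vars) == {"var1", "var2", "var3"}
    assert "numbers" in ds.coords  # int64 group labels along dim3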
@@ -3785,173 +3759,6 @@ def test_squeeze_drop(self): |
         selected = data.squeeze(drop=True)
         assert_identical(data, selected)

-    def test_groupby(self):
-        data = Dataset(
-            {"z": (["x", "y"], np.random.randn(3, 5))},
-            {"x": ("x", list("abc")), "c": ("x", [0, 1, 0]), "y": range(5)},
-        )
-        groupby = data.groupby("x")
-        assert len(groupby) == 3
-        expected_groups = {"a": 0, "b": 1, "c": 2}
-        assert groupby.groups == expected_groups
-        expected_items = [
-            ("a", data.isel(x=0)),
-            ("b", data.isel(x=1)),
-            ("c", data.isel(x=2)),
-        ]
-        for actual, expected in zip(groupby, expected_items):
-            assert actual[0] == expected[0]
-            assert_equal(actual[1], expected[1])
-
-        def identity(x):
-            return x
-
-        for k in ["x", "c", "y"]:
-            actual = data.groupby(k, squeeze=False).map(identity)
-            assert_equal(data, actual)
-
-    def test_groupby_returns_new_type(self):
-        data = Dataset({"z": (["x", "y"], np.random.randn(3, 5))})
-
-        actual = data.groupby("x").map(lambda ds: ds["z"])
-        expected = data["z"]
-        assert_identical(expected, actual)
-
-        actual = data["z"].groupby("x").map(lambda x: x.to_dataset())
-        expected = data
-        assert_identical(expected, actual)
-
-    def test_groupby_iter(self):
-        data = create_test_data()
-        for n, (t, sub) in enumerate(list(data.groupby("dim1"))[:3]):
-            assert data["dim1"][n] == t
-            assert_equal(data["var1"][n], sub["var1"])
-            assert_equal(data["var2"][n], sub["var2"])
-            assert_equal(data["var3"][:, n], sub["var3"])
-
-    def test_groupby_errors(self):
-        data = create_test_data()
-        with pytest.raises(TypeError, match=r"`group` must be"):
-            data.groupby(np.arange(10))
-        with pytest.raises(ValueError, match=r"length does not match"):
-            data.groupby(data["dim1"][:3])
-        with pytest.raises(TypeError, match=r"`group` must be"):
-            data.groupby(data.coords["dim1"].to_index())
-
-    def test_groupby_reduce(self):
-        data = Dataset(
-            {
-                "xy": (["x", "y"], np.random.randn(3, 4)),
-                "xonly": ("x", np.random.randn(3)),
-                "yonly": ("y", np.random.randn(4)),
-                "letters": ("y", ["a", "a", "b", "b"]),
-            }
-        )
-
-        expected = data.mean("y")
-        expected["yonly"] = expected["yonly"].variable.set_dims({"x": 3})
-        actual = data.groupby("x").mean(...)
-        assert_allclose(expected, actual)
-
-        actual = data.groupby("x").mean("y")
-        assert_allclose(expected, actual)
-
-        letters = data["letters"]
-        expected = Dataset(
-            {
-                "xy": data["xy"].groupby(letters).mean(...),
-                "xonly": (data["xonly"].mean().variable.set_dims({"letters": 2})),
-                "yonly": data["yonly"].groupby(letters).mean(),
-            }
-        )
-        actual = data.groupby("letters").mean(...)
-        assert_allclose(expected, actual)
-
-    def test_groupby_math(self):
-        def reorder_dims(x):
-            return x.transpose("dim1", "dim2", "dim3", "time")
-
-        ds = create_test_data()
-        ds["dim1"] = ds["dim1"]
-        for squeeze in [True, False]:
-            grouped = ds.groupby("dim1", squeeze=squeeze)
-
-            expected = reorder_dims(ds + ds.coords["dim1"])
-            actual = grouped + ds.coords["dim1"]
-            assert_identical(expected, reorder_dims(actual))
-
-            actual = ds.coords["dim1"] + grouped
-            assert_identical(expected, reorder_dims(actual))
-
-            ds2 = 2 * ds
-            expected = reorder_dims(ds + ds2)
-            actual = grouped + ds2
-            assert_identical(expected, reorder_dims(actual))
-
-            actual = ds2 + grouped
-            assert_identical(expected, reorder_dims(actual))
-
-        grouped = ds.groupby("numbers")
-        zeros = DataArray([0, 0, 0, 0], [("numbers", range(4))])
-        expected = (ds + Variable("dim3", np.zeros(10))).transpose(
-            "dim3", "dim1", "dim2", "time"
-        )
-        actual = grouped + zeros
-        assert_equal(expected, actual)
-
-        actual = zeros + grouped
-        assert_equal(expected, actual)
-
-        with pytest.raises(ValueError, match=r"incompat.* grouped binary"):
-            grouped + ds
-        with pytest.raises(ValueError, match=r"incompat.* grouped binary"):
-            ds + grouped
-        with pytest.raises(TypeError, match=r"only support binary ops"):
-            grouped + 1
-        with pytest.raises(TypeError, match=r"only support binary ops"):
-            grouped + grouped
-        with pytest.raises(TypeError, match=r"in-place operations"):
-            ds += grouped
-
-        ds = Dataset(
-            {
-                "x": ("time", np.arange(100)),
-                "time": pd.date_range("2000-01-01", periods=100),
-            }
-        )
-        with pytest.raises(ValueError, match=r"incompat.* grouped binary"):
-            ds + ds.groupby("time.month")
-
-    def test_groupby_math_virtual(self):
-        ds = Dataset(
-            {"x": ("t", [1, 2, 3])}, {"t": pd.date_range("20100101", periods=3)}
-        )
-        grouped = ds.groupby("t.day")
-        actual = grouped - grouped.mean(...)
-        expected = Dataset({"x": ("t", [0, 0, 0])}, ds[["t", "t.day"]])
-        assert_identical(actual, expected)
-
-    def test_groupby_nan(self):
-        # nan should be excluded from groupby
-        ds = Dataset({"foo": ("x", [1, 2, 3, 4])}, {"bar": ("x", [1, 1, 2, np.nan])})
-        actual = ds.groupby("bar").mean(...)
-        expected = Dataset({"foo": ("bar", [1.5, 3]), "bar": [1, 2]})
-        assert_identical(actual, expected)
-
-    def test_groupby_order(self):
-        # groupby should preserve variables order
-        ds = Dataset()
-        for vn in ["a", "b", "c"]:
-            ds[vn] = DataArray(np.arange(10), dims=["t"])
-        data_vars_ref = list(ds.data_vars.keys())
-        ds = ds.groupby("t").mean(...)
-        data_vars = list(ds.data_vars.keys())
-        assert data_vars == data_vars_ref
-        # coords are now at the end of the list, so the test below fails
-        # all_vars = list(ds.variables.keys())
-        # all_vars_ref = list(ds.variables.keys())
-        # self.assertEqual(all_vars, all_vars_ref)
-
     def test_resample_and_first(self):
         times = pd.date_range("2000-01-01", freq="6H", periods=10)
         ds = Dataset(
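The hunk above only deletes the Dataset groupby tests from this file; presumably they move to a dedicated groupby test module, though the destination is outside this excerpt. For reference, a condensed standalone version of the first removed check, drawn from the deleted test_groupby:

    import numpy as np
    from xarray import Dataset

    data = Dataset(
        {"z": (["x", "y"], np.random.randn(3, 5))},
        {"x": ("x", list("abc")), "c": ("x", [0, 1, 0]), "y": range(5)},
    )
    grouped = data.groupby("x")
    assert len(grouped) == 3
    assert grouped.groups == {"a": 0, "b": 1, "c": 2}  # label -> integer location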