Skip to content

Commit 1808c4a

Browse files
authored
Merge 045ce02 into 1e2a85a
2 parents 1e2a85a + 045ce02 commit 1808c4a

File tree

5 files changed

+188
-101
lines changed

5 files changed

+188
-101
lines changed

benchmarks/benchmarks/stats.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Copyright Iris contributors
2+
#
3+
# This file is part of Iris and is released under the BSD license.
4+
# See LICENSE in the root of the repository for full licensing details.
5+
"""Stats benchmark tests."""
6+
7+
import iris
8+
from iris.analysis.stats import pearsonr
9+
import iris.tests
10+
11+
12+
class PearsonR:
    """ASV benchmarks for :func:`iris.analysis.stats.pearsonr`.

    Times the correlation of two time-slices of the same source cube,
    once with realised (real) data and once with lazy data.
    """

    def setup(self):
        # Load the benchmark fixture cube from the Iris test-data repository.
        source_cube = iris.load_cube(
            iris.tests.get_data_path(
                ("NetCDF", "global", "xyt", "SMALL_total_column_co2.nc")
            )
        )

        # Touch the data so both benchmarks start from a realised state.
        source_cube.data

        # Two equal-length time windows of the same cube; align their time
        # coordinates so pearsonr treats them as comparable.
        self.cube_a = source_cube[:6]
        self.cube_b = source_cube[20:26]
        self.cube_b.replace_coord(self.cube_a.coord("time"))

        # Bounds are required before area weights can be computed.
        for coord_name in ["latitude", "longitude"]:
            self.cube_b.coord(coord_name).guess_bounds()
        self.weights = iris.analysis.cartography.area_weights(self.cube_b)

    def time_real(self):
        # Correlation over realised arrays.
        pearsonr(self.cube_a, self.cube_b, weights=self.weights)

    def time_lazy(self):
        # Swap both operands back to lazy arrays, then time the full
        # lazy computation including the final realisation.
        for cube in (self.cube_a, self.cube_b):
            cube.data = cube.lazy_data()

        result = pearsonr(self.cube_a, self.cube_b, weights=self.weights)
        result.data

docs/src/whatsnew/latest.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ This document explains the changes made to Iris for this release
104104
lazy data from file. This will also speed up coordinate comparison.
105105
(:pull:`5610`)
106106

107+
#. `@rcomer`_ and `@trexfeathers`_ (reviewer) modified
108+
:func:`~iris.analysis.stats.pearsonr` so it preserves lazy data in all cases
109+
and also runs a little faster. (:pull:`5638`)
107110

108111
🔥 Deprecations
109112
===============

lib/iris/analysis/stats.py

Lines changed: 81 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,16 @@
44
# See LICENSE in the root of the repository for full licensing details.
55
"""Statistical operations between cubes."""
66

7+
import dask.array as da
78
import numpy as np
8-
import numpy.ma as ma
99

1010
import iris
11-
from iris.util import broadcast_to_shape
11+
from iris.common import SERVICES, Resolve
12+
from iris.common.lenient import _lenient_client
13+
from iris.util import _mask_array
1214

1315

16+
@_lenient_client(services=SERVICES)
1417
def pearsonr(
1518
cube_a,
1619
cube_b,
@@ -63,13 +66,13 @@ def pearsonr(
6366
6467
Notes
6568
-----
69+
If either of the input cubes has lazy data, the result will have lazy data.
70+
6671
Reference:
6772
https://en.wikipedia.org/wiki/Pearson_correlation_coefficient
6873
69-
This operation is non-lazy.
70-
7174
"""
72-
# Assign larger cube to cube_1
75+
# Assign larger cube to cube_1 for simplicity.
7376
if cube_b.ndim > cube_a.ndim:
7477
cube_1 = cube_b
7578
cube_2 = cube_a
@@ -79,90 +82,94 @@ def pearsonr(
7982

8083
smaller_shape = cube_2.shape
8184

82-
dim_coords_1 = [coord.name() for coord in cube_1.dim_coords]
83-
dim_coords_2 = [coord.name() for coord in cube_2.dim_coords]
84-
common_dim_coords = list(set(dim_coords_1) & set(dim_coords_2))
85+
# Get the broadcast, auto-transposed safe versions of the cube operands.
86+
resolver = Resolve(cube_1, cube_2)
87+
cube_1 = resolver.lhs_cube_resolved
88+
cube_2 = resolver.rhs_cube_resolved
89+
90+
if cube_1.has_lazy_data() or cube_2.has_lazy_data():
91+
al = da
92+
array_1 = cube_1.lazy_data()
93+
array_2 = cube_2.lazy_data()
94+
else:
95+
al = np
96+
array_1 = cube_1.data
97+
array_2 = cube_2.data
98+
8599
# If no coords passed then set to all common dimcoords of cubes.
86100
if corr_coords is None:
87-
corr_coords = common_dim_coords
88-
89-
def _ones_like(cube):
90-
# Return a copy of cube with the same mask, but all data values set to 1.
91-
# The operation is non-lazy.
92-
# For safety we also discard any cell-measures and ancillary-variables, to
93-
# avoid cube arithmetic possibly objecting to them, or inadvertently retaining
94-
# them in the result where they might be inappropriate.
95-
ones_cube = cube.copy()
96-
ones_cube.data = np.ones_like(cube.data)
97-
ones_cube.rename("unknown")
98-
ones_cube.units = 1
99-
for cm in ones_cube.cell_measures():
100-
ones_cube.remove_cell_measure(cm)
101-
for av in ones_cube.ancillary_variables():
102-
ones_cube.remove_ancillary_variable(av)
103-
return ones_cube
101+
dim_coords_1 = {coord.name() for coord in cube_1.dim_coords}
102+
dim_coords_2 = {coord.name() for coord in cube_2.dim_coords}
103+
corr_coords = list(dim_coords_1.intersection(dim_coords_2))
104+
105+
# Interpret coords as array dimensions.
106+
corr_dims = set()
107+
if isinstance(corr_coords, str):
108+
corr_coords = [corr_coords]
109+
for coord in corr_coords:
110+
corr_dims.update(cube_1.coord_dims(coord))
111+
112+
corr_dims = tuple(corr_dims)
104113

105114
# Match up data masks if required.
106115
if common_mask:
107-
# Create a cube of 1's with a common mask.
108-
if ma.is_masked(cube_2.data):
109-
mask_cube = _ones_like(cube_2)
110-
else:
111-
mask_cube = 1.0
112-
if ma.is_masked(cube_1.data):
113-
# Take a slice to avoid unnecessary broadcasting of cube_2.
114-
slice_coords = [
115-
dim_coords_1[i]
116-
for i in range(cube_1.ndim)
117-
if dim_coords_1[i] not in common_dim_coords
118-
and np.array_equal(
119-
cube_1.data.mask.any(axis=i), cube_1.data.mask.all(axis=i)
120-
)
121-
]
122-
cube_1_slice = next(cube_1.slices_over(slice_coords))
123-
mask_cube = _ones_like(cube_1_slice) * mask_cube
124-
# Apply common mask to data.
125-
if isinstance(mask_cube, iris.cube.Cube):
126-
cube_1 = cube_1 * mask_cube
127-
cube_2 = mask_cube * cube_2
128-
dim_coords_2 = [coord.name() for coord in cube_2.dim_coords]
129-
130-
# Broadcast weights to shape of cubes if necessary.
131-
if weights is None or cube_1.shape == smaller_shape:
132-
weights_1 = weights
133-
weights_2 = weights
116+
mask_1 = al.ma.getmaskarray(array_1)
117+
if al is np:
118+
# Reduce all invariant dimensions of mask_1 to length 1. This avoids
119+
# unnecessary broadcasting of array_2.
120+
index = tuple(
121+
slice(0, 1)
122+
if np.array_equal(mask_1.any(axis=dim), mask_1.all(axis=dim))
123+
else slice(None)
124+
for dim in range(mask_1.ndim)
125+
)
126+
mask_1 = mask_1[index]
127+
128+
array_2 = _mask_array(array_2, mask_1)
129+
array_1 = _mask_array(array_1, al.ma.getmaskarray(array_2))
130+
131+
# Broadcast weights to shape of arrays if necessary.
132+
if weights is None:
133+
weights_1 = weights_2 = None
134134
else:
135135
if weights.shape != smaller_shape:
136-
raise ValueError(
137-
"weights array should have dimensions {}".format(smaller_shape)
138-
)
136+
msg = f"weights array should have dimensions {smaller_shape}"
137+
raise ValueError(msg)
138+
139+
if resolver.reorder_src_dims is not None:
140+
# Apply same transposition as was done to cube_2 within Resolve.
141+
weights = weights.transpose(resolver.reorder_src_dims)
142+
143+
# Reshape to add in any length-1 dimensions that Resolve() has added
144+
# for broadcasting.
145+
weights = weights.reshape(cube_2.shape)
139146

140-
dims_1_common = [
141-
i for i in range(cube_1.ndim) if dim_coords_1[i] in common_dim_coords
142-
]
143-
weights_1 = broadcast_to_shape(weights, cube_1.shape, dims_1_common)
144-
if cube_2.shape != smaller_shape:
145-
dims_2_common = [
146-
i for i in range(cube_2.ndim) if dim_coords_2[i] in common_dim_coords
147-
]
148-
weights_2 = broadcast_to_shape(weights, cube_2.shape, dims_2_common)
149-
else:
150-
weights_2 = weights
147+
weights_2 = np.broadcast_to(weights, array_2.shape)
148+
weights_1 = np.broadcast_to(weights, array_1.shape)
151149

152150
# Calculate correlations.
153-
s1 = cube_1 - cube_1.collapsed(corr_coords, iris.analysis.MEAN, weights=weights_1)
154-
s2 = cube_2 - cube_2.collapsed(corr_coords, iris.analysis.MEAN, weights=weights_2)
151+
s1 = array_1 - al.ma.average(
152+
array_1, axis=corr_dims, weights=weights_1, keepdims=True
153+
)
154+
s2 = array_2 - al.ma.average(
155+
array_2, axis=corr_dims, weights=weights_2, keepdims=True
156+
)
155157

156-
covar = (s1 * s2).collapsed(
158+
s_prod = resolver.cube(s1 * s2)
159+
160+
# Use cube collapsed method as it takes care of coordinate collapsing and missing
161+
# data tolerance.
162+
covar = s_prod.collapsed(
157163
corr_coords, iris.analysis.SUM, weights=weights_1, mdtol=mdtol
158164
)
159-
var_1 = (s1**2).collapsed(corr_coords, iris.analysis.SUM, weights=weights_1)
160-
var_2 = (s2**2).collapsed(corr_coords, iris.analysis.SUM, weights=weights_2)
161165

162-
denom = iris.analysis.maths.apply_ufunc(
163-
np.sqrt, var_1 * var_2, new_unit=covar.units
164-
)
166+
var_1 = iris.analysis._sum(s1**2, axis=corr_dims, weights=weights_1)
167+
var_2 = iris.analysis._sum(s2**2, axis=corr_dims, weights=weights_2)
168+
169+
denom = np.sqrt(var_1 * var_2)
170+
165171
corr_cube = covar / denom
166172
corr_cube.rename("Pearson's r")
173+
corr_cube.units = 1
167174

168175
return corr_cube

lib/iris/common/resolve.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,7 @@ def __init__(self, lhs=None, rhs=None):
304304
#: operands, where the direction of the mapping is governed by
305305
#: :attr:`~iris.common.resolve.Resolve.map_rhs_to_lhs`.
306306
self.mapping = None # set in _metadata_mapping
307+
self.reorder_src_dims = None # set in _as_compatible_cubes
307308

308309
#: Cache containing a list of dim, aux and scalar coordinates prepared
309310
#: and ready for creating and attaching to the resultant resolved
@@ -439,6 +440,7 @@ def _as_compatible_cubes(self):
439440

440441
# Determine whether a transpose of the src cube is necessary.
441442
if order != sorted(order):
443+
self.reorder_src_dims = order
442444
new_src_data = new_src_data.transpose(order)
443445
logger.debug(
444446
f"transpose src {self._src_cube_position} cube with order {order}"

0 commit comments

Comments
 (0)