 # See LICENSE in the root of the repository for full licensing details.
 """Statistical operations between cubes."""
 
+import dask.array as da
 import numpy as np
-import numpy.ma as ma
 
 import iris
-from iris.util import broadcast_to_shape
+from iris.common import SERVICES, Resolve
+from iris.common.lenient import _lenient_client
+from iris.util import _mask_array
 
 
+@_lenient_client(services=SERVICES)
 def pearsonr(
     cube_a,
     cube_b,
@@ -26,7 +29,8 @@ def pearsonr(
     cube_a, cube_b : cubes
         Cubes between which the correlation will be calculated. The cubes
         should either be the same shape and have the same dimension coordinates
-        or one cube should be broadcastable to the other.
+        or one cube should be broadcastable to the other. Broadcasting rules
+        are the same as those for cube arithmetic (see :ref:`cube maths`).
     corr_coords : str or list of str
         The cube coordinate name(s) over which to calculate correlations. If no
         names are provided then correlation will be calculated over all common
@@ -62,13 +66,13 @@ def pearsonr(
 
     Notes
     -----
+    If either of the input cubes has lazy data, the result will have lazy data.
+
     Reference:
         https://en.wikipedia.org/wiki/Pearson_correlation_coefficient
 
-    This operation is non-lazy.
-
     """
-    # Assign larger cube to cube_1
+    # Assign larger cube to cube_1 for simplicity.
     if cube_b.ndim > cube_a.ndim:
         cube_1 = cube_b
         cube_2 = cube_a
@@ -78,90 +82,88 @@ def pearsonr(
 
     smaller_shape = cube_2.shape
 
-    dim_coords_1 = [coord.name() for coord in cube_1.dim_coords]
-    dim_coords_2 = [coord.name() for coord in cube_2.dim_coords]
-    common_dim_coords = list(set(dim_coords_1) & set(dim_coords_2))
+    # Get the broadcast, auto-transposed safe versions of the cube operands.
+    resolver = Resolve(cube_1, cube_2)
+    lhs_cube_resolved = resolver.lhs_cube_resolved
+    rhs_cube_resolved = resolver.rhs_cube_resolved
+
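+    # "al" is the array-module alias used for the rest of the calculation:
+    # dask.array when either operand has lazy data, otherwise plain numpy.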
+    if lhs_cube_resolved.has_lazy_data() or rhs_cube_resolved.has_lazy_data():
+        al = da
+        array_lhs = lhs_cube_resolved.lazy_data()
+        array_rhs = rhs_cube_resolved.lazy_data()
+    else:
+        al = np
+        array_lhs = lhs_cube_resolved.data
+        array_rhs = rhs_cube_resolved.data
+
     # If no coords passed then set to all common dimcoords of cubes.
     if corr_coords is None:
-        corr_coords = common_dim_coords
-
-    def _ones_like(cube):
-        # Return a copy of cube with the same mask, but all data values set to 1.
-        # The operation is non-lazy.
-        # For safety we also discard any cell-measures and ancillary-variables, to
-        # avoid cube arithmetic possibly objecting to them, or inadvertently retaining
-        # them in the result where they might be inappropriate.
-        ones_cube = cube.copy()
-        ones_cube.data = np.ones_like(cube.data)
-        ones_cube.rename("unknown")
-        ones_cube.units = 1
-        for cm in ones_cube.cell_measures():
-            ones_cube.remove_cell_measure(cm)
-        for av in ones_cube.ancillary_variables():
-            ones_cube.remove_ancillary_variable(av)
-        return ones_cube
+        dim_coords_1 = {coord.name() for coord in lhs_cube_resolved.dim_coords}
+        dim_coords_2 = {coord.name() for coord in rhs_cube_resolved.dim_coords}
+        corr_coords = list(dim_coords_1.intersection(dim_coords_2))
+
+    # Interpret coords as array dimensions.
+    corr_dims = set()
+    if isinstance(corr_coords, str):
+        corr_coords = [corr_coords]
+    for coord in corr_coords:
+        corr_dims.update(lhs_cube_resolved.coord_dims(coord))
+
+    corr_dims = tuple(corr_dims)
 
     # Match up data masks if required.
     if common_mask:
-        # Create a cube of 1's with a common mask.
-        if ma.is_masked(cube_2.data):
-            mask_cube = _ones_like(cube_2)
-        else:
-            mask_cube = 1.0
-        if ma.is_masked(cube_1.data):
-            # Take a slice to avoid unnecessary broadcasting of cube_2.
-            slice_coords = [
-                dim_coords_1[i]
-                for i in range(cube_1.ndim)
-                if dim_coords_1[i] not in common_dim_coords
-                and np.array_equal(
-                    cube_1.data.mask.any(axis=i), cube_1.data.mask.all(axis=i)
-                )
-            ]
-            cube_1_slice = next(cube_1.slices_over(slice_coords))
-            mask_cube = _ones_like(cube_1_slice) * mask_cube
-        # Apply common mask to data.
-        if isinstance(mask_cube, iris.cube.Cube):
-            cube_1 = cube_1 * mask_cube
-            cube_2 = mask_cube * cube_2
-            dim_coords_2 = [coord.name() for coord in cube_2.dim_coords]
-
-    # Broadcast weights to shape of cubes if necessary.
-    if weights is None or cube_1.shape == smaller_shape:
-        weights_1 = weights
-        weights_2 = weights
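+        # Mask each operand with the union of both masks, so that statistics for
+        # the two operands are computed over the same set of points.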
+        mask_lhs = al.ma.getmaskarray(array_lhs)
+        if al is np:
+            # Reduce all invariant dimensions of mask_lhs to length 1. This avoids
+            # unnecessary broadcasting of array_rhs.
+            index = tuple(
+                slice(0, 1)
+                if np.array_equal(mask_lhs.any(axis=dim), mask_lhs.all(axis=dim))
+                else slice(None)
+                for dim in range(mask_lhs.ndim)
+            )
+            mask_lhs = mask_lhs[index]
+
+        array_rhs = _mask_array(array_rhs, mask_lhs)
+        array_lhs = _mask_array(array_lhs, al.ma.getmaskarray(array_rhs))
+
+    # Broadcast weights to shape of arrays if necessary.
+    if weights is None:
+        weights_lhs = weights_rhs = None
     else:
         if weights.shape != smaller_shape:
-            raise ValueError(
-                "weights array should have dimensions {}".format(smaller_shape)
-            )
+            msg = f"weights array should have dimensions {smaller_shape}"
+            raise ValueError(msg)
 
-        dims_1_common = [
-            i for i in range(cube_1.ndim) if dim_coords_1[i] in common_dim_coords
-        ]
-        weights_1 = broadcast_to_shape(weights, cube_1.shape, dims_1_common)
-        if cube_2.shape != smaller_shape:
-            dims_2_common = [
-                i for i in range(cube_2.ndim) if dim_coords_2[i] in common_dim_coords
-            ]
-            weights_2 = broadcast_to_shape(weights, cube_2.shape, dims_2_common)
-        else:
-            weights_2 = weights
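+        # Resolve the weights against the cubes so they are aligned and transposed
+        # consistently, then broadcast them to the shape of each operand array.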
+        wt_resolver = Resolve(cube_1, cube_2.copy(weights))
+        weights = wt_resolver.rhs_cube_resolved.data
+        weights_rhs = np.broadcast_to(weights, array_rhs.shape)
+        weights_lhs = np.broadcast_to(weights, array_lhs.shape)
 
     # Calculate correlations.
-    s1 = cube_1 - cube_1.collapsed(corr_coords, iris.analysis.MEAN, weights=weights_1)
-    s2 = cube_2 - cube_2.collapsed(corr_coords, iris.analysis.MEAN, weights=weights_2)
-
-    covar = (s1 * s2).collapsed(
-        corr_coords, iris.analysis.SUM, weights=weights_1, mdtol=mdtol
+    s_lhs = array_lhs - al.ma.average(
+        array_lhs, axis=corr_dims, weights=weights_lhs, keepdims=True
+    )
+    s_rhs = array_rhs - al.ma.average(
+        array_rhs, axis=corr_dims, weights=weights_rhs, keepdims=True
     )
-    var_1 = (s1**2).collapsed(corr_coords, iris.analysis.SUM, weights=weights_1)
-    var_2 = (s2**2).collapsed(corr_coords, iris.analysis.SUM, weights=weights_2)
 
-    denom = iris.analysis.maths.apply_ufunc(
-        np.sqrt, var_1 * var_2, new_unit=covar.units
+    s_prod = resolver.cube(s_lhs * s_rhs)
+
+    # Use cube collapsed method as it takes care of coordinate collapsing and missing
+    # data tolerance.
+    covar = s_prod.collapsed(
+        corr_coords, iris.analysis.SUM, weights=weights_lhs, mdtol=mdtol
     )
+
+    var_lhs = iris.analysis._sum(s_lhs**2, axis=corr_dims, weights=weights_lhs)
+    var_rhs = iris.analysis._sum(s_rhs**2, axis=corr_dims, weights=weights_rhs)
+
+    denom = np.sqrt(var_lhs * var_rhs)
+
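+    # Pearson's r: the weighted sum of products of anomalies, normalised by the
+    # square root of the product of the weighted sums of squared anomalies.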
     corr_cube = covar / denom
     corr_cube.rename("Pearson's r")
+    corr_cube.units = 1
 
     return corr_cube
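
A minimal usage sketch of the updated function (hypothetical file and coordinate names, not taken from this change); the result keeps lazy data whenever either input is lazy:

import iris
import iris.analysis.stats

# Hypothetical inputs: two cubes on a shared latitude/longitude grid (or
# broadcastable to one another), loaded lazily from NetCDF files.
cube_a = iris.load_cube("model_a.nc")
cube_b = iris.load_cube("model_b.nc")

# Correlate over the horizontal dimensions; any remaining dimensions (e.g. time)
# are preserved in the result cube.
r = iris.analysis.stats.pearsonr(cube_a, cube_b, ["latitude", "longitude"])
print(r)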