44# See LICENSE in the root of the repository for full licensing details.
55"""Statistical operations between cubes."""
66
7+ import dask .array as da
78import numpy as np
8- import numpy .ma as ma
99
1010import iris
11- from iris .util import broadcast_to_shape
11+ from iris .common import SERVICES , Resolve
12+ from iris .common .lenient import _lenient_client
13+ from iris .util import _mask_array
1214
1315
16+ @_lenient_client (services = SERVICES )
1417def pearsonr (
1518 cube_a ,
1619 cube_b ,
@@ -63,13 +66,13 @@ def pearsonr(
6366
6467 Notes
6568 -----
69+ If either of the input cubes has lazy data, the result will have lazy data.
70+
6671 Reference:
6772 https://en.wikipedia.org/wiki/Pearson_correlation_coefficient
6873
69- This operation is non-lazy.
70-
7174 """
72- # Assign larger cube to cube_1
75+ # Assign larger cube to cube_1 for simplicity.
7376 if cube_b .ndim > cube_a .ndim :
7477 cube_1 = cube_b
7578 cube_2 = cube_a
@@ -79,90 +82,94 @@ def pearsonr(
7982
8083 smaller_shape = cube_2 .shape
8184
82- dim_coords_1 = [coord .name () for coord in cube_1 .dim_coords ]
83- dim_coords_2 = [coord .name () for coord in cube_2 .dim_coords ]
84- common_dim_coords = list (set (dim_coords_1 ) & set (dim_coords_2 ))
85+ # Get the broadcast, auto-transposed safe versions of the cube operands.
86+ resolver = Resolve (cube_1 , cube_2 )
87+ cube_1 = resolver .lhs_cube_resolved
88+ cube_2 = resolver .rhs_cube_resolved
89+
90+ if cube_1 .has_lazy_data () or cube_2 .has_lazy_data ():
91+ al = da
92+ array_1 = cube_1 .lazy_data ()
93+ array_2 = cube_2 .lazy_data ()
94+ else :
95+ al = np
96+ array_1 = cube_1 .data
97+ array_2 = cube_2 .data
98+
8599 # If no coords passed then set to all common dimcoords of cubes.
86100 if corr_coords is None :
87- corr_coords = common_dim_coords
88-
89- def _ones_like (cube ):
90- # Return a copy of cube with the same mask, but all data values set to 1.
91- # The operation is non-lazy.
92- # For safety we also discard any cell-measures and ancillary-variables, to
93- # avoid cube arithmetic possibly objecting to them, or inadvertently retaining
94- # them in the result where they might be inappropriate.
95- ones_cube = cube .copy ()
96- ones_cube .data = np .ones_like (cube .data )
97- ones_cube .rename ("unknown" )
98- ones_cube .units = 1
99- for cm in ones_cube .cell_measures ():
100- ones_cube .remove_cell_measure (cm )
101- for av in ones_cube .ancillary_variables ():
102- ones_cube .remove_ancillary_variable (av )
103- return ones_cube
101+ dim_coords_1 = {coord .name () for coord in cube_1 .dim_coords }
102+ dim_coords_2 = {coord .name () for coord in cube_2 .dim_coords }
103+ corr_coords = list (dim_coords_1 .intersection (dim_coords_2 ))
104+
105+ # Interpret coords as array dimensions.
106+ corr_dims = set ()
107+ if isinstance (corr_coords , str ):
108+ corr_coords = [corr_coords ]
109+ for coord in corr_coords :
110+ corr_dims .update (cube_1 .coord_dims (coord ))
111+
112+ corr_dims = tuple (corr_dims )
104113
105114 # Match up data masks if required.
106115 if common_mask :
107- # Create a cube of 1's with a common mask.
108- if ma .is_masked (cube_2 .data ):
109- mask_cube = _ones_like (cube_2 )
110- else :
111- mask_cube = 1.0
112- if ma .is_masked (cube_1 .data ):
113- # Take a slice to avoid unnecessary broadcasting of cube_2.
114- slice_coords = [
115- dim_coords_1 [i ]
116- for i in range (cube_1 .ndim )
117- if dim_coords_1 [i ] not in common_dim_coords
118- and np .array_equal (
119- cube_1 .data .mask .any (axis = i ), cube_1 .data .mask .all (axis = i )
120- )
121- ]
122- cube_1_slice = next (cube_1 .slices_over (slice_coords ))
123- mask_cube = _ones_like (cube_1_slice ) * mask_cube
124- # Apply common mask to data.
125- if isinstance (mask_cube , iris .cube .Cube ):
126- cube_1 = cube_1 * mask_cube
127- cube_2 = mask_cube * cube_2
128- dim_coords_2 = [coord .name () for coord in cube_2 .dim_coords ]
129-
130- # Broadcast weights to shape of cubes if necessary.
131- if weights is None or cube_1 .shape == smaller_shape :
132- weights_1 = weights
133- weights_2 = weights
116+ mask_1 = al .ma .getmaskarray (array_1 )
117+ if al is np :
118+ # Reduce all invariant dimensions of mask_1 to length 1. This avoids
119+ # unnecessary broadcasting of array_2.
120+ index = tuple (
121+ slice (0 , 1 )
122+ if np .array_equal (mask_1 .any (axis = dim ), mask_1 .all (axis = dim ))
123+ else slice (None )
124+ for dim in range (mask_1 .ndim )
125+ )
126+ mask_1 = mask_1 [index ]
127+
128+ array_2 = _mask_array (array_2 , mask_1 )
129+ array_1 = _mask_array (array_1 , al .ma .getmaskarray (array_2 ))
130+
131+ # Broadcast weights to shape of arrays if necessary.
132+ if weights is None :
133+ weights_1 = weights_2 = None
134134 else :
135135 if weights .shape != smaller_shape :
136- raise ValueError (
137- "weights array should have dimensions {}" .format (smaller_shape )
138- )
136+ msg = f"weights array should have dimensions { smaller_shape } "
137+ raise ValueError (msg )
138+
139+ if resolver .reorder_src_dims is not None :
140+ # Apply same transposition as was done to cube_2 within Resolve.
141+ weights = weights .transpose (resolver .reorder_src_dims )
142+
143+ # Reshape to add in any length-1 dimensions that Resolve() has added
144+ # for broadcasting.
145+ weights = weights .reshape (cube_2 .shape )
139146
140- dims_1_common = [
141- i for i in range (cube_1 .ndim ) if dim_coords_1 [i ] in common_dim_coords
142- ]
143- weights_1 = broadcast_to_shape (weights , cube_1 .shape , dims_1_common )
144- if cube_2 .shape != smaller_shape :
145- dims_2_common = [
146- i for i in range (cube_2 .ndim ) if dim_coords_2 [i ] in common_dim_coords
147- ]
148- weights_2 = broadcast_to_shape (weights , cube_2 .shape , dims_2_common )
149- else :
150- weights_2 = weights
147+ weights_2 = np .broadcast_to (weights , array_2 .shape )
148+ weights_1 = np .broadcast_to (weights , array_1 .shape )
151149
152150 # Calculate correlations.
153- s1 = cube_1 - cube_1 .collapsed (corr_coords , iris .analysis .MEAN , weights = weights_1 )
154- s2 = cube_2 - cube_2 .collapsed (corr_coords , iris .analysis .MEAN , weights = weights_2 )
151+ s1 = array_1 - al .ma .average (
152+ array_1 , axis = corr_dims , weights = weights_1 , keepdims = True
153+ )
154+ s2 = array_2 - al .ma .average (
155+ array_2 , axis = corr_dims , weights = weights_2 , keepdims = True
156+ )
155157
156- covar = (s1 * s2 ).collapsed (
158+ s_prod = resolver .cube (s1 * s2 )
159+
160+ # Use cube collapsed method as it takes care of coordinate collapsing and missing
161+ # data tolerance.
162+ covar = s_prod .collapsed (
157163 corr_coords , iris .analysis .SUM , weights = weights_1 , mdtol = mdtol
158164 )
159- var_1 = (s1 ** 2 ).collapsed (corr_coords , iris .analysis .SUM , weights = weights_1 )
160- var_2 = (s2 ** 2 ).collapsed (corr_coords , iris .analysis .SUM , weights = weights_2 )
161165
162- denom = iris .analysis .maths .apply_ufunc (
163- np .sqrt , var_1 * var_2 , new_unit = covar .units
164- )
166+ var_1 = iris .analysis ._sum (s1 ** 2 , axis = corr_dims , weights = weights_1 )
167+ var_2 = iris .analysis ._sum (s2 ** 2 , axis = corr_dims , weights = weights_2 )
168+
169+ denom = np .sqrt (var_1 * var_2 )
170+
165171 corr_cube = covar / denom
166172 corr_cube .rename ("Pearson's r" )
173+ corr_cube .units = 1
167174
168175 return corr_cube
0 commit comments