Skip to content

Commit 2d96b56

Browse files
committed
FIX & TST
- Cast cdf to the dtype of quantile to avoid surprises.
- Convert scalar Python objects to pure Python objects, as in the unweighted case.
- Extend the weighted case to test_linear_interpolation.
- Extend the weighted case to test_percentile_out.
1 parent fc0b5b7 commit 2d96b56

File tree

2 files changed

+50
-24
lines changed

2 files changed

+50
-24
lines changed

numpy/lib/_function_base_impl.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4934,8 +4934,9 @@ def _quantile(
49344934
gamma,
49354935
out=out)
49364936
else:
4937-
# Weighted case, we need to sort anyway.
4938-
# This implements method="inverted_cdf".
4937+
# Weighted case
4938+
# This implements method="inverted_cdf", the only supported weighted
4939+
# method, which needs to sort anyway.
49394940
weights = np.asanyarray(weights)
49404941
if axis != 0:
49414942
weights = np.moveaxis(weights, axis, destination=0)
@@ -4954,17 +4955,30 @@ def _quantile(
49544955
else:
49554956
# weights is 1d
49564957
weights = weights.reshape(-1)[index_array, ...]
4957-
# weights = np.take_along_axis(weights, index_array, axis=axis)
4958-
# We use the weights to calculate the empirical distribution function.
4959-
cdf = weights.cumsum(axis=0, dtype=float)
4960-
cdf /= cdf[-1, ...] # normalization
4961-
# Only inverted_cdf is supported. Search index i such that
4962-
# sum(weights[j], j=0..i-1) < quantile <= sum(weights[j], j=0..i)
4958+
4959+
# We use the weights to calculate the empirical cumulative
4960+
# distribution function cdf
4961+
cdf = weights.cumsum(axis=0, dtype=np.float64)
4962+
cdf /= cdf[-1, ...] # normalization to 1
4963+
# Search index i such that
4964+
# sum(weights[j], j=0..i-1) < quantile <= sum(weights[j], j=0..i)
4965+
# is then equivalent to
4966+
# cdf[i-1] < quantile <= cdf[i]
4967+
# Unfortunately, searchsorted only accepts 1-d arrays as first
4968+
# argument, so we will need to iterate over dimensions.
4969+
4970+
# Without the following cast, searchsorted can return surprising
4971+
# results, e.g.
4972+
# np.searchsorted(np.array([0.2, 0.4, 0.6, 0.8, 1.]),
4973+
# np.array(0.4, dtype=np.float32), side="left")
4974+
# returns 2 instead of 1 because 0.4 is not binary representable.
4975+
if quantiles.dtype.kind == "f":
4976+
cdf = cdf.astype(quantiles.dtype)
49634977

49644978
def find_cdf_1d(arr, cdf):
49654979
indices = np.searchsorted(cdf, quantiles, side="left")
49664980
# We might have reached the maximum with i = len(arr), e.g. for
4967-
# quantiles == 1.
4981+
# quantiles = 1, and need to cut it to len(arr) - 1.
49684982
indices = minimum(indices, values_count - 1)
49694983
result = take(arr, indices, axis=0)
49704984
return result
@@ -4984,6 +4998,10 @@ def find_cdf_1d(arr, cdf):
49844998
if out is not None:
49854999
np.copyto(out, result)
49865000

5001+
# Make result the same as in unweighted inverted_cdf.
5002+
if result.shape == () and result.dtype == np.dtype("O"):
5003+
result = result.item()
5004+
49875005
if np.any(slices_having_nans):
49885006
if result.ndim == 0 and out is None:
49895007
# can't write to a scalar, but indexing will be correct

numpy/lib/tests/test_function_base.py

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3113,21 +3113,23 @@ def test_linear_nan_1D(self, dtype):
31133113
[(np.quantile, 0.4),
31143114
(np.percentile, 40.0)])
31153115
@pytest.mark.parametrize(["input_dtype", "expected_dtype"], H_F_TYPE_CODES)
3116-
@pytest.mark.parametrize(["method", "expected"],
3117-
[("inverted_cdf", 20),
3118-
("averaged_inverted_cdf", 27.5),
3119-
("closest_observation", 20),
3120-
("interpolated_inverted_cdf", 20),
3121-
("hazen", 27.5),
3122-
("weibull", 26),
3123-
("linear", 29),
3124-
("median_unbiased", 27),
3125-
("normal_unbiased", 27.125),
3116+
@pytest.mark.parametrize(["method", "weighted", "expected"],
3117+
[("inverted_cdf", False, 20),
3118+
("inverted_cdf", True, 20),
3119+
("averaged_inverted_cdf", False, 27.5),
3120+
("closest_observation", False, 20),
3121+
("interpolated_inverted_cdf", False, 20),
3122+
("hazen", False, 27.5),
3123+
("weibull", False, 26),
3124+
("linear", False, 29),
3125+
("median_unbiased", False, 27),
3126+
("normal_unbiased", False, 27.125),
31263127
])
31273128
def test_linear_interpolation(self,
31283129
function,
31293130
quantile,
31303131
method,
3132+
weighted,
31313133
expected,
31323134
input_dtype,
31333135
expected_dtype):
@@ -3136,6 +3138,7 @@ def test_linear_interpolation(self,
31363138
expected_dtype = np.promote_types(expected_dtype, np.float64)
31373139

31383140
arr = np.asarray([15.0, 20.0, 35.0, 40.0, 50.0], dtype=input_dtype)
3141+
weights = np.ones_like(arr) if weighted else None
31393142
if input_dtype is np.longdouble:
31403143
if function is np.quantile:
31413144
# 0.4 is not exactly representable and it matters
@@ -3146,12 +3149,12 @@ def test_linear_interpolation(self,
31463149
else:
31473150
test_function = np.testing.assert_array_almost_equal_nulp
31483151

3149-
actual = function(arr, quantile, method=method)
3152+
actual = function(arr, quantile, method=method, weights=weights)
31503153

31513154
test_function(actual, expected_dtype.type(expected))
31523155

31533156
if method in ["inverted_cdf", "closest_observation"]:
3154-
if input_dtype == "O":
3157+
if input_dtype == "O" :
31553158
np.testing.assert_equal(np.asarray(actual).dtype, np.float64)
31563159
else:
31573160
np.testing.assert_equal(np.asarray(actual).dtype,
@@ -3308,14 +3311,16 @@ def test_percentile_out(self, percentile, with_weights):
33083311
y = np.zeros((3,), dtype=out_dtype)
33093312
p = (1, 2, 3)
33103313
weights = np.ones_like(x) if with_weights else None
3311-
percentile(x, p, out=y, weights=weights)
3314+
r = percentile(x, p, out=y, weights=weights)
3315+
assert r is y
33123316
assert_equal(percentile(x, p, weights=weights), y)
33133317

33143318
x = np.array([[1, 2, 3],
33153319
[4, 5, 6]])
33163320
y = np.zeros((3, 3), dtype=out_dtype)
33173321
weights = np.ones_like(x) if with_weights else None
3318-
percentile(x, p, axis=0, out=y, weights=weights)
3322+
r = percentile(x, p, axis=0, out=y, weights=weights)
3323+
assert r is y
33193324
assert_equal(percentile(x, p, weights=weights, axis=0), y)
33203325

33213326
y = np.zeros((3, 2), dtype=out_dtype)
@@ -3324,7 +3329,10 @@ def test_percentile_out(self, percentile, with_weights):
33243329

33253330
x = np.arange(12).reshape(3, 4)
33263331
# q.dim > 1, float
3327-
r0 = np.array([[2., 3., 4., 5.], [4., 5., 6., 7.]])
3332+
if with_weights:
3333+
r0 = np.array([[0, 1, 2, 3], [4, 5, 6, 7]])
3334+
else:
3335+
r0 = np.array([[2., 3., 4., 5.], [4., 5., 6., 7.]])
33283336
out = np.empty((2, 4), dtype=out_dtype)
33293337
weights = np.ones_like(x) if with_weights else None
33303338
assert_equal(

0 commit comments

Comments (0)