Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions numpy/lib/function_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4237,7 +4237,9 @@ def percentile(a,
if a.dtype.kind == "c":
raise TypeError("a must be an array of real numbers")

q = np.true_divide(q, 100)
# Use dtype of array if possible (e.g., if q is a python int or float)
# by making the divisor have the dtype of the data array.
q = np.true_divide(q, a.dtype.type(100) if a.dtype.kind == "f" else 100)
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm, feels a bit brittle, but I have to sleep on it probably (also doesn't generalize easily to user DTypes, but OK).

Maybe in this case it is actually easier to just track was_pyscalar = type(q) in (int, float, complex) and then apply if was_pyscalar: pos = float(pos) at the end of the calculation.

Beyond figuring this out, one point that nags me about it is that I think this is an important problem to get eyes on, because percentile can't be the only function in NumPy (and even more so downstream!) to run into this.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem with was_pyscalar is that I think the expectation should be that the result keeps the array input dtype, but only if it is float.

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, by pos there, I don't mean the end-result, but the interpolation point value that is used for the last bit of the calculation. I.e. the intermediate result just before mixing it with the values from a.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see. Indeed, that could work too. I think I slightly prefer trying to deal with things up front — I'm trying to think of these functions as gufuncs and following the same logic...

q = asanyarray(q) # undo any decay that the ufunc performed (see gh-13105)
if not _quantile_is_valid(q):
raise ValueError("Percentiles must be in the range [0, 100]")
Expand Down Expand Up @@ -4498,7 +4500,12 @@ def quantile(a,
if a.dtype.kind == "c":
raise TypeError("a must be an array of real numbers")

q = np.asanyarray(q)
# Use dtype of array if possible (e.g., if q is a python int or float).
if isinstance(q, (int, float)) and a.dtype.kind == "f":
q = np.asanyarray(q, dtype=np.result_type(a, q))
Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could also just use this stanza for percentile to keep things more uniform.

Note that this would not work for user types either given the check for a.dtype.kind == "f", but I did have to include that since otherwise an example with a datetime broke.

else:
q = np.asanyarray(q)

if not _quantile_is_valid(q):
raise ValueError("Quantiles must be in the range [0, 1]")
return _quantile_unchecked(
Expand Down Expand Up @@ -4596,7 +4603,9 @@ def _get_gamma(virtual_indexes, previous_indexes, method):
"""
gamma = np.asanyarray(virtual_indexes - previous_indexes)
gamma = method["fix_gamma"](gamma, virtual_indexes)
return np.asanyarray(gamma)
# Ensure both that we have an array, and that we keep the dtype
# (which may have been matched to the input array).
return np.asanyarray(gamma, dtype=virtual_indexes.dtype)


def _lerp(a, b, t, out=None):
Expand Down
20 changes: 17 additions & 3 deletions numpy/lib/tests/test_function_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3076,6 +3076,9 @@ def test_linear_nan_1D(self, dtype):
(np.longdouble, np.longdouble),
(np.dtype("O"), np.float64)]

@pytest.mark.parametrize(["function", "quantile"],
[(np.quantile, 0.4),
(np.percentile, 40.0)])
@pytest.mark.parametrize(["input_dtype", "expected_dtype"], H_F_TYPE_CODES)
@pytest.mark.parametrize(["method", "expected"],
[("inverted_cdf", 20),
Expand All @@ -3089,6 +3092,8 @@ def test_linear_nan_1D(self, dtype):
("normal_unbiased", 27.125),
])
def test_linear_interpolation(self,
function,
quantile,
method,
expected,
input_dtype,
Expand All @@ -3098,10 +3103,19 @@ def test_linear_interpolation(self,
expected_dtype = np.promote_types(expected_dtype, np.float64)

arr = np.asarray([15.0, 20.0, 35.0, 40.0, 50.0], dtype=input_dtype)
actual = np.percentile(arr, 40.0, method=method)
if input_dtype is np.longdouble:
if function is np.quantile:
# 0.4 is not exactly representable and it matters
# for "averaged_inverted_cdf", so we need to cheat.
quantile = input_dtype("0.4")
# We want to use nulp, but that does not work for longdouble
test_function = np.testing.assert_almost_equal
else:
test_function = np.testing.assert_array_almost_equal_nulp

actual = function(arr, quantile, method=method)

np.testing.assert_almost_equal(
actual, expected_dtype.type(expected), 14)
test_function(actual, expected_dtype.type(expected))

if method in ["inverted_cdf", "closest_observation"]:
if input_dtype == "O":
Expand Down