Skip to content

Commit

Permalink
Merge pull request #26696 from charris/backport-26582
Browse files Browse the repository at this point in the history
BUG: weighted nanpercentile, nanquantile and  multi-dim q
  • Loading branch information
charris authored Jun 15, 2024
2 parents 103f4dd + ece3559 commit c8665ba
Show file tree
Hide file tree
Showing 2 changed files with 112 additions and 17 deletions.
67 changes: 53 additions & 14 deletions numpy/lib/_nanfunctions_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def _copyto(a, val, mask):
return a


def _remove_nan_1d(arr1d, overwrite_input=False):
def _remove_nan_1d(arr1d, second_arr1d=None, overwrite_input=False):
"""
Equivalent to arr1d[~arr1d.isnan()], but in a different order
Expand All @@ -151,13 +151,17 @@ def _remove_nan_1d(arr1d, overwrite_input=False):
----------
arr1d : ndarray
Array to remove nans from
second_arr1d : ndarray or None
A second array which will have the same positions removed as arr1d.
overwrite_input : bool
True if `arr1d` can be modified in place
Returns
-------
res : ndarray
Array with nan elements removed
second_res : ndarray or None
Second array with nan element positions of first array removed.
overwrite_input : bool
True if `res` can be modified in place, given the constraint on the
input
Expand All @@ -172,9 +176,12 @@ def _remove_nan_1d(arr1d, overwrite_input=False):
if s.size == arr1d.size:
warnings.warn("All-NaN slice encountered", RuntimeWarning,
stacklevel=6)
return arr1d[:0], True
if second_arr1d is None:
return arr1d[:0], None, True
else:
return arr1d[:0], second_arr1d[:0], True
elif s.size == 0:
return arr1d, overwrite_input
return arr1d, second_arr1d, overwrite_input
else:
if not overwrite_input:
arr1d = arr1d.copy()
Expand All @@ -183,7 +190,15 @@ def _remove_nan_1d(arr1d, overwrite_input=False):
# fill nans in beginning of array with non-nans of end
arr1d[s[:enonan.size]] = enonan

return arr1d[:-s.size], True
if second_arr1d is None:
return arr1d[:-s.size], None, True
else:
if not overwrite_input:
second_arr1d = second_arr1d.copy()
enonan = second_arr1d[-s.size:][~c[-s.size:]]
second_arr1d[s[:enonan.size]] = enonan

return arr1d[:-s.size], second_arr1d[:-s.size], True


def _divide_by_count(a, b, out=None):
Expand Down Expand Up @@ -1061,7 +1076,7 @@ def _nanmedian1d(arr1d, overwrite_input=False):
Private function for rank 1 arrays. Compute the median ignoring NaNs.
See nanmedian for parameter usage
"""
arr1d_parsed, overwrite_input = _remove_nan_1d(
arr1d_parsed, _, overwrite_input = _remove_nan_1d(
arr1d, overwrite_input=overwrite_input,
)

Expand Down Expand Up @@ -1646,13 +1661,36 @@ def _nanquantile_ureduce_func(
wgt = None if weights is None else weights.ravel()
result = _nanquantile_1d(part, q, overwrite_input, method, weights=wgt)
else:
result = np.apply_along_axis(_nanquantile_1d, axis, a, q,
overwrite_input, method, weights)
# apply_along_axis fills in collapsed axis with results.
# Move that axis to the beginning to match percentile's
# convention.
if q.ndim != 0:
result = np.moveaxis(result, axis, 0)
# Note that this code could try to fill in `out` right away
if weights is None:
result = np.apply_along_axis(_nanquantile_1d, axis, a, q,
overwrite_input, method, weights)
# apply_along_axis fills in collapsed axis with results.
# Move those axes to the beginning to match percentile's
# convention.
if q.ndim != 0:
from_ax = [axis + i for i in range(q.ndim)]
result = np.moveaxis(result, from_ax, list(range(q.ndim)))
else:
# We need to apply along axis over 2 arrays, a and weights.
# move operation axes to end for simplicity:
a = np.moveaxis(a, axis, -1)
if weights is not None:
weights = np.moveaxis(weights, axis, -1)
if out is not None:
result = out
else:
# weights are limited to `inverted_cdf` so the result dtype
# is known to be identical to that of `a` here:
result = np.empty_like(a, shape=q.shape + a.shape[:-1])

for ii in np.ndindex(a.shape[:-1]):
result[(...,) + ii] = _nanquantile_1d(
a[ii], q, weights=weights[ii],
overwrite_input=overwrite_input, method=method,
)
# This path dealt with `out` already...
return result

if out is not None:
out[...] = result
Expand All @@ -1666,8 +1704,9 @@ def _nanquantile_1d(
Private function for rank 1 arrays. Compute quantile ignoring NaNs.
See nanpercentile for parameter usage
"""
arr1d, overwrite_input = _remove_nan_1d(arr1d,
overwrite_input=overwrite_input)
# TODO: What to do when arr1d = [1, np.nan] and weights = [0, 1]?
arr1d, weights, overwrite_input = _remove_nan_1d(arr1d,
second_arr1d=weights, overwrite_input=overwrite_input)
if arr1d.size == 0:
# convert to scalar
return np.full(q.shape, np.nan, dtype=arr1d.dtype)[()]
Expand Down
62 changes: 59 additions & 3 deletions numpy/lib/tests/test_nanfunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1144,7 +1144,8 @@ def test_complex(self):
assert_raises(TypeError, np.nanpercentile, arr_c, 0.5)

@pytest.mark.parametrize("weighted", [False, True])
def test_result_values(self, weighted):
@pytest.mark.parametrize("use_out", [False, True])
def test_result_values(self, weighted, use_out):
if weighted:
percentile = partial(np.percentile, method="inverted_cdf")
nanpercentile = partial(np.nanpercentile, method="inverted_cdf")
Expand All @@ -1160,13 +1161,16 @@ def gen_weights(d):
return None

tgt = [percentile(d, 28, weights=gen_weights(d)) for d in _rdat]
res = nanpercentile(_ndat, 28, axis=1, weights=gen_weights(_ndat))
out = np.empty_like(tgt) if use_out else None
res = nanpercentile(_ndat, 28, axis=1,
weights=gen_weights(_ndat), out=out)
assert_almost_equal(res, tgt)
# Transpose the array to fit the output convention of numpy.percentile
tgt = np.transpose([percentile(d, (28, 98), weights=gen_weights(d))
for d in _rdat])
out = np.empty_like(tgt) if use_out else None
res = nanpercentile(_ndat, (28, 98), axis=1,
weights=gen_weights(_ndat))
weights=gen_weights(_ndat), out=out)
assert_almost_equal(res, tgt)

@pytest.mark.parametrize("axis", [None, 0, 1])
Expand Down Expand Up @@ -1242,6 +1246,58 @@ def test_multiple_percentiles(self):
np.nanpercentile(megamat, perc, axis=(1, 2)).shape, (2, 3, 6)
)

@pytest.mark.parametrize("nan_weight", [0, 1, 2, 3, 1e200])
def test_nan_value_with_weight(self, nan_weight):
x = [1, np.nan, 2, 3]
result = np.float64(2.0)
q_unweighted = np.nanpercentile(x, 50, method="inverted_cdf")
assert_equal(q_unweighted, result)

# The weight value at the nan position should not matter.
w = [1.0, nan_weight, 1.0, 1.0]
q_weighted = np.nanpercentile(x, 50, weights=w, method="inverted_cdf")
assert_equal(q_weighted, result)

@pytest.mark.parametrize("axis", [0, 1, 2])
def test_nan_value_with_weight_ndim(self, axis):
# Create a multi-dimensional array to test
np.random.seed(1)
x_no_nan = np.random.random(size=(100, 99, 2))
# Set some places to NaN (not particularly smart) so there is always
# some non-Nan.
x = x_no_nan.copy()
x[np.arange(99), np.arange(99), 0] = np.nan

p = np.array([[20., 50., 30], [70, 33, 80]])

# We just use ones as weights, but replace it with 0 or 1e200 at the
# NaN positions below.
weights = np.ones_like(x)

# For comparison use weighted normal percentile with nan weights at
# 0 (and no NaNs); not sure this is strictly identical but should be
# sufficiently so (if a percentile lies exactly on a 0 value).
weights[np.isnan(x)] = 0
p_expected = np.percentile(
x_no_nan, p, axis=axis, weights=weights, method="inverted_cdf")

p_unweighted = np.nanpercentile(
x, p, axis=axis, method="inverted_cdf")
# The normal and unweighted versions should be identical:
assert_equal(p_unweighted, p_expected)

weights[np.isnan(x)] = 1e200 # huge value, shouldn't matter
p_weighted = np.nanpercentile(
x, p, axis=axis, weights=weights, method="inverted_cdf")
assert_equal(p_weighted, p_expected)
# Also check with out passed:
out = np.empty_like(p_weighted)
res = np.nanpercentile(
x, p, axis=axis, weights=weights, out=out, method="inverted_cdf")

assert res is out
assert_equal(out, p_expected)


class TestNanFunctions_Quantile:
# most of this is already tested by TestPercentile
Expand Down

0 comments on commit c8665ba

Please sign in to comment.