-
-
Notifications
You must be signed in to change notification settings - Fork 11k
Adding np.nanmean(), nanstd(), and nanvar() #3297
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
de30692
a457158
f15be52
5be45b2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,8 @@ | |
|
||
from numpy.core import multiarray as mu | ||
from numpy.core import umath as um | ||
from numpy.core.numeric import asanyarray | ||
from numpy.core.numeric import array, asanyarray, isnan, issubdtype | ||
from numpy.core import numerictypes as nt | ||
|
||
def _amax(a, axis=None, out=None, keepdims=False): | ||
return um.maximum.reduce(a, axis=axis, | ||
|
@@ -46,8 +47,9 @@ def _count_reduce_items(arr, axis): | |
def _mean(a, axis=None, dtype=None, out=None, keepdims=False): | ||
arr = asanyarray(a) | ||
|
||
# Upgrade bool, unsigned int, and int to float64 | ||
if dtype is None and arr.dtype.kind in ['b','u','i']: | ||
# Cast bool, unsigned int, and int to float64 | ||
if dtype is None and (issubdtype(arr.dtype, nt.integer) or | ||
issubdtype(arr.dtype, nt.bool_)): | ||
ret = um.add.reduce(arr, axis=axis, dtype='f8', | ||
out=out, keepdims=keepdims) | ||
else: | ||
|
@@ -61,12 +63,36 @@ def _mean(a, axis=None, dtype=None, out=None, keepdims=False): | |
ret = ret / float(rcount) | ||
return ret | ||
|
||
def _nanmean(a, axis=None, dtype=None, out=None, keepdims=False): | ||
# Using array() instead of asanyarray() because the former always | ||
# makes a copy, which is important due to the copyto() action later | ||
arr = array(a, subok=True) | ||
mask = isnan(arr) | ||
|
||
# Cast bool, unsigned int, and int to float64 | ||
if dtype is None and (issubdtype(arr.dtype, nt.integer) or | ||
issubdtype(arr.dtype, nt.bool_)): | ||
ret = um.add.reduce(arr, axis=axis, dtype='f8', | ||
out=out, keepdims=keepdims) | ||
else: | ||
mu.copyto(arr, 0.0, where=mask) | ||
ret = um.add.reduce(arr, axis=axis, dtype=dtype, | ||
out=out, keepdims=keepdims) | ||
rcount = (~mask).sum(axis=axis) | ||
if isinstance(ret, mu.ndarray): | ||
ret = um.true_divide(ret, rcount, | ||
out=ret, casting='unsafe', subok=False) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, that is going to truncate rather than round if the output is integer, which doesn't seem right. Might want to document that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Then that would happen in regular _mean() as well, right? |
||
else: | ||
ret = ret / float(rcount) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Then you get NaN. Exactly as intended. |
||
return ret | ||
|
||
def _var(a, axis=None, dtype=None, out=None, ddof=0, | ||
keepdims=False): | ||
arr = asanyarray(a) | ||
|
||
# First compute the mean, saving 'rcount' for reuse later | ||
if dtype is None and arr.dtype.kind in ['b','u','i']: | ||
if dtype is None and (issubdtype(arr.dtype, nt.integer) or | ||
issubdtype(arr.dtype, nt.bool_)): | ||
arrmean = um.add.reduce(arr, axis=axis, dtype='f8', keepdims=True) | ||
else: | ||
arrmean = um.add.reduce(arr, axis=axis, dtype=dtype, keepdims=True) | ||
|
@@ -81,7 +107,7 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0, | |
x = arr - arrmean | ||
|
||
# (arr - arrmean) ** 2 | ||
if arr.dtype.kind == 'c': | ||
if issubdtype(arr.dtype, nt.complex_): | ||
x = um.multiply(x, um.conjugate(x), out=x).real | ||
else: | ||
x = um.multiply(x, x, out=x) | ||
|
@@ -101,6 +127,55 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0, | |
|
||
return ret | ||
|
||
def _nanvar(a, axis=None, dtype=None, out=None, ddof=0, | ||
keepdims=False): | ||
# Using array() instead of asanyarray() because the former always | ||
# makes a copy, which is important due to the copyto() action later | ||
arr = array(a, subok=True) | ||
mask = isnan(arr) | ||
|
||
# First compute the mean, saving 'rcount' for reuse later | ||
if dtype is None and (issubdtype(arr.dtype, nt.integer) or | ||
issubdtype(arr.dtype, nt.bool_)): | ||
arrmean = um.add.reduce(arr, axis=axis, dtype='f8', keepdims=True) | ||
else: | ||
mu.copyto(arr, 0.0, where=mask) | ||
arrmean = um.add.reduce(arr, axis=axis, dtype=dtype, | ||
keepdims=True) | ||
rcount = (~mask).sum(axis=axis, keepdims=True) | ||
if isinstance(arrmean, mu.ndarray): | ||
arrmean = um.true_divide(arrmean, rcount, | ||
out=arrmean, casting='unsafe', subok=False) | ||
else: | ||
arrmean = arrmean / float(rcount) | ||
|
||
# arr - arrmean | ||
x = arr - arrmean | ||
mu.copyto(x, 0.0, where=mask) | ||
|
||
# (arr - arrmean) ** 2 | ||
if issubdtype(arr.dtype, nt.complex_): | ||
x = um.multiply(x, um.conjugate(x), out=x).real | ||
else: | ||
x = um.multiply(x, x, out=x) | ||
|
||
# add.reduce((arr - arrmean) ** 2, axis) | ||
ret = um.add.reduce(x, axis=axis, dtype=dtype, out=out, | ||
keepdims=keepdims) | ||
|
||
# add.reduce((arr - arrmean) ** 2, axis) / (n - ddof) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (n - ddof) could be negative, no? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, that is a possibility, and I was wondering about that. Do note that the same issue exists for _var() and _std(), so we probably should fix it there too and add tests. |
||
if not keepdims and isinstance(rcount, mu.ndarray): | ||
rcount = rcount.squeeze(axis=axis) | ||
rcount -= ddof | ||
if isinstance(ret, mu.ndarray): | ||
ret = um.true_divide(ret, rcount, | ||
out=ret, casting='unsafe', subok=False) | ||
else: | ||
ret = ret / float(rcount) | ||
|
||
return ret | ||
|
||
|
||
def _std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): | ||
ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof, | ||
keepdims=keepdims) | ||
|
@@ -111,3 +186,14 @@ def _std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): | |
ret = um.sqrt(ret) | ||
|
||
return ret | ||
|
||
def _nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): | ||
ret = _nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, | ||
keepdims=keepdims) | ||
|
||
if isinstance(ret, mu.ndarray): | ||
ret = um.sqrt(ret, out=ret) | ||
else: | ||
ret = um.sqrt(ret) | ||
|
||
return ret |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Was going to say that it might be better a baseclass + wrap at the end (for matrix support, but matrix support is bad anyway...), but then the non-nan code does the same. Which makes me wonder, would it be sensible to just create a
where=
kwarg instead making the nan-funcs just tiny wrappers? Of course I could dream about having where for usual ufunc.reduce, but I think it probably would require larger additions to the nditer.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think @mwiebe did something along those lines at one point with the NA work, but it got pulled out. I seriously want a where= kwarg in the ufunc architecture so that I can "fix" masked arrays making a copy of itself whenever one does a min or a max.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IIRC, there is a where= in ufunc.call now, but Mark didn't get around
to implementing it for ufunc.reduce. It would be great to have, definitely.
On Thu, May 2, 2013 at 9:09 AM, Benjamin Root [email protected]: