From de30692f0c8c677553127063e360c1514dd3e32b Mon Sep 17 00:00:00 2001 From: Benjamin Root Date: Thu, 2 May 2013 00:50:39 -0400 Subject: [PATCH 1/4] Adding np.nanmean(), np.nanstd(), np.nanvar() --- numpy/core/_methods.py | 79 +++++++++++- numpy/core/fromnumeric.py | 263 +++++++++++++++++++++++++++++++++++++- 2 files changed, 340 insertions(+), 2 deletions(-) diff --git a/numpy/core/_methods.py b/numpy/core/_methods.py index 66a05e86814c..297d708a85c7 100644 --- a/numpy/core/_methods.py +++ b/numpy/core/_methods.py @@ -7,7 +7,7 @@ from numpy.core import multiarray as mu from numpy.core import umath as um -from numpy.core.numeric import asanyarray +from numpy.core.numeric import array, asanyarray, isnan def _amax(a, axis=None, out=None, keepdims=False): return um.maximum.reduce(a, axis=axis, @@ -61,6 +61,26 @@ def _mean(a, axis=None, dtype=None, out=None, keepdims=False): ret = ret / float(rcount) return ret +def _nanmean(a, axis=None, dtype=None, out=None, keepdims=False): + arr = array(a, subok=True) + mask = isnan(arr) + + # Upgrade bool, unsigned int, and int to float64 + if dtype is None and arr.dtype.kind in ['b','u','i']: + ret = um.add.reduce(arr, axis=axis, dtype='f8', + out=out, keepdims=keepdims) + else: + mu.copyto(arr, 0.0, where=mask) + ret = um.add.reduce(arr, axis=axis, dtype=dtype, + out=out, keepdims=keepdims) + rcount = (~mask).sum(axis=axis) + if isinstance(ret, mu.ndarray): + ret = um.true_divide(ret, rcount, + out=ret, casting='unsafe', subok=False) + else: + ret = ret / float(rcount) + return ret + def _var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): arr = asanyarray(a) @@ -101,6 +121,52 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0, return ret +def _nanvar(a, axis=None, dtype=None, out=None, ddof=0, + keepdims=False): + arr = array(a, subok=True) + mask = isnan(arr) + + # First compute the mean, saving 'rcount' for reuse later + if dtype is None and arr.dtype.kind in ['b','u','i']: + arrmean = um.add.reduce(arr, 
axis=axis, dtype='f8', keepdims=True) + else: + mu.copyto(arr, 0.0, where=mask) + arrmean = um.add.reduce(arr, axis=axis, dtype=dtype, + keepdims=True) + rcount = (~mask).sum(axis=axis, keepdims=True) + if isinstance(arrmean, mu.ndarray): + arrmean = um.true_divide(arrmean, rcount, + out=arrmean, casting='unsafe', subok=False) + else: + arrmean = arrmean / float(rcount) + + # arr - arrmean + x = arr - arrmean + x[mask] = 0.0 + + # (arr - arrmean) ** 2 + if arr.dtype.kind == 'c': + x = um.multiply(x, um.conjugate(x), out=x).real + else: + x = um.multiply(x, x, out=x) + + # add.reduce((arr - arrmean) ** 2, axis) + ret = um.add.reduce(x, axis=axis, dtype=dtype, out=out, + keepdims=keepdims) + + # add.reduce((arr - arrmean) ** 2, axis) / (n - ddof) + if not keepdims and isinstance(rcount, mu.ndarray): + rcount = rcount.squeeze(axis=axis) + rcount -= ddof + if isinstance(ret, mu.ndarray): + ret = um.true_divide(ret, rcount, + out=ret, casting='unsafe', subok=False) + else: + ret = ret / float(rcount) + + return ret + + def _std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) @@ -111,3 +177,14 @@ def _std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): ret = um.sqrt(ret) return ret + +def _nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): + ret = _nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims) + + if isinstance(ret, mu.ndarray): + ret = um.sqrt(ret, out=ret) + else: + ret = um.sqrt(ret) + + return ret diff --git a/numpy/core/fromnumeric.py b/numpy/core/fromnumeric.py index 84e45a6a519a..1fa0cf3687b2 100644 --- a/numpy/core/fromnumeric.py +++ b/numpy/core/fromnumeric.py @@ -21,7 +21,8 @@ 'resize', 'diagonal', 'trace', 'ravel', 'nonzero', 'shape', 'compress', 'clip', 'sum', 'product', 'prod', 'sometrue', 'alltrue', 'any', 'all', 'cumsum', 'cumproduct', 'cumprod', 'ptp', 'ndim', - 'rank', 'size', 'around', 'round_', 'mean', 
'std', 'var', 'squeeze', + 'rank', 'size', 'around', 'round_', 'mean', 'nanmean', + 'std', 'nanstd', 'var', 'nanvar', 'squeeze', 'amax', 'amin', ] @@ -2454,6 +2455,8 @@ def mean(a, axis=None, dtype=None, out=None, keepdims=False): See Also -------- average : Weighted average + nanmean : Arithmetic mean while ignoring NaNs + var, nanvar Notes ----- @@ -2500,6 +2503,80 @@ def mean(a, axis=None, dtype=None, out=None, keepdims=False): return _methods._mean(a, axis=axis, dtype=dtype, out=out, keepdims=keepdims) +def nanmean(a, axis=None, dtype=None, out=None, keepdims=False): + """ + Compute the arithmetic mean along the specified axis, ignoring NaNs. + + Returns the average of the array elements. The average is taken over + the flattened array by default, otherwise over the specified axis. + `float64` intermediate and return values are used for integer inputs. + + Parameters + ---------- + a : array_like + Array containing numbers whose mean is desired. If `a` is not an + array, a conversion is attempted. + axis : int, optional + Axis along which the means are computed. The default is to compute + the mean of the flattened array. + dtype : data-type, optional + Type to use in computing the mean. For integer inputs, the default + is `float64`; for floating point inputs, it is the same as the + input dtype. + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``; if provided, it must have the same shape as the + expected output, but the type will be cast if necessary. + See `doc.ufuncs` for details. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `arr`. + + Returns + ------- + m : ndarray, see dtype parameter above + If `out=None`, returns a new array containing the mean values, + otherwise a reference to the output array is returned. 
+ + See Also + -------- + average : Weighted average + mean : Arithmetic mean taken while not ignoring NaNs + var, nanvar + + Notes + ----- + The arithmetic mean is the sum of the non-nan elements along the axis + divided by the number of non-nan elements. + + Note that for floating-point input, the mean is computed using the + same precision the input has. Depending on the input data, this can + cause the results to be inaccurate, especially for `float32`. + Specifying a higher-precision accumulator using the `dtype` keyword + can alleviate this issue. + + Examples + -------- + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.nanmean(a) + 2.6666666666666665 + >>> np.nanmean(a, axis=0) + array([ 2., 4.]) + >>> np.nanmean(a, axis=1) + array([ 1., 3.5]) + + """ + if not (type(a) is mu.ndarray): + try: + mean = a.nanmean + return mean(axis=axis, dtype=dtype, out=out) + except AttributeError: + pass + + return _methods._nanmean(a, axis=axis, dtype=dtype, + out=out, keepdims=keepdims) + def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ @@ -2542,6 +2619,7 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): See Also -------- var, mean + nanmean, nanstd numpy.doc.ufuncs : Section "Output arguments" Notes @@ -2602,6 +2680,97 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): return _methods._std(a, axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) +def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): + """ + Compute the standard deviation along the specified axis, while + ignoring NaNs. + + Returns the standard deviation, a measure of the spread of a distribution, + of the non-NaN array elements. The standard deviation is computed for the + flattened array by default, otherwise over the specified axis. + + Parameters + ---------- + a : array_like + Calculate the standard deviation of the non-NaN values. + axis : int, optional + Axis along which the standard deviation is computed. 
The default is + to compute the standard deviation of the flattened array. + dtype : dtype, optional + Type to use in computing the standard deviation. For arrays of + integer type the default is float64, for arrays of float types it is + the same as the array type. + out : ndarray, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output but the type (of the calculated + values) will be cast if necessary. + ddof : int, optional + Means Delta Degrees of Freedom. The divisor used in calculations + is ``N - ddof``, where ``N`` represents the number of non-NaN elements. + By default `ddof` is zero. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `a`. + + Returns + ------- + standard_deviation : ndarray, see dtype parameter above. + If `out` is None, return a new array containing the standard deviation, + otherwise return a reference to the output array. + + See Also + -------- + var, mean, std + nanvar, nanmean + numpy.doc.ufuncs : Section "Output arguments" + + Notes + ----- + The standard deviation is the square root of the average of the squared + deviations from the mean, i.e., ``std = sqrt(nanmean(abs(x - nanmean(x))**2))``. + + The average squared deviation is normally calculated as + ``x.sum() / N``, where ``N`` is the number of non-NaN elements. If, however, `ddof` is specified, + the divisor ``N - ddof`` is used instead. In standard statistical + practice, ``ddof=1`` provides an unbiased estimator of the variance + of the infinite population. ``ddof=0`` provides a maximum likelihood + estimate of the variance for normally distributed variables. The + standard deviation computed in this function is the square root of + the estimated variance, so even with ``ddof=1``, it will not be an + unbiased estimate of the standard deviation per se.
+ + Note that, for complex numbers, `std` takes the absolute + value before squaring, so that the result is always real and nonnegative. + + For floating-point input, the *std* is computed using the same + precision the input has. Depending on the input data, this can cause + the results to be inaccurate, especially for float32. + Specifying a higher-accuracy accumulator using the `dtype` keyword can + alleviate this issue. + + Examples + -------- + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.nanstd(a) + 1.247219128924647 + >>> np.nanstd(a, axis=0) + array([ 1., 0.]) + >>> np.nanstd(a, axis=1) + array([ 0., 0.5]) + + """ + + if not (type(a) is mu.ndarray): + try: + nanstd = a.nanstd + return nanstd(axis=axis, dtype=dtype, out=out, ddof=ddof) + except AttributeError: + pass + + return _methods._nanstd(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims) + def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): """ @@ -2704,3 +2873,95 @@ def var(a, axis=None, dtype=None, out=None, ddof=0, return _methods._var(a, axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims) + + +def nanvar(a, axis=None, dtype=None, out=None, ddof=0, + keepdims=False): + """ + Compute the variance along the specified axis, while ignoring NaNs. + + Returns the variance of the array elements, a measure of the spread of a + distribution. The variance is computed for the flattened array by + default, otherwise over the specified axis. + + Parameters + ---------- + a : array_like + Array containing numbers whose variance is desired. If `a` is not an + array, a conversion is attempted. + axis : int, optional + Axis along which the variance is computed. The default is to compute + the variance of the flattened array. + dtype : data-type, optional + Type to use in computing the variance. For arrays of integer type + the default is `float64`; for arrays of float types it is the same as + the array type.
+ out : ndarray, optional + Alternate output array in which to place the result. It must have + the same shape as the expected output, but the type is cast if + necessary. + ddof : int, optional + "Delta Degrees of Freedom": the divisor used in the calculation is + ``N - ddof``, where ``N`` represents the number of non-NaN elements. By + default `ddof` is zero. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `a`. + + Returns + ------- + variance : ndarray, see dtype parameter above + If ``out=None``, returns a new array containing the variance; + otherwise, a reference to the output array is returned. + + See Also + -------- + std : Standard deviation + mean : Average + var : Variance while not ignoring NaNs + nanstd, nanmean + numpy.doc.ufuncs : Section "Output arguments" + + Notes + ----- + The variance is the average of the squared deviations from the mean, + i.e., ``var = nanmean(abs(x - nanmean(x))**2)``. + + The mean is normally calculated as ``x.sum() / N``, where ``N`` is the number of non-NaN elements. + If, however, `ddof` is specified, the divisor ``N - ddof`` is used + instead. In standard statistical practice, ``ddof=1`` provides an + unbiased estimator of the variance of a hypothetical infinite population. + ``ddof=0`` provides a maximum likelihood estimate of the variance for + normally distributed variables. + + Note that for complex numbers, the absolute value is taken before + squaring, so that the result is always real and nonnegative. + + For floating-point input, the variance is computed using the same + precision the input has. Depending on the input data, this can cause + the results to be inaccurate, especially for `float32`. Specifying a + higher-accuracy accumulator using the ``dtype`` keyword can alleviate + this issue.
+ + Examples + -------- + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.var(a) + 1.5555555555555554 + >>> np.nanvar(a, axis=0) + array([ 1., 0.]) + >>> np.nanvar(a, axis=1) + array([ 0., 0.25]) + + """ + + if not (type(a) is mu.ndarray): + try: + nanvar = a.nanvar + return nanvar(axis=axis, dtype=dtype, out=out, ddof=ddof) + except AttributeError: + pass + + return _methods._nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof, + keepdims=keepdims) From a4571585455348e5cc535f0b753434d9bcfc5b94 Mon Sep 17 00:00:00 2001 From: Benjamin Root Date: Wed, 15 May 2013 22:09:23 -0400 Subject: [PATCH 2/4] Added tests for nanmean(), nanvar(), nanstd() --- numpy/core/tests/test_numeric.py | 65 ++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 9947a4660af7..de7e92547f41 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -177,18 +177,36 @@ def test_mean(self): assert_(all(mean(A,0) == array([2.5,3.5,4.5]))) assert_(all(mean(A,1) == array([2.,5.]))) + def test_nanmean(self): + A = [[1, nan, nan], [nan, 4, 5]] + assert_(nanmean(A) == (10.0 / 3)) + assert_(all(nanmean(A,0) == array([1, 4, 5]))) + assert_(all(nanmean(A,1) == array([1, 4.5]))) + def test_std(self): A = [[1,2,3],[4,5,6]] assert_almost_equal(std(A), 1.707825127659933) assert_almost_equal(std(A,0), array([1.5, 1.5, 1.5])) assert_almost_equal(std(A,1), array([0.81649658, 0.81649658])) + def test_nanstd(self): + A = [[1, nan, nan], [nan, 4, 5]] + assert_almost_equal(nanstd(A), 1.699673171197595) + assert_almost_equal(nanstd(A,0), array([0.0, 0.0, 0.0])) + assert_almost_equal(nanstd(A,1), array([0.0, 0.5])) + def test_var(self): A = [[1,2,3],[4,5,6]] assert_almost_equal(var(A), 2.9166666666666665) assert_almost_equal(var(A,0), array([2.25, 2.25, 2.25])) assert_almost_equal(var(A,1), array([0.66666667, 0.66666667])) + def test_nanvar(self): + A = [[1, nan, nan], [nan, 4, 5]] + 
assert_almost_equal(nanvar(A), 2.88888888889) + assert_almost_equal(nanvar(A,0), array([0.0, 0.0, 0.0])) + assert_almost_equal(nanvar(A,1), array([0.0, 0.25])) + class TestBoolScalar(TestCase): def test_logical(self): @@ -1291,6 +1309,23 @@ def test_no_parameter_modification(self): assert_array_equal(x, array([inf, 1])) assert_array_equal(y, array([0, inf])) +class TestNaNMean(TestCase): + def setUp(self): + self.A = array([1,nan,-1,nan,nan,1,-1]) + self.B = array([nan, nan, nan, nan]) + self.real_mean = 0 + + def test_basic(self): + assert_almost_equal(nanmean(self.A),self.real_mean) + + def test_allnans(self): + assert_(isnan(nanmean(self.B))) + + def test_empty(self): + assert_(isnan(nanmean(array([])))) + + + class TestStdVar(TestCase): def setUp(self): @@ -1313,6 +1348,36 @@ def test_ddof2(self): assert_almost_equal(std(self.A,ddof=2)**2, self.real_var*len(self.A)/float(len(self.A)-2)) +class TestNaNStdVar(TestCase): + def setUp(self): + self.A = array([nan,1,-1,nan,1,nan,-1]) + self.B = array([nan, nan, nan, nan]) + self.real_var = 1 + + def test_basic(self): + assert_almost_equal(nanvar(self.A),self.real_var) + assert_almost_equal(nanstd(self.A)**2,self.real_var) + + def test_ddof1(self): + assert_almost_equal(nanvar(self.A,ddof=1), + self.real_var*sum(~isnan(self.A))/float(sum(~isnan(self.A))-1)) + assert_almost_equal(nanstd(self.A,ddof=1)**2, + self.real_var*sum(~isnan(self.A))/float(sum(~isnan(self.A))-1)) + + def test_ddof2(self): + assert_almost_equal(nanvar(self.A,ddof=2), + self.real_var*sum(~isnan(self.A))/float(sum(~isnan(self.A))-2)) + assert_almost_equal(nanstd(self.A,ddof=2)**2, + self.real_var*sum(~isnan(self.A))/float(sum(~isnan(self.A))-2)) + + def test_allnans(self): + assert_(isnan(nanvar(self.B))) + assert_(isnan(nanstd(self.B))) + + def test_empty(self): + assert_(isnan(nanvar(array([])))) + assert_(isnan(nanstd(array([])))) + class TestStdVarComplex(TestCase): def test_basic(self): From f15be523ba292aff7f955d6e8ee74da01411687e Mon Sep 17 
00:00:00 2001 From: Benjamin Root Date: Sun, 19 May 2013 14:27:13 -0400 Subject: [PATCH 3/4] Tests now checks the warning state --- numpy/core/tests/test_numeric.py | 44 ++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index de7e92547f41..6d45bec422e7 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -3,6 +3,7 @@ import sys import platform from decimal import Decimal +import warnings import numpy as np from numpy.core import * @@ -177,6 +178,11 @@ def test_mean(self): assert_(all(mean(A,0) == array([2.5,3.5,4.5]))) assert_(all(mean(A,1) == array([2.,5.]))) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(isnan(mean([]))) + assert_(w[0].category is RuntimeWarning) + def test_nanmean(self): A = [[1, nan, nan], [nan, 4, 5]] assert_(nanmean(A) == (10.0 / 3)) @@ -189,6 +195,11 @@ def test_std(self): assert_almost_equal(std(A,0), array([1.5, 1.5, 1.5])) assert_almost_equal(std(A,1), array([0.81649658, 0.81649658])) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(isnan(std([]))) + assert_(w[0].category is RuntimeWarning) + def test_nanstd(self): A = [[1, nan, nan], [nan, 4, 5]] assert_almost_equal(nanstd(A), 1.699673171197595) @@ -201,6 +212,11 @@ def test_var(self): assert_almost_equal(var(A,0), array([2.25, 2.25, 2.25])) assert_almost_equal(var(A,1), array([0.66666667, 0.66666667])) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(isnan(mean([]))) + assert_(w[0].category is RuntimeWarning) + def test_nanvar(self): A = [[1, nan, nan], [nan, 4, 5]] assert_almost_equal(nanvar(A), 2.88888888889) @@ -1319,13 +1335,16 @@ def test_basic(self): assert_almost_equal(nanmean(self.A),self.real_mean) def test_allnans(self): - 
assert_(isnan(nanmean(self.B))) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(isnan(nanmean(self.B))) + assert_(w[0].category is RuntimeWarning) def test_empty(self): - assert_(isnan(nanmean(array([])))) - - - + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(isnan(nanmean(array([])))) + assert_(w[0].category is RuntimeWarning) class TestStdVar(TestCase): def setUp(self): @@ -1371,13 +1390,18 @@ def test_ddof2(self): self.real_var*sum(~isnan(self.A))/float(sum(~isnan(self.A))-2)) def test_allnans(self): - assert_(isnan(nanvar(self.B))) - assert_(isnan(nanstd(self.B))) + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(isnan(nanvar(self.B))) + assert_(isnan(nanstd(self.B))) + assert_(w[0].category is RuntimeWarning) def test_empty(self): - assert_(isnan(nanvar(array([])))) - assert_(isnan(nanstd(array([])))) - + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + assert_(isnan(nanvar(array([])))) + assert_(isnan(nanstd(array([])))) + assert_(w[0].category is RuntimeWarning) class TestStdVarComplex(TestCase): def test_basic(self): From 5be45b280b258e158b93163b937f8f9c08d30393 Mon Sep 17 00:00:00 2001 From: Benjamin Root Date: Thu, 30 May 2013 22:38:56 -0400 Subject: [PATCH 4/4] Updated comments and dtype tests in _methods.py --- numpy/core/_methods.py | 29 +++++++++++++++++++---------- numpy/core/tests/test_numeric.py | 26 ++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/numpy/core/_methods.py b/numpy/core/_methods.py index 297d708a85c7..c317358e1aa2 100644 --- a/numpy/core/_methods.py +++ b/numpy/core/_methods.py @@ -7,7 +7,8 @@ from numpy.core import multiarray as mu from numpy.core import umath as um -from numpy.core.numeric import array, asanyarray, isnan +from 
numpy.core.numeric import array, asanyarray, isnan, issubdtype +from numpy.core import numerictypes as nt def _amax(a, axis=None, out=None, keepdims=False): return um.maximum.reduce(a, axis=axis, @@ -46,8 +47,9 @@ def _count_reduce_items(arr, axis): def _mean(a, axis=None, dtype=None, out=None, keepdims=False): arr = asanyarray(a) - # Upgrade bool, unsigned int, and int to float64 - if dtype is None and arr.dtype.kind in ['b','u','i']: + # Cast bool, unsigned int, and int to float64 + if dtype is None and (issubdtype(arr.dtype, nt.integer) or + issubdtype(arr.dtype, nt.bool_)): ret = um.add.reduce(arr, axis=axis, dtype='f8', out=out, keepdims=keepdims) else: @@ -62,11 +64,14 @@ def _mean(a, axis=None, dtype=None, out=None, keepdims=False): return ret def _nanmean(a, axis=None, dtype=None, out=None, keepdims=False): + # Using array() instead of asanyarray() because the former always + # makes a copy, which is important due to the copyto() action later arr = array(a, subok=True) mask = isnan(arr) - # Upgrade bool, unsigned int, and int to float64 - if dtype is None and arr.dtype.kind in ['b','u','i']: + # Cast bool, unsigned int, and int to float64 + if dtype is None and (issubdtype(arr.dtype, nt.integer) or + issubdtype(arr.dtype, nt.bool_)): ret = um.add.reduce(arr, axis=axis, dtype='f8', out=out, keepdims=keepdims) else: @@ -86,7 +91,8 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0, arr = asanyarray(a) # First compute the mean, saving 'rcount' for reuse later - if dtype is None and arr.dtype.kind in ['b','u','i']: + if dtype is None and (issubdtype(arr.dtype, nt.integer) or + issubdtype(arr.dtype, nt.bool_)): arrmean = um.add.reduce(arr, axis=axis, dtype='f8', keepdims=True) else: arrmean = um.add.reduce(arr, axis=axis, dtype=dtype, keepdims=True) @@ -101,7 +107,7 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0, x = arr - arrmean # (arr - arrmean) ** 2 - if arr.dtype.kind == 'c': + if issubdtype(arr.dtype, nt.complex_): x = um.multiply(x, 
um.conjugate(x), out=x).real else: x = um.multiply(x, x, out=x) @@ -123,11 +129,14 @@ def _var(a, axis=None, dtype=None, out=None, ddof=0, def _nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False): + # Using array() instead of asanyarray() because the former always + # makes a copy, which is important due to the copyto() action later arr = array(a, subok=True) mask = isnan(arr) # First compute the mean, saving 'rcount' for reuse later - if dtype is None and arr.dtype.kind in ['b','u','i']: + if dtype is None and (issubdtype(arr.dtype, nt.integer) or + issubdtype(arr.dtype, nt.bool_)): arrmean = um.add.reduce(arr, axis=axis, dtype='f8', keepdims=True) else: mu.copyto(arr, 0.0, where=mask) @@ -142,10 +151,10 @@ def _nanvar(a, axis=None, dtype=None, out=None, ddof=0, # arr - arrmean x = arr - arrmean - x[mask] = 0.0 + mu.copyto(x, 0.0, where=mask) # (arr - arrmean) ** 2 - if arr.dtype.kind == 'c': + if issubdtype(arr.dtype, nt.complex_): x = um.multiply(x, um.conjugate(x), out=x).real else: x = um.multiply(x, x, out=x) diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 6d45bec422e7..faf9cb069f30 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -1334,6 +1334,19 @@ def setUp(self): def test_basic(self): assert_almost_equal(nanmean(self.A),self.real_mean) + def test_mutation(self): + # Because of the "messing around" we do to replace NaNs with zeros + # this is meant to ensure we don't actually replace the NaNs in the + # actual array. 
+ a_copy = self.A.copy() + b_copy = self.B.copy() + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + a_ret = nanmean(self.A) + assert_equal(self.A, a_copy) + b_ret = nanmean(self.B) + assert_equal(self.B, b_copy) + def test_allnans(self): with warnings.catch_warnings(record=True) as w: warnings.filterwarnings('always', '', RuntimeWarning) @@ -1377,6 +1390,19 @@ def test_basic(self): assert_almost_equal(nanvar(self.A),self.real_var) assert_almost_equal(nanstd(self.A)**2,self.real_var) + def test_mutation(self): + # Because of the "messing around" we do to replace NaNs with zeros + # this is meant to ensure we don't actually replace the NaNs in the + # actual array. + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings('always', '', RuntimeWarning) + a_copy = self.A.copy() + b_copy = self.B.copy() + a_ret = nanvar(self.A) + assert_equal(self.A, a_copy) + b_ret = nanstd(self.B) + assert_equal(self.B, b_copy) + def test_ddof1(self): assert_almost_equal(nanvar(self.A,ddof=1), self.real_var*sum(~isnan(self.A))/float(sum(~isnan(self.A))-1))