diff --git a/doc/release/upcoming_changes/16022.improvement.rst b/doc/release/upcoming_changes/16022.improvement.rst new file mode 100644 index 000000000000..0616163b9937 --- /dev/null +++ b/doc/release/upcoming_changes/16022.improvement.rst @@ -0,0 +1,9 @@ +MaskedArray gains a ``__array_ufunc__`` method to better handle ufuncs +---------------------------------------------------------------------- +The MaskedArray class now has an implementation of ``__array_ufunc__`` that +handles deferrals to the desired implementations of the ufunc. If a masked +implementation of a ufunc exists, that implementation will take priority. +This means that calling ``np.ufunc(ma)`` on a MaskedArray ``ma`` will +behave the same as ``np.ma.ufunc(ma)``. Additionally, this method helps with +dispatching to subclasses and preserving the proper types when another +implementation should take priority. diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 8457551ca81e..1c3c32bdd233 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -780,7 +780,7 @@ def test_subclass(self): mask=[[False, False], [True, False], [False, True], [True, True], [False, False]]) out = diff(x) - assert_array_equal(out.data, [[1], [1], [1], [1], [1]]) + assert_array_equal(out.data, [[1], [4], [6], [8], [1]]) assert_array_equal(out.mask, [[False], [True], [True], [True], [False]]) assert_(type(out) is type(x)) diff --git a/numpy/ma/core.py b/numpy/ma/core.py index 93eb74be31a8..8e7ac9c4ba59 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -22,6 +22,7 @@ # pylint: disable-msg=E1002 import builtins import inspect +from numbers import Number import operator import warnings import textwrap @@ -58,10 +59,10 @@ 'frombuffer', 'fromflex', 'fromfunction', 'getdata', 'getmask', 'getmaskarray', 'greater', 'greater_equal', 'harden_mask', 'hypot', 'identity', 'ids', 'indices', 'inner', 'innerproduct', 'isMA', - 'isMaskedArray', 
'is_mask', 'is_masked', 'isarray', 'left_shift', - 'less', 'less_equal', 'log', 'log10', 'log2', - 'logical_and', 'logical_not', 'logical_or', 'logical_xor', 'make_mask', - 'make_mask_descr', 'make_mask_none', 'mask_or', 'masked', + 'isMaskedArray', 'is_mask', 'is_masked', 'isarray', 'isfinite', + 'isinf', 'isnan', 'left_shift', 'less', 'less_equal', 'log', 'log10', + 'log2', 'logical_and', 'logical_not', 'logical_or', 'logical_xor', + 'make_mask', 'make_mask_descr', 'make_mask_none', 'mask_or', 'masked', 'masked_array', 'masked_equal', 'masked_greater', 'masked_greater_equal', 'masked_inside', 'masked_invalid', 'masked_less', 'masked_less_equal', 'masked_not_equal', @@ -925,6 +926,12 @@ def __call__(self, a, *args, **kwargs): """ d = getdata(a) + if 'out' in kwargs: + # Need to drop the mask from the output array when being called + kwargs['out'] = getdata(kwargs['out']) + args = [getdata(arg) if isinstance(arg, MaskedArray) else arg + for arg in args] + # Deal with domain if self.domain is not None: # Case 1.1. : Domained function @@ -1048,7 +1055,7 @@ def __call__(self, a, b, *args, **kwargs): masked_result._update_from(b) return masked_result - def reduce(self, target, axis=0, dtype=None): + def reduce(self, target, axis=0, dtype=None, **kwargs): """ Reduce `target` along the given `axis`. 
@@ -1183,6 +1190,10 @@ def __call__(self, a, b, *args, **kwargs): # Transforms to a (subclass of) MaskedArray masked_result = result.view(get_masked_subclass(a, b)) + # If the original masks were scalar or nomask, don't expand the result + # which comes from the isfinite initialization above + if getmask(a).shape + getmask(b).shape == (): + m = _shrink_mask(m) masked_result._mask = m if isinstance(a, MaskedArray): masked_result._update_from(a) @@ -1209,6 +1220,9 @@ def __call__(self, a, b, *args, **kwargs): ceil = _MaskedUnaryOperation(umath.ceil) around = _MaskedUnaryOperation(np.round_) logical_not = _MaskedUnaryOperation(umath.logical_not) +isinf = _MaskedUnaryOperation(umath.isinf) +isnan = _MaskedUnaryOperation(umath.isnan) +isfinite = _MaskedUnaryOperation(umath.isfinite) # Domained unary ufuncs sqrt = _MaskedUnaryOperation(umath.sqrt, 0.0, @@ -3081,7 +3095,7 @@ def __array_wrap__(self, obj, context=None): func, args, out_i = context # args sometimes contains outputs (gh-10459), which we don't want input_args = args[:func.nin] - m = reduce(mask_or, [getmaskarray(arg) for arg in input_args]) + m = reduce(mask_or, [getmask(arg) for arg in input_args]) # Get the domain mask domain = ufunc_domain.get(func, None) if domain is not None: @@ -3109,7 +3123,6 @@ def __array_wrap__(self, obj, context=None): else: # Don't modify inplace, we risk back-propagation m = (m | d) - # Make sure the mask has the proper size if result is not self and result.shape == () and m: return masked @@ -3119,6 +3132,85 @@ def __array_wrap__(self, obj, context=None): return result + def __array_ufunc__(self, np_ufunc, method, *inputs, **kwargs): + """ + MaskedArray capability for ufuncs + + Handle masked versions of ufuncs if they are implemented within + the MaskedArray module. If the masked ufunc is not implemented, + this falls back to the standard numpy ndarray ufunc, which we + then call with the ndarray view of the input data. 
+ + """ + # Output can be specified as arguments or as keyword arguments + outputs = kwargs.pop('out', ()) + if not isinstance(outputs, tuple): + outputs = (outputs,) + outputs += inputs[np_ufunc.nin:] + args = inputs[:np_ufunc.nin] + + # Determine what class types we are compatible with and return + # NotImplemented if we don't know how to handle them + for arg in args + outputs: + if not isinstance(arg, (ndarray, np.bool_, Number, list, str)): + return NotImplemented + + # Get the equivalent masked version of the numpy function + # if it is in the module level functions + ma_ufunc = np.ma.__dict__.get(np_ufunc.__name__, np_ufunc) + if ma_ufunc is np_ufunc: + # We didn't have a Masked version of the ufunc, so we need to + # call the ndarray version with the data from the objects and + # prevent infinite recursion. + + # Make ndarray views of the input arguments + args = [getdata(arg) if isinstance(arg, MaskedArray) + else arg for arg in args] + else: + # The masked power function doesn't support extra args + if np_ufunc.__name__ == 'power': + kwargs = {} + + results = getattr(ma_ufunc, method)(*args, **kwargs) + if results is NotImplemented: + return NotImplemented + if method == 'at': + return + if np_ufunc.nout == 1: + results = (results,) + if outputs == (): + outputs = (None,) * np_ufunc.nout + + returns = [] + for i, result in enumerate(results): + output = outputs[i] + + # Reapply the mask + if isinstance(result, ndarray) and result is not masked: + # Need to copy over all of the data and mask from results + # to the original object requested with out + if output is not None: + if isinstance(output, MaskedArray): + output._update_from(result) + if isinstance(result, MaskedArray): + output.data[:] = result._data + output._mask = result._mask + else: + output.data[:] = result + else: + output[:] = result + + result = output + + elif output is not None: + # An out value was requested, but the result is a scalar + output[()] = result + result = output 
+ + returns.append(result) + + return returns[0] if np_ufunc.nout == 1 else returns + def view(self, dtype=None, type=None, fill_value=None): """ Return a view of the MaskedArray data. @@ -3292,7 +3384,7 @@ def _scalar_heuristic(arr, elem): return dout else: # Force dout to MA - dout = dout.view(type(self)) + dout = MaskedArray(dout) # Inherit attributes from self dout._update_from(self) # Check the fill_value @@ -3854,6 +3946,23 @@ def filled(self, fill_value=None): result = self._data return result + def clip(self, a_min, a_max, out=None, **kwargs): + """docstring inherited + np.clip.__doc__ + + TODO: Should we ignore the clip where the data is masked? + It is currently in line with the old numpy version + """ + result = self.data.clip(a_min, a_max, **kwargs).view(MaskedArray) + if out is not None: + # Just copy the data and mask + out.data[:] = getdata(result) + out._mask = self._mask + return out + result._update_from(self) + result._mask = self._mask + return result + def compressed(self): """ Return all the non-masked data as a 1-D array. @@ -3947,10 +4056,15 @@ def compress(self, condition, axis=None, out=None): # values. condition = np.asarray(condition) - _new = _data.compress(condition, axis=axis, out=out).view(type(self)) + _new = _data.compress(condition, axis=axis).view(type(self)) _new._update_from(self) if _mask is not nomask: _new._mask = _mask.compress(condition, axis=axis) + if out is not None: + out._update_from(self) + out.data[:] = _new.data + out._mask = _new.mask + return out return _new def _insert_masked_print(self): @@ -4200,7 +4314,7 @@ def __add__(self, other): """ if self._delegate_binop(other): return NotImplemented - return add(self, other) + return np.add(self, other) def __radd__(self, other): """ @@ -4209,7 +4323,7 @@ def __radd__(self, other): """ # In analogy with __rsub__ and __rdiv__, use original order: # we get here from `other + self`. 
- return add(other, self) + return np.add(other, self) def __sub__(self, other): """ @@ -4218,20 +4332,20 @@ def __sub__(self, other): """ if self._delegate_binop(other): return NotImplemented - return subtract(self, other) + return np.subtract(self, other) def __rsub__(self, other): """ Subtract self from other, and return a new masked array. """ - return subtract(other, self) + return np.subtract(other, self) def __mul__(self, other): "Multiply self by other, and return a new masked array." if self._delegate_binop(other): return NotImplemented - return multiply(self, other) + return np.multiply(self, other) def __rmul__(self, other): """ @@ -4240,7 +4354,7 @@ def __rmul__(self, other): """ # In analogy with __rsub__ and __rdiv__, use original order: # we get here from `other * self`. - return multiply(other, self) + return np.multiply(other, self) def __div__(self, other): """ @@ -4249,7 +4363,7 @@ def __div__(self, other): """ if self._delegate_binop(other): return NotImplemented - return divide(self, other) + return np.divide(self, other) def __truediv__(self, other): """ @@ -4258,14 +4372,14 @@ def __truediv__(self, other): """ if self._delegate_binop(other): return NotImplemented - return true_divide(self, other) + return np.true_divide(self, other) def __rtruediv__(self, other): """ Divide self into other, and return a new masked array. """ - return true_divide(other, self) + return np.true_divide(other, self) def __floordiv__(self, other): """ @@ -4274,14 +4388,14 @@ def __floordiv__(self, other): """ if self._delegate_binop(other): return NotImplemented - return floor_divide(self, other) + return np.floor_divide(self, other) def __rfloordiv__(self, other): """ Divide self into other, and return a new masked array. """ - return floor_divide(other, self) + return np.floor_divide(other, self) def __pow__(self, other): """ @@ -5058,8 +5172,8 @@ def trace(self, offset=0, axis1=0, axis2=1, dtype=None, out=None): #!!!: implement out + test! 
m = self._mask if m is nomask: - result = super().trace(offset=offset, axis1=axis1, axis2=axis2, - out=out) + result = self.view(np.ndarray).trace(offset=offset, axis1=axis1, + axis2=axis2, out=out) return result.astype(dtype) else: D = self.diagonal(offset=offset, axis1=axis1, axis2=axis2) @@ -5159,7 +5273,9 @@ def sum(self, axis=None, dtype=None, out=None, keepdims=np._NoValue): result = masked return result # Explicit output - result = self.filled(0).sum(axis, dtype=dtype, out=out, **kwargs) + + self.filled(0).sum(axis, dtype=dtype, out=out.view(np.ndarray), + **kwargs) if isinstance(out, MaskedArray): outmask = getmask(out) if outmask is nomask: @@ -5309,7 +5425,10 @@ def mean(self, axis=None, dtype=None, out=None, keepdims=np._NoValue): """ kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} if self._mask is nomask: - result = super().mean(axis=axis, dtype=dtype, **kwargs)[()] + result = self.view(np.ndarray).mean(axis=axis, + dtype=dtype, **kwargs) + if isinstance(result, np.ndarray): + result = MaskedArray(result, mask=nomask) else: is_float16_result = False if dtype is None: @@ -5392,9 +5511,12 @@ def var(self, axis=None, dtype=None, out=None, ddof=0, # Easy case: nomask, business as usual if self._mask is nomask: - ret = super().var(axis=axis, dtype=dtype, out=out, ddof=ddof, - **kwargs)[()] + ret = self.view(np.ndarray).var(axis=axis, dtype=dtype, + ddof=ddof, **kwargs) + if isinstance(ret, np.ndarray): + ret = MaskedArray(ret, mask=nomask) if out is not None: + out.flat = ret if isinstance(out, MaskedArray): out.__setmask__(nomask) return out @@ -5452,12 +5574,10 @@ def std(self, axis=None, dtype=None, out=None, ddof=0, numpy.std : Equivalent function """ kwargs = {} if keepdims is np._NoValue else {'keepdims': keepdims} - dvar = self.var(axis, dtype, out, ddof, **kwargs) if dvar is not masked: if out is not None: - np.power(out, 0.5, out=out, casting='unsafe') - return out + return np.power(out, 0.5, out=out, casting='unsafe') dvar = 
sqrt(dvar) return dvar @@ -5472,6 +5592,10 @@ def round(self, decimals=0, out=None): numpy.ndarray.round : corresponding function for ndarrays numpy.around : equivalent function """ + stored_out = None + if isinstance(out, MaskedArray): + stored_out = out + out = getdata(out) result = self._data.round(decimals=decimals, out=out).view(type(self)) if result.ndim > 0: result._mask = self._mask @@ -5482,7 +5606,9 @@ def round(self, decimals=0, out=None): # No explicit output: we're done if out is None: return result - if isinstance(out, MaskedArray): + if stored_out is not None: + # We got in a masked array originally, so we need to return one + out = stored_out out.__setmask__(self._mask) return out @@ -5870,7 +5996,7 @@ def mini(self, axis=None): "`mini` is deprecated; use the `min` method or " "`np.ma.minimum.reduce instead.", DeprecationWarning, stacklevel=2) - return minimum.reduce(self, axis) + return MaskedArray(np.min(self, axis)) def max(self, axis=None, out=None, fill_value=None, keepdims=np._NoValue): """ @@ -6034,13 +6160,13 @@ def partition(self, *args, **kwargs): warnings.warn("Warning: 'partition' will ignore the 'mask' " f"of the {self.__class__.__name__}.", stacklevel=2) - return super().partition(*args, **kwargs) + return self.view(np.ndarray).partition(*args, **kwargs) def argpartition(self, *args, **kwargs): warnings.warn("Warning: 'argpartition' will ignore the 'mask' " f"of the {self.__class__.__name__}.", stacklevel=2) - return super().argpartition(*args, **kwargs) + return self.view(np.ndarray).argpartition(*args, **kwargs) def take(self, indices, axis=None, out=None, mode='raise'): """ @@ -6730,7 +6856,7 @@ def __call__(self, a, b=None): return self.reduce(a) return where(self.compare(a, b), a, b) - def reduce(self, target, axis=np._NoValue): + def reduce(self, target, axis=np._NoValue, **kwargs): "Reduce target along the given axis." 
target = narray(target, copy=False, subok=True) m = getmask(target) @@ -6745,12 +6871,10 @@ def reduce(self, target, axis=np._NoValue): axis = None if axis is not np._NoValue: - kwargs = dict(axis=axis) - else: - kwargs = dict() + kwargs['axis'] = axis if m is nomask: - t = self.f.reduce(target, **kwargs) + t = self.f.reduce(target.view(np.ndarray), **kwargs) else: target = target.filled( self.fill_value_func(target)).view(type(target)) @@ -7954,6 +8078,23 @@ def allclose(a, b, masked_equal=True, rtol=1e-5, atol=1e-8): """ x = masked_array(a, copy=False) y = masked_array(b, copy=False) + if masked_equal: + # Apply the combined mask right away to avoid comparisons at the + # masked locations (assumed mask is True) + m = mask_or(getmask(x), getmask(y)) + # Expand scalars to the proper dimension for comparison if needed + if shape(x) != shape(y): + if size(x) == 1: + # scalar a + x = masked_array(np.ones(shape=shape(y))*x, mask=m) + elif size(y) == 1: + # scalar b + y = masked_array(np.ones(shape=shape(x))*y, mask=m) + else: + raise ValueError("Cannot compare arrays of different shapes.") + else: + x = masked_array(a, copy=False, mask=m) + y = masked_array(b, copy=False, mask=m) # make sure y is an inexact type to avoid abs(MIN_INT); will cause # casting of x later. @@ -7966,8 +8107,7 @@ def allclose(a, b, masked_equal=True, rtol=1e-5, atol=1e-8): if y.dtype != dtype: y = masked_array(y, dtype=dtype, copy=False) - m = mask_or(getmask(x), getmask(y)) - xinf = np.isinf(masked_array(x, copy=False, mask=m)).filled(False) + xinf = filled(np.isinf(x), False) # If we have some infs, they should fall at the same place. if not np.all(xinf == filled(np.isinf(y), False)): return False diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py index d2986012b7ac..b3016da5ae0c 100644 --- a/numpy/ma/extras.py +++ b/numpy/ma/extras.py @@ -588,8 +588,8 @@ def average(a, axis=None, weights=None, returned=False, *, >>> avg, sumweights = np.ma.average(x, axis=0, weights=[1, 2, 3], ... 
returned=True) >>> avg - masked_array(data=[2.6666666666666665, 3.6666666666666665], - mask=[False, False], + masked_array(data=[2.66666667, 3.66666667], + mask=False, fill_value=1e+20) With ``keepdims=True``, the following result has shape (3, 1). @@ -2016,8 +2016,15 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): not_m = ~m if w is not None: w = w[not_m] - return np.polyfit(x[not_m], y[not_m], deg, rcond, full, w, cov) - else: - return np.polyfit(x, y, deg, rcond, full, w, cov) + x = x[not_m] + y = y[not_m] + + # Only pass the ndarray data + if w is not None: + w = w.view(np.ndarray) + x = x.view(np.ndarray) + y = y.view(np.ndarray) + + return np.polyfit(x, y, deg, rcond, full, w, cov) polyfit.__doc__ = ma.doc_note(np.polyfit.__doc__, polyfit.__doc__) diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index b056d516907b..5b779edcbbca 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -3208,7 +3208,7 @@ def test_compress(self): assert_equal(b.fill_value, 9999) assert_equal(b, a[condition]) - condition = (a < 4.) + condition = (a.data < 4.) 
b = a.compress(condition) assert_equal(b._data, [1., 2., 3.]) assert_equal(b._mask, [0, 0, 1]) @@ -5367,7 +5367,7 @@ def test_ufunc_with_out_varied(): a = array([ 1, 2, 3], mask=[1, 0, 0]) b = array([10, 20, 30], mask=[1, 0, 0]) out = array([ 0, 0, 0], mask=[0, 0, 1]) - expected = array([11, 22, 33], mask=[1, 0, 0]) + expected = array([1, 22, 33], mask=[1, 0, 0]) out_pos = out.copy() res_pos = np.add(a, b, out_pos) diff --git a/numpy/ma/tests/test_subclassing.py b/numpy/ma/tests/test_subclassing.py index 3491cef7f450..6e45d6bbd4a1 100644 --- a/numpy/ma/tests/test_subclassing.py +++ b/numpy/ma/tests/test_subclassing.py @@ -7,6 +7,7 @@ """ import numpy as np +from numpy.lib.mixins import NDArrayOperatorsMixin from numpy.testing import assert_, assert_raises from numpy.ma.testutils import assert_equal from numpy.ma.core import ( @@ -147,6 +148,33 @@ def __array_wrap__(self, obj, context=None): return obj +class WrappedArray(NDArrayOperatorsMixin): + """ + Wrapping a MaskedArray rather than subclassing to test that + ufunc deferrals are commutative. + See: https://github.com/numpy/numpy/issues/15200 + """ + __array_priority__ = 20 + + def __init__(self, array, **attrs): + self._array = array + self.attrs = attrs + + def __repr__(self): + return f"{self.__class__.__name__}(\n{self._array}\n{self.attrs}\n)" + + def __array__(self): + return np.asarray(self._array) + + def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): + if method == '__call__': + inputs = [arg._array if isinstance(arg, self.__class__) else arg + for arg in inputs] + return self.__class__(ufunc(*inputs, **kwargs), **self.attrs) + else: + return NotImplemented + + class TestSubclassing: # Test suite for masked subclasses of ndarray. 
@@ -384,3 +412,39 @@ def test_array_no_inheritance(): # Test that the mask is False and not shared when keep_mask=False assert_(not new_array.mask) assert_(not new_array.sharedmask) + + +class TestClassWrapping: + # Test suite for classes that wrap MaskedArrays + + def setup_method(self): + m = np.ma.masked_array([1, 3, 5], mask=[False, True, False]) + wm = WrappedArray(m) + self.data = (m, wm) + + def test_masked_unary_operations(self): + # Tests masked_unary_operation + (m, wm) = self.data + with np.errstate(divide='ignore'): + assert_(isinstance(np.log(wm), WrappedArray)) + + def test_masked_binary_operations(self): + # Tests masked_binary_operation + (m, wm) = self.data + # Result should be a WrappedArray + assert_(isinstance(np.add(wm, wm), WrappedArray)) + assert_(isinstance(np.add(m, wm), WrappedArray)) + assert_(isinstance(np.add(wm, m), WrappedArray)) + # add and '+' should call the same ufunc + assert_equal(np.add(m, wm), m + wm) + assert_(isinstance(np.hypot(m, wm), WrappedArray)) + assert_(isinstance(np.hypot(wm, m), WrappedArray)) + # Test domained binary operations + assert_(isinstance(np.divide(wm, m), WrappedArray)) + assert_(isinstance(np.divide(m, wm), WrappedArray)) + assert_equal(np.divide(wm, m) * m, np.divide(m, m) * wm) + # Test broadcasting + m2 = np.stack([m, m]) + assert_(isinstance(np.divide(wm, m2), WrappedArray)) + assert_(isinstance(np.divide(m2, wm), WrappedArray)) + assert_equal(np.divide(m2, wm), np.divide(wm, m2))