From 0360bd5920c8affa5d85389ecb2961589d31e408 Mon Sep 17 00:00:00 2001 From: Greg Lucas Date: Mon, 2 Jan 2023 14:46:44 -0700 Subject: [PATCH 1/5] ENH: Add __array_function__ implementation to MaskedArray Previously, calling np.stack([ma, a]) with a masked and a normal array would stack the nd-data from the items, but wrap it in a MaskedArray class without propagating the mask. This implementation intercepts the np.func calls and checks whether there is a compatible masked version that we can use instead. Now, the mask gets propagated with the stack as expected. --- .../upcoming_changes/22913.improvement.rst | 7 +++ numpy/ma/core.py | 58 ++++++++++++++++--- numpy/ma/extras.py | 9 ++- numpy/ma/tests/test_core.py | 9 +-- numpy/ma/tests/test_extras.py | 14 +++-- 5 files changed, 79 insertions(+), 18 deletions(-) create mode 100644 doc/release/upcoming_changes/22913.improvement.rst diff --git a/doc/release/upcoming_changes/22913.improvement.rst b/doc/release/upcoming_changes/22913.improvement.rst new file mode 100644 index 000000000000..96bba059882d --- /dev/null +++ b/doc/release/upcoming_changes/22913.improvement.rst @@ -0,0 +1,7 @@ +MaskedArrays gain a ``__array_function__`` method +------------------------------------------------- + +The MaskedArray class now has an implementation of ``__array_function__`` that +dispatches to a masked version of the function when one is available. +This means that calling ``np.func(ma)`` with a MaskedArray ``ma`` will +behave the same as ``np.ma.func(ma)`` when possible. \ No newline at end of file diff --git a/numpy/ma/core.py b/numpy/ma/core.py index cbb3858ab421..32f97084fb3d 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -3122,6 +3122,46 @@ def __array_wrap__(self, obj, context=None): return result + def __array_function__(self, function, types, args, kwargs): + """ + Wrap numpy functions that have a masked implementation. 
+ + Parameters + ---------- + function : callable + Numpy function being called + types : iterable of classes + Classes that provide an ``__array_function__`` override. Can + in principle be used to interact with other classes. Below, + mostly passed on to `~numpy.ndarray`, which can only interact + with subclasses. + args : tuple + Positional arguments provided in the function call. + kwargs : dict + Keyword arguments provided in the function call. + """ + ma_func = np.ma.__dict__.get(function.__name__, function) + # Known incompatible functions + # copy: np.copy() has subok semantics, which we can't pass through + # unique: using ma.view(ndarray) internally doesn't work + incompatible_functions = {"copy", "unique"} + # The masked functions know how to work with ndarray and + # MaskedArrays, but can't guarantee anything about other types + handled_types = {np.ndarray, MaskedArray} + # _convert2ma functions simply guarantee that a MaskedArray + # is returned, which is important when the function is called as + # ``np.ma.function(arr)``, but not necessary when we get there + # through the ``np.function(marr)`` path. This prevents infinite + # recursion of this implementation calling the ndarray method. + if (ma_func is function + or function.__name__ in incompatible_functions + or not all([t in handled_types for t in types]) + or isinstance(ma_func, _convert2ma)): + # We don't have a Masked-compatible version of the function, + # so just default to the super() ndarray version + return super().__array_function__(function, types, args, kwargs) + return ma_func(*args, **kwargs) + def view(self, dtype=None, type=None, fill_value=None): """ Return a view of the MaskedArray data. 
@@ -7251,7 +7291,7 @@ def diag(v, k=0): fill_value=1e+20) """ - output = np.diag(v, k).view(MaskedArray) + output = np.diag(v.view(np.ndarray), k).view(MaskedArray) if getmask(v) is not nomask: output._mask = np.diag(v._mask, k) return output @@ -7487,7 +7527,8 @@ def resize(x, new_shape): m = getmask(x) if m is not nomask: m = np.resize(m, new_shape) - result = np.resize(x, new_shape).view(get_masked_subclass(x)) + result = np.resize(x.view(np.ndarray), + new_shape).view(get_masked_subclass(x)) if result.ndim: result._mask = m return result @@ -7795,7 +7836,7 @@ def fmask(x): "Returns the filled array, or True if masked." if x is masked: return True - return filled(x) + return filled(x).view(np.ndarray) def nmask(x): "Returns the mask, True if ``masked``, False if ``nomask``." @@ -7808,11 +7849,14 @@ def nmask(x): masks = [nmask(x) for x in choices] data = [fmask(x) for x in choices] # Construct the mask - outputmask = np.choose(c, masks, mode=mode) + outputmask = np.choose(c.view(np.ndarray), masks, mode=mode) outputmask = make_mask(mask_or(outputmask, getmask(indices)), copy=False, shrink=True) # Get the choices. 
- d = np.choose(c, data, mode=mode, out=out).view(MaskedArray) + outview = out.view(np.ndarray) if isinstance(out, MaskedArray) else out + + d = np.choose(c.view(np.ndarray), data, + mode=mode, out=outview).view(MaskedArray) if out is not None: if isinstance(out, MaskedArray): out.__setmask__(outputmask) @@ -7868,9 +7912,9 @@ def round_(a, decimals=0, out=None): fill_value=1e+20) """ if out is None: - return np.round_(a, decimals, out) + return np.round(a, decimals, out) else: - np.round_(getdata(a), decimals, out) + np.round(getdata(a), decimals, out) if hasattr(out, '_mask'): out._mask = getmask(a) return out diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py index 23e97a739951..6fa06ccfea74 100644 --- a/numpy/ma/extras.py +++ b/numpy/ma/extras.py @@ -2121,13 +2121,16 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False): if w.shape[0] != y.shape[0]: raise TypeError("expected w and y to have the same length") m = mask_or(m, getmask(w)) + w = w.view(np.ndarray) if m is not nomask: not_m = ~m if w is not None: w = w[not_m] - return np.polyfit(x[not_m], y[not_m], deg, rcond, full, w, cov) - else: - return np.polyfit(x, y, deg, rcond, full, w, cov) + x = x[not_m] + y = y[not_m] + x = x.view(np.ndarray) + y = y.view(np.ndarray) + return np.polyfit(x, y, deg, rcond, full, w, cov) polyfit.__doc__ = ma.doc_note(np.polyfit.__doc__, polyfit.__doc__) diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index 7e5282aabaa2..f18f32c93fb0 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -5429,11 +5429,12 @@ def test_masked_array_no_copy(): _ = np.ma.masked_invalid(a, copy=False) assert_array_equal(a.mask, [True, False, False, False, False]) -def test_append_masked_array(): +@pytest.mark.parametrize("append", [np.append, np.ma.append]) +def test_append_masked_array(append): a = np.ma.masked_equal([1,2,3], value=2) b = np.ma.masked_equal([4,3,2], value=2) - result = np.ma.append(a, b) + result = append(a, b) 
expected_data = [1, 2, 3, 4, 3, 2] expected_mask = [False, True, False, False, False, True] assert_array_equal(result.data, expected_data) @@ -5442,13 +5443,13 @@ def test_append_masked_array(): a = np.ma.masked_all((2,2)) b = np.ma.ones((3,1)) - result = np.ma.append(a, b) + result = append(a, b) expected_data = [1] * 3 expected_mask = [True] * 4 + [False] * 3 assert_array_equal(result.data[-3], expected_data) assert_array_equal(result.mask, expected_mask) - result = np.ma.append(a, b, axis=None) + result = append(a, b, axis=None) assert_array_equal(result.data[-3], expected_data) assert_array_equal(result.mask, expected_mask) diff --git a/numpy/ma/tests/test_extras.py b/numpy/ma/tests/test_extras.py index d09a50fecd4a..7f6edb8ab1e3 100644 --- a/numpy/ma/tests/test_extras.py +++ b/numpy/ma/tests/test_extras.py @@ -29,7 +29,7 @@ ediff1d, apply_over_axes, apply_along_axis, compress_nd, compress_rowcols, mask_rowcols, clump_masked, clump_unmasked, flatnotmasked_contiguous, notmasked_contiguous, notmasked_edges, masked_all, masked_all_like, isin, - diagflat, ndenumerate, stack, vstack + diagflat, ndenumerate ) @@ -1790,7 +1790,10 @@ def test_ndenumerate_mixedmasked(self): class TestStack: - def test_stack_1d(self): + @pytest.mark.parametrize("stack,vstack", + [pytest.param(np.stack, np.vstack, id="np-version"), + pytest.param(np.ma.stack, np.ma.vstack, id="ma-version")]) + def test_stack_1d(self, stack, vstack): a = masked_array([0, 1, 2], mask=[0, 1, 0]) b = masked_array([9, 8, 7], mask=[1, 0, 0]) @@ -1808,7 +1811,9 @@ def test_stack_1d(self): assert_array_equal(a.mask, c[:, 0].mask) assert_array_equal(b.mask, c[:, 1].mask) - def test_stack_masks(self): + @pytest.mark.parametrize("stack,vstack", + [(np.stack, np.vstack), (np.ma.stack, np.ma.vstack)]) + def test_stack_masks(self, stack, vstack): a = masked_array([0, 1, 2], mask=True) b = masked_array([9, 8, 7], mask=False) @@ -1826,7 +1831,8 @@ def test_stack_masks(self): assert_array_equal(a.mask, c[:, 0].mask) 
assert_array_equal(b.mask, c[:, 1].mask) - def test_stack_nd(self): + @pytest.mark.parametrize("stack", [np.stack, np.ma.stack]) + def test_stack_nd(self, stack): # 2D shp = (3, 2) d1 = np.random.randint(0, 10, shp) From 50074f246f26ca82028ba4b4391e59a0cf7032aa Mon Sep 17 00:00:00 2001 From: Greg Lucas Date: Tue, 1 Aug 2023 21:34:40 -0600 Subject: [PATCH 2/5] FIX: Remove masked diff implementation The manual masked implementation was largely copied over from the numpy array implementation just to handle masked concatenate. Now that we use array function implementations, we get that automatically with the np.diff() call, so there is no need to re-implement things in the masked namespace anymore. --- numpy/ma/core.py | 141 ++--------------------------------------------- 1 file changed, 6 insertions(+), 135 deletions(-) diff --git a/numpy/ma/core.py b/numpy/ma/core.py index 32f97084fb3d..ccd5be162869 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -7556,141 +7556,6 @@ def size(obj, axis=None): size.__doc__ = np.size.__doc__ -def diff(a, /, n=1, axis=-1, prepend=np._NoValue, append=np._NoValue): - """ - Calculate the n-th discrete difference along the given axis. - The first difference is given by ``out[i] = a[i+1] - a[i]`` along - the given axis, higher differences are calculated by using `diff` - recursively. - Preserves the input mask. - - Parameters - ---------- - a : array_like - Input array - n : int, optional - The number of times values are differenced. If zero, the input - is returned as-is. - axis : int, optional - The axis along which the difference is taken, default is the - last axis. - prepend, append : array_like, optional - Values to prepend or append to `a` along axis prior to - performing the difference. Scalar values are expanded to - arrays with length 1 in the direction of axis and the shape - of the input array in along all other axes. Otherwise the - dimension and shape must match `a` except along axis. 
- - Returns - ------- - diff : MaskedArray - The n-th differences. The shape of the output is the same as `a` - except along `axis` where the dimension is smaller by `n`. The - type of the output is the same as the type of the difference - between any two elements of `a`. This is the same as the type of - `a` in most cases. A notable exception is `datetime64`, which - results in a `timedelta64` output array. - - See Also - -------- - numpy.diff : Equivalent function in the top-level NumPy module. - - Notes - ----- - Type is preserved for boolean arrays, so the result will contain - `False` when consecutive elements are the same and `True` when they - differ. - - For unsigned integer arrays, the results will also be unsigned. This - should not be surprising, as the result is consistent with - calculating the difference directly: - - >>> u8_arr = np.array([1, 0], dtype=np.uint8) - >>> np.ma.diff(u8_arr) - masked_array(data=[255], - mask=False, - fill_value=np.int64(999999), - dtype=uint8) - >>> u8_arr[1,...] - u8_arr[0,...] 
- 255 - - If this is not desirable, then the array should be cast to a larger - integer type first: - - >>> i16_arr = u8_arr.astype(np.int16) - >>> np.ma.diff(i16_arr) - masked_array(data=[-1], - mask=False, - fill_value=np.int64(999999), - dtype=int16) - - Examples - -------- - >>> a = np.array([1, 2, 3, 4, 7, 0, 2, 3]) - >>> x = np.ma.masked_where(a < 2, a) - >>> np.ma.diff(x) - masked_array(data=[--, 1, 1, 3, --, --, 1], - mask=[ True, False, False, False, True, True, False], - fill_value=999999) - - >>> np.ma.diff(x, n=2) - masked_array(data=[--, 0, 2, --, --, --], - mask=[ True, False, False, True, True, True], - fill_value=999999) - - >>> a = np.array([[1, 3, 1, 5, 10], [0, 1, 5, 6, 8]]) - >>> x = np.ma.masked_equal(a, value=1) - >>> np.ma.diff(x) - masked_array( - data=[[--, --, --, 5], - [--, --, 1, 2]], - mask=[[ True, True, True, False], - [ True, True, False, False]], - fill_value=1) - - >>> np.ma.diff(x, axis=0) - masked_array(data=[[--, --, --, 1, -2]], - mask=[[ True, True, True, False, False]], - fill_value=1) - - """ - if n == 0: - return a - if n < 0: - raise ValueError("order must be non-negative but got " + repr(n)) - - a = np.ma.asanyarray(a) - if a.ndim == 0: - raise ValueError( - "diff requires input that is at least one dimensional" - ) - - combined = [] - if prepend is not np._NoValue: - prepend = np.ma.asanyarray(prepend) - if prepend.ndim == 0: - shape = list(a.shape) - shape[axis] = 1 - prepend = np.broadcast_to(prepend, tuple(shape)) - combined.append(prepend) - - combined.append(a) - - if append is not np._NoValue: - append = np.ma.asanyarray(append) - if append.ndim == 0: - shape = list(a.shape) - shape[axis] = 1 - append = np.broadcast_to(append, tuple(shape)) - combined.append(append) - - if len(combined) > 1: - a = np.ma.concatenate(combined, axis) - - # GH 22465 np.diff without prepend/append preserves the mask - return np.diff(a, n, axis) - - ############################################################################## # Extra 
functions # ############################################################################## @@ -8558,6 +8423,12 @@ def __call__(self, *args, **params): np_ret='clipped_array : ndarray', np_ma_ret='clipped_array : MaskedArray', ) +diff = _convert2ma( + 'diff', + params=dict(fill_value=None, hardmask=False), + np_ret='diff : ndarray', + np_ma_ret='diff : MaskedArray', +) empty = _convert2ma( 'empty', params=dict(fill_value=None, hardmask=False), From 5e98ad1bc33ef46dfc3b62925b5630c83ee2be8e Mon Sep 17 00:00:00 2001 From: Greg Lucas Date: Fri, 4 Aug 2023 21:47:15 -0600 Subject: [PATCH 3/5] MNT: consolidate masked round implementation to use frommethod --- numpy/ma/core.py | 58 ++---------------------------------------------- 1 file changed, 2 insertions(+), 56 deletions(-) diff --git a/numpy/ma/core.py b/numpy/ma/core.py index ccd5be162869..0c43cd88b915 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -6994,6 +6994,8 @@ def __call__(self, a, *args, **params): prod = _frommethod('prod') product = _frommethod('prod') ravel = _frommethod('ravel') +round = _frommethod('round') +round_ = round repeat = _frommethod('repeat') shrink_mask = _frommethod('shrink_mask') soften_mask = _frommethod('soften_mask') @@ -7730,62 +7732,6 @@ def nmask(x): return d -def round_(a, decimals=0, out=None): - """ - Return a copy of a, rounded to 'decimals' places. - - When 'decimals' is negative, it specifies the number of positions - to the left of the decimal point. The real and imaginary parts of - complex numbers are rounded separately. Nothing is done if the - array is not of float type and 'decimals' is greater than or equal - to 0. - - Parameters - ---------- - decimals : int - Number of decimals to round to. May be negative. - out : array_like - Existing array to use for output. - If not given, returns a default copy of a. - - Notes - ----- - If out is given and does not have a mask attribute, the mask of a - is lost! 
- - Examples - -------- - >>> import numpy.ma as ma - >>> x = [11.2, -3.973, 0.801, -1.41] - >>> mask = [0, 0, 0, 1] - >>> masked_x = ma.masked_array(x, mask) - >>> masked_x - masked_array(data=[11.2, -3.973, 0.801, --], - mask=[False, False, False, True], - fill_value=1e+20) - >>> ma.round_(masked_x) - masked_array(data=[11.0, -4.0, 1.0, --], - mask=[False, False, False, True], - fill_value=1e+20) - >>> ma.round(masked_x, decimals=1) - masked_array(data=[11.2, -4.0, 0.8, --], - mask=[False, False, False, True], - fill_value=1e+20) - >>> ma.round_(masked_x, decimals=-1) - masked_array(data=[10.0, -0.0, 0.0, --], - mask=[False, False, False, True], - fill_value=1e+20) - """ - if out is None: - return np.round(a, decimals, out) - else: - np.round(getdata(a), decimals, out) - if hasattr(out, '_mask'): - out._mask = getmask(a) - return out -round = round_ - - def _mask_propagate(a, axis): """ Mask whole 1-d vectors of an array that contain masked values. From a7ef3ccbb9f4b254353da28f508accf341280276 Mon Sep 17 00:00:00 2001 From: Greg Lucas Date: Sat, 5 Aug 2023 07:31:50 -0600 Subject: [PATCH 4/5] DOC: np.resize is the same as ma.resize on masked arrays now --- numpy/ma/core.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/numpy/ma/core.py b/numpy/ma/core.py index 0c43cd88b915..7237d376c4c6 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -7496,13 +7496,6 @@ def resize(x, new_shape): mask=[[False, True], [False, False]], fill_value=999999) - >>> np.resize(a, (3, 3)) - masked_array( - data=[[1, 2, 3], - [4, 1, 2], - [3, 4, 1]], - mask=False, - fill_value=999999) >>> ma.resize(a, (3, 3)) masked_array( data=[[1, --, 3], From 0ee3075210f19523bb97b6a1f500b0be02c1b9ce Mon Sep 17 00:00:00 2001 From: Greg Lucas Date: Sat, 5 Aug 2023 08:26:56 -0600 Subject: [PATCH 5/5] DOC: np.concatenate does preserve the mask now --- numpy/core/multiarray.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/numpy/core/multiarray.py 
b/numpy/core/multiarray.py index bea9ddf9ef5f..cc20db591839 100644 --- a/numpy/core/multiarray.py +++ b/numpy/core/multiarray.py @@ -220,7 +220,7 @@ def concatenate(arrays, axis=None, out=None, *, dtype=None, casting=None): >>> np.concatenate((a, b), axis=None) array([1, 2, 3, 4, 5, 6]) - This function will not preserve masking of MaskedArray inputs. + This function preserves the masks of MaskedArray inputs. >>> a = np.ma.arange(3) >>> a[1] = np.ma.masked @@ -232,10 +232,6 @@ def concatenate(arrays, axis=None, out=None, *, dtype=None, casting=None): >>> b array([2, 3, 4]) >>> np.concatenate([a, b]) - masked_array(data=[0, 1, 2, 2, 3, 4], - mask=False, - fill_value=999999) - >>> np.ma.concatenate([a, b]) masked_array(data=[0, --, 2, 2, 3, 4], mask=[False, True, False, False, False, False], fill_value=999999)