diff --git a/doc/release/1.13.0-notes.rst b/doc/release/1.13.0-notes.rst index 49748e42edfa..5f8c40cd2727 100644 --- a/doc/release/1.13.0-notes.rst +++ b/doc/release/1.13.0-notes.rst @@ -200,6 +200,14 @@ All of the following functions in ``np.linalg`` now work when given input arrays with a 0 in the last two dimensions: `det``, ``slogdet``, ``pinv``, ``eigvals``, ``eigvalsh``, ``eig``, ``eigh``. +``argsort`` on masked arrays takes the same default arguments as ``sort`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +By default, ``argsort`` now places the masked values at the end of the sorted +array, in the same way that ``sort`` already did. Additionally, the +``endwith`` argument is added to ``argsort``, for consistency with ``sort``. +Note that this argument is not added at the end, so it breaks any code that +passed ``fill_value`` as a positional argument. + Changes ======= diff --git a/numpy/lib/tests/test_arraysetops.py b/numpy/lib/tests/test_arraysetops.py index 8b142c2640dc..eb4cca0ceec2 100644 --- a/numpy/lib/tests/test_arraysetops.py +++ b/numpy/lib/tests/test_arraysetops.py @@ -352,6 +352,18 @@ def test_unique_axis(self): result = np.array([[-0.0, 0.0]]) assert_array_equal(unique(data, axis=0), result, msg) + def test_unique_masked(self): + # issue 8664 + x = np.array([64, 0, 1, 2, 3, 63, 63, 0, 0, 0, 1, 2, 0, 63, 0], dtype='uint8') + y = np.ma.masked_equal(x, 0) + + v = np.unique(y) + v2, i, c = np.unique(y, return_index=True, return_counts=True) + + msg = 'Unique returned different results when asked for index' + assert_array_equal(v.data, v2.data, msg) + assert_array_equal(v.mask, v2.mask, msg) + def _run_axis_tests(self, dtype): data = np.array([[0, 1, 0, 0], [1, 0, 0, 0], diff --git a/numpy/ma/core.py b/numpy/ma/core.py index 5ef4e13694ba..e78d1601dc0c 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -5230,7 +5230,8 @@ def round(self, decimals=0, out=None): out.__setmask__(self._mask) return out - def argsort(self, 
axis=None, kind='quicksort', order=None, fill_value=None): + def argsort(self, axis=None, kind='quicksort', order=None, + endwith=True, fill_value=None): """ Return an ndarray of indices that sort the array along the specified axis. Masked values are filled beforehand to @@ -5241,15 +5242,21 @@ def argsort(self, axis=None, kind='quicksort', order=None, fill_value=None): axis : int, optional Axis along which to sort. The default is -1 (last axis). If None, the flattened array is used. - fill_value : var, optional - Value used to fill the array before sorting. - The default is the `fill_value` attribute of the input array. kind : {'quicksort', 'mergesort', 'heapsort'}, optional Sorting algorithm. order : list, optional When `a` is an array with fields defined, this argument specifies which fields to compare first, second, etc. Not all fields need be specified. + endwith : {True, False}, optional + Whether missing values (if any) should be treated as the largest values + (True) or the smallest values (False) + When the array contains unmasked values at the same extremes of the + datatype, the ordering of these values and the masked values is + undefined. + fill_value : {var}, optional + Value used internally for the masked values. + If ``fill_value`` is not None, it supersedes ``endwith``. Returns ------- @@ -5259,7 +5266,7 @@ def argsort(self, axis=None, kind='quicksort', order=None, fill_value=None): See Also -------- - sort : Describes sorting algorithms used. + MaskedArray.sort : Describes sorting algorithms used. lexsort : Indirect stable sort with multiple keys. ndarray.sort : Inplace sort. 
@@ -5278,10 +5285,19 @@ def argsort(self, axis=None, kind='quicksort', order=None, fill_value=None): array([1, 0, 2]) """ + if fill_value is None: - fill_value = default_fill_value(self) - d = self.filled(fill_value).view(ndarray) - return d.argsort(axis=axis, kind=kind, order=order) + if endwith: + # nan > inf + if np.issubdtype(self.dtype, np.floating): + fill_value = np.nan + else: + fill_value = minimum_fill_value(self) + else: + fill_value = maximum_fill_value(self) + + filled = self.filled(fill_value) + return filled.argsort(axis=axis, kind=kind, order=order) def argmin(self, axis=None, fill_value=None, out=None): """ @@ -5380,12 +5396,11 @@ def sort(self, axis=-1, kind='quicksort', order=None, to compare first, second, and so on. This list does not need to include all of the fields. endwith : {True, False}, optional - Whether missing values (if any) should be forced in the upper indices - (at the end of the array) (True) or lower indices (at the beginning). - When the array contains unmasked values of the largest (or smallest if - False) representable value of the datatype the ordering of these values - and the masked values is undefined. To enforce the masked values are - at the end (beginning) in this case one must sort the mask. + Whether missing values (if any) should be treated as the largest values + (True) or the smallest values (False) + When the array contains unmasked values at the same extremes of the + datatype, the ordering of these values and the masked values is + undefined. fill_value : {var}, optional Value used internally for the masked values. If ``fill_value`` is not None, it supersedes ``endwith``. 
@@ -5429,35 +5444,22 @@ def sort(self, axis=-1, kind='quicksort', order=None, """ if self._mask is nomask: ndarray.sort(self, axis=axis, kind=kind, order=order) + return + + if self is masked: + return + + sidx = self.argsort(axis=axis, kind=kind, order=order, + fill_value=fill_value, endwith=endwith) + + # save memory for 1d arrays + if self.ndim == 1: + idx = sidx else: - if self is masked: - return self - if fill_value is None: - if endwith: - # nan > inf - if np.issubdtype(self.dtype, np.floating): - filler = np.nan - else: - filler = minimum_fill_value(self) - else: - filler = maximum_fill_value(self) - else: - filler = fill_value + idx = list(np.ix_(*[np.arange(x) for x in self.shape])) + idx[axis] = sidx - sidx = self.filled(filler).argsort(axis=axis, kind=kind, - order=order) - # save meshgrid memory for 1d arrays - if self.ndim == 1: - idx = sidx - else: - idx = np.meshgrid(*[np.arange(x) for x in self.shape], sparse=True, - indexing='ij') - idx[axis] = sidx - tmp_mask = self._mask[idx].flat - tmp_data = self._data[idx].flat - self._data.flat = tmp_data - self._mask.flat = tmp_mask - return + self[...] = self[idx] def min(self, axis=None, out=None, fill_value=None, keepdims=np._NoValue): """ @@ -6500,49 +6502,33 @@ def power(a, b, third=None): result._data[invalid] = result.fill_value return result - -def argsort(a, axis=None, kind='quicksort', order=None, fill_value=None): - "Function version of the eponymous method." - if fill_value is None: - fill_value = default_fill_value(a) - d = filled(a, fill_value) - if axis is None: - return d.argsort(kind=kind, order=order) - return d.argsort(axis, kind=kind, order=order) -argsort.__doc__ = MaskedArray.argsort.__doc__ - argmin = _frommethod('argmin') argmax = _frommethod('argmax') +def argsort(a, axis=None, kind='quicksort', order=None, endwith=True, fill_value=None): + "Function version of the eponymous method." 
+ a = np.asanyarray(a) + + if isinstance(a, MaskedArray): + return a.argsort(axis=axis, kind=kind, order=order, + endwith=endwith, fill_value=fill_value) + else: + return a.argsort(axis=axis, kind=kind, order=order) +argsort.__doc__ = MaskedArray.argsort.__doc__ def sort(a, axis=-1, kind='quicksort', order=None, endwith=True, fill_value=None): "Function version of the eponymous method." - a = narray(a, copy=True, subok=True) + a = np.array(a, copy=True, subok=True) if axis is None: a = a.flatten() axis = 0 - if fill_value is None: - if endwith: - # nan > inf - if np.issubdtype(a.dtype, np.floating): - filler = np.nan - else: - filler = minimum_fill_value(a) - else: - filler = maximum_fill_value(a) - else: - filler = fill_value - - sindx = filled(a, filler).argsort(axis=axis, kind=kind, order=order) - # save meshgrid memory for 1d arrays - if a.ndim == 1: - indx = sindx + if isinstance(a, MaskedArray): + a.sort(axis=axis, kind=kind, order=order, + endwith=endwith, fill_value=fill_value) else: - indx = np.meshgrid(*[np.arange(x) for x in a.shape], sparse=True, - indexing='ij') - indx[axis] = sindx - return a[indx] + a.sort(axis=axis, kind=kind, order=order) + return a sort.__doc__ = MaskedArray.sort.__doc__ diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index 93898c4d08ae..a65cac8c8d92 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -3031,6 +3031,20 @@ def test_sort(self): assert_equal(sortedx._data, [1, 2, -2, -1, 0]) assert_equal(sortedx._mask, [1, 1, 0, 0, 0]) + def test_argsort_matches_sort(self): + x = array([1, 4, 2, 3], mask=[0, 1, 0, 0], dtype=np.uint8) + + for kwargs in [dict(), + dict(endwith=True), + dict(endwith=False), + dict(fill_value=2), + dict(fill_value=2, endwith=True), + dict(fill_value=2, endwith=False)]: + sortedx = sort(x, **kwargs) + argsortedx = x[argsort(x, **kwargs)] + assert_equal(sortedx._data, argsortedx._data) + assert_equal(sortedx._mask, argsortedx._mask) + def test_sort_2d(self): 
# Check sort of 2D array. # 2D array w/o mask