-
-
Notifications
You must be signed in to change notification settings - Fork 11k
BUG: Fix argsort vs sort in Masked arrays #8678
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9c865d5
f91eb36
11c5a9f
8e83849
821293d
ee90efc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5230,7 +5230,8 @@ def round(self, decimals=0, out=None): | |
out.__setmask__(self._mask) | ||
return out | ||
|
||
def argsort(self, axis=None, kind='quicksort', order=None, fill_value=None): | ||
def argsort(self, axis=None, kind='quicksort', order=None, | ||
endwith=True, fill_value=None): | ||
""" | ||
Return an ndarray of indices that sort the array along the | ||
specified axis. Masked values are filled beforehand to | ||
|
@@ -5241,15 +5242,21 @@ def argsort(self, axis=None, kind='quicksort', order=None, fill_value=None): | |
axis : int, optional | ||
Axis along which to sort. The default is -1 (last axis). | ||
If None, the flattened array is used. | ||
fill_value : var, optional | ||
Value used to fill the array before sorting. | ||
The default is the `fill_value` attribute of the input array. | ||
kind : {'quicksort', 'mergesort', 'heapsort'}, optional | ||
Sorting algorithm. | ||
order : list, optional | ||
When `a` is an array with fields defined, this argument specifies | ||
which fields to compare first, second, etc. Not all fields need be | ||
specified. | ||
endwith : {True, False}, optional | ||
Whether missing values (if any) should be treated as the largest values | ||
(True) or the smallest values (False) | ||
When the array contains unmasked values at the same extremes of the | ||
datatype, the ordering of these values and the masked values is | ||
undefined. | ||
fill_value : {var}, optional | ||
Value used internally for the masked values. | ||
If ``fill_value`` is not None, it supersedes ``endwith``. | ||
|
||
Returns | ||
------- | ||
|
@@ -5259,7 +5266,7 @@ def argsort(self, axis=None, kind='quicksort', order=None, fill_value=None): | |
|
||
See Also | ||
-------- | ||
sort : Describes sorting algorithms used. | ||
MaskedArray.sort : Describes sorting algorithms used. | ||
lexsort : Indirect stable sort with multiple keys. | ||
ndarray.sort : Inplace sort. | ||
|
||
|
@@ -5278,10 +5285,19 @@ def argsort(self, axis=None, kind='quicksort', order=None, fill_value=None): | |
array([1, 0, 2]) | ||
|
||
""" | ||
|
||
if fill_value is None: | ||
fill_value = default_fill_value(self) | ||
d = self.filled(fill_value).view(ndarray) | ||
return d.argsort(axis=axis, kind=kind, order=order) | ||
if endwith: | ||
# nan > inf | ||
if np.issubdtype(self.dtype, np.floating): | ||
fill_value = np.nan | ||
else: | ||
fill_value = minimum_fill_value(self) | ||
else: | ||
fill_value = maximum_fill_value(self) | ||
|
||
filled = self.filled(fill_value) | ||
return filled.argsort(axis=axis, kind=kind, order=order) | ||
|
||
def argmin(self, axis=None, fill_value=None, out=None): | ||
""" | ||
|
@@ -5380,12 +5396,11 @@ def sort(self, axis=-1, kind='quicksort', order=None, | |
to compare first, second, and so on. This list does not need to | ||
include all of the fields. | ||
endwith : {True, False}, optional | ||
Whether missing values (if any) should be forced in the upper indices | ||
(at the end of the array) (True) or lower indices (at the beginning). | ||
When the array contains unmasked values of the largest (or smallest if | ||
False) representable value of the datatype the ordering of these values | ||
and the masked values is undefined. To enforce the masked values are | ||
at the end (beginning) in this case one must sort the mask. | ||
Whether missing values (if any) should be treated as the largest values | ||
(True) or the smallest values (False) | ||
When the array contains unmasked values at the same extremes of the | ||
datatype, the ordering of these values and the masked values is | ||
undefined. | ||
fill_value : {var}, optional | ||
Value used internally for the masked values. | ||
If ``fill_value`` is not None, it supersedes ``endwith``. | ||
|
@@ -5429,35 +5444,22 @@ def sort(self, axis=-1, kind='quicksort', order=None, | |
""" | ||
if self._mask is nomask: | ||
ndarray.sort(self, axis=axis, kind=kind, order=order) | ||
return | ||
|
||
if self is masked: | ||
return | ||
|
||
sidx = self.argsort(axis=axis, kind=kind, order=order, | ||
fill_value=fill_value, endwith=endwith) | ||
|
||
# save memory for 1d arrays | ||
if self.ndim == 1: | ||
idx = sidx | ||
else: | ||
if self is masked: | ||
return self | ||
if fill_value is None: | ||
if endwith: | ||
# nan > inf | ||
if np.issubdtype(self.dtype, np.floating): | ||
filler = np.nan | ||
else: | ||
filler = minimum_fill_value(self) | ||
else: | ||
filler = maximum_fill_value(self) | ||
else: | ||
filler = fill_value | ||
idx = list(np.ix_(*[np.arange(x) for x in self.shape])) | ||
idx[axis] = sidx | ||
|
||
sidx = self.filled(filler).argsort(axis=axis, kind=kind, | ||
order=order) | ||
# save meshgrid memory for 1d arrays | ||
if self.ndim == 1: | ||
idx = sidx | ||
else: | ||
idx = np.meshgrid(*[np.arange(x) for x in self.shape], sparse=True, | ||
indexing='ij') | ||
idx[axis] = sidx | ||
tmp_mask = self._mask[idx].flat | ||
tmp_data = self._data[idx].flat | ||
self._data.flat = tmp_data | ||
self._mask.flat = tmp_mask | ||
return | ||
self[...] = self[idx] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In theory we could do a little better here by implementing the in-place sort in terms of the non-in-place one. Right now (and before this patch), the latter does a redundant copy. Either way, this is best left for another PR, I think. |
||
|
||
def min(self, axis=None, out=None, fill_value=None, keepdims=np._NoValue): | ||
""" | ||
|
@@ -6500,49 +6502,33 @@ def power(a, b, third=None): | |
result._data[invalid] = result.fill_value | ||
return result | ||
|
||
|
||
def argsort(a, axis=None, kind='quicksort', order=None, fill_value=None): | ||
"Function version of the eponymous method." | ||
if fill_value is None: | ||
fill_value = default_fill_value(a) | ||
d = filled(a, fill_value) | ||
if axis is None: | ||
return d.argsort(kind=kind, order=order) | ||
return d.argsort(axis, kind=kind, order=order) | ||
argsort.__doc__ = MaskedArray.argsort.__doc__ | ||
|
||
argmin = _frommethod('argmin') | ||
argmax = _frommethod('argmax') | ||
|
||
def argsort(a, axis=None, kind='quicksort', order=None, endwith=True, fill_value=None): | ||
"Function version of the eponymous method." | ||
a = np.asanyarray(a) | ||
|
||
if isinstance(a, MaskedArray): | ||
return a.argsort(axis=axis, kind=kind, order=order, | ||
endwith=endwith, fill_value=fill_value) | ||
else: | ||
return a.argsort(axis=axis, kind=kind, order=order) | ||
argsort.__doc__ = MaskedArray.argsort.__doc__ | ||
|
||
def sort(a, axis=-1, kind='quicksort', order=None, endwith=True, fill_value=None): | ||
"Function version of the eponymous method." | ||
a = narray(a, copy=True, subok=True) | ||
a = np.array(a, copy=True, subok=True) | ||
if axis is None: | ||
a = a.flatten() | ||
axis = 0 | ||
if fill_value is None: | ||
if endwith: | ||
# nan > inf | ||
if np.issubdtype(a.dtype, np.floating): | ||
filler = np.nan | ||
else: | ||
filler = minimum_fill_value(a) | ||
else: | ||
filler = maximum_fill_value(a) | ||
else: | ||
filler = fill_value | ||
|
||
sindx = filled(a, filler).argsort(axis=axis, kind=kind, order=order) | ||
|
||
# save meshgrid memory for 1d arrays | ||
if a.ndim == 1: | ||
indx = sindx | ||
if isinstance(a, MaskedArray): | ||
a.sort(axis=axis, kind=kind, order=order, | ||
endwith=endwith, fill_value=fill_value) | ||
else: | ||
indx = np.meshgrid(*[np.arange(x) for x in a.shape], sparse=True, | ||
indexing='ij') | ||
indx[axis] = sindx | ||
return a[indx] | ||
a.sort(axis=axis, kind=kind, order=order) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's a choice we need to make here. Right now (and before this patch), Should we change it to always promote to |
||
return a | ||
sort.__doc__ = MaskedArray.sort.__doc__ | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't really like these shortcuts very much; the explicit meshgrid is easier to read, IMO.
There isn't really an advantage to using `ix_`, is there?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this really a shortcut, though? The two functions are independent, with neither calling the other. Meshgrid doesn't do what we need by default, but `ix_` does. Furthermore, `ix_` is used as an indexer in its examples, whereas meshgrid is used to evaluate functions. The former use is what we want here.