Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
2933930
ENH add hash based unique
adrinjalali Mar 14, 2024
8da2e72
getting closer
adrinjalali Mar 14, 2024
961ef5b
trying to expose as a module
adrinjalali Mar 15, 2024
a6b1847
trying to create a module
adrinjalali Mar 17, 2024
1f1c36c
fix build
adrinjalali Mar 18, 2024
0bc43c3
...
adrinjalali Mar 18, 2024
f94cf89
Merge remote-tracking branch 'upstream/main' into unique-cpp
adrinjalali Mar 18, 2024
a37b151
segfault fix, imported numpy
adrinjalali Mar 19, 2024
8f42b0b
getting unique back, refcount issues exist
adrinjalali Mar 19, 2024
f56634f
unique works
adrinjalali Mar 19, 2024
bce7534
remove header
adrinjalali Mar 19, 2024
0c6c588
cleanups and comments
adrinjalali Mar 21, 2024
9b7d6f6
change type
adrinjalali Mar 21, 2024
85cf692
fix for initialization issue
adrinjalali Mar 21, 2024
3db3349
trying to move module
adrinjalali Mar 22, 2024
8d4b6be
Merge remote-tracking branch 'upstream/main' into unique-cpp
adrinjalali May 13, 2024
9e7d671
Revert "trying to move module"
adrinjalali May 13, 2024
a4e8a29
use unordered_set and use a finally construct to handle exceptions
adrinjalali Sep 19, 2024
6a8c69c
Merge remote-tracking branch 'upstream/main' into unique-cpp
adrinjalali Sep 19, 2024
0c3b889
handle C++ exceptions, and use explicit types
adrinjalali Sep 20, 2024
cc39a50
make it C importable
adrinjalali Sep 23, 2024
92adb26
add missing header file
adrinjalali Sep 23, 2024
8b7ad2e
fix skip API test
adrinjalali Sep 23, 2024
8f95240
rename _core.unique to _core._unique
adrinjalali Sep 23, 2024
1d0c596
use _unique name
adrinjalali Sep 23, 2024
ed4ea89
Merge remote-tracking branch 'upstream/main' into unique-cpp
adrinjalali Jan 9, 2025
8adbf70
add freethreaded slot
adrinjalali Jan 9, 2025
a8e69ff
apply comments from review
adrinjalali Jan 14, 2025
cdf3af9
Merge remote-tracking branch 'upstream/main' into unique-cpp
adrinjalali Jan 14, 2025
fc1d50e
remove own module, fix segfault
adrinjalali Jan 14, 2025
5dbdf48
raise NotImplementedError instead of returning None
adrinjalali Jan 14, 2025
c8b9d22
release and regrab GIL
adrinjalali Jan 19, 2025
a9df742
fix GIL issues and compile separately
adrinjalali Jan 20, 2025
5333e80
Merge remote-tracking branch 'upstream/main' into unique-cpp
adrinjalali Jan 20, 2025
3bb7c97
add np_core_dep dependency, hoping it fixes the issue
adrinjalali Jan 23, 2025
95a577b
remove include
adrinjalali Jan 23, 2025
724b794
debug ...
adrinjalali Jan 23, 2025
1abc6b5
debug ...
adrinjalali Jan 23, 2025
214cd06
Py_INCREF needs the GIL
adrinjalali Jan 23, 2025
8c184ab
Merge remote-tracking branch 'upstream/main' into unique-cpp
adrinjalali Jan 23, 2025
113e021
revert debug info in CI
adrinjalali Jan 23, 2025
c733f75
Merge highway submodule changes from main
seberg Jan 24, 2025
ae0e936
Merge remote-tracking branch 'upstream/main' into unique-cpp
adrinjalali Feb 1, 2025
b50e7f3
reviews
adrinjalali Feb 20, 2025
712a5cf
add test for ValueError
adrinjalali Feb 20, 2025
8a45f04
changelog
adrinjalali Feb 20, 2025
4999daa
Merge remote-tracking branch 'upstream/main' into unique-cpp
adrinjalali Feb 20, 2025
ab86574
use macro to return notimplemented
adrinjalali Feb 21, 2025
08d7d62
Merge remote-tracking branch 'upstream/main' into unique-cpp
adrinjalali Feb 22, 2025
e1e2ddf
Apply suggestions from code review
seberg Feb 25, 2025
f96411a
MAINT,ENH: Smaller reorgs/maint and use `sorted=False` for `unique_va…
seberg Feb 25, 2025
2319947
Ensure we don't iterate if iterator is empty (also change thread stat…
seberg Feb 25, 2025
e188bf3
DOC: unique_values doc examples may have different order now
seberg Feb 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
MAINT,ENH: Smaller reorgs/maint and use sorted=False for `unique_va…
…lues`
  • Loading branch information
seberg committed Feb 25, 2025
commit f96411aa89949d1394647e849d9170890923bb32
7 changes: 7 additions & 0 deletions doc/release/upcoming_changes/26018.change.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
``unique_values`` may return unsorted data
------------------------------------------
The relatively new function ``unique_values`` (added in NumPy 2.0) may now
return unsorted results. Just like ``unique_counts`` and ``unique_all``,
it never guaranteed a sorted result; however, the result was always sorted
until now. In cases where these functions do return a sorted result, this
may change in future releases to improve performance.
2 changes: 1 addition & 1 deletion numpy/_core/src/multiarray/multiarraymodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -4565,7 +4565,7 @@ static struct PyMethodDef array_module_methods[] = {
{"from_dlpack", (PyCFunction)from_dlpack,
METH_FASTCALL | METH_KEYWORDS, NULL},
{"_unique_hash", (PyCFunction)array__unique_hash,
METH_VARARGS, "Collect unique values via a hash map."},
METH_O, "Collect unique values via a hash map."},
{NULL, NULL, 0, NULL} /* sentinel */
};

Expand Down
79 changes: 32 additions & 47 deletions numpy/_core/src/multiarray/unique.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,33 +39,29 @@ unique(PyArrayObject *self)
custom or complicated dtypes or string values.
*/
NPY_ALLOW_C_API_DEF;
NpyIter* iter;
NpyIter_IterNextFunc *iternext;
char** dataptr;
npy_intp* strideptr,* innersizeptr;
std::unordered_set<T> hashset;

iter = NpyIter_New(self, NPY_ITER_READONLY |
NPY_ITER_EXTERNAL_LOOP |
NPY_ITER_REFS_OK |
NPY_ITER_ZEROSIZE_OK |
NPY_ITER_GROWINNER,
NPY_KEEPORDER, NPY_NO_CASTING,
NULL);
NpyIter *iter = NpyIter_New(self, NPY_ITER_READONLY |
NPY_ITER_EXTERNAL_LOOP |
NPY_ITER_REFS_OK |
NPY_ITER_ZEROSIZE_OK |
NPY_ITER_GROWINNER,
NPY_KEEPORDER, NPY_NO_CASTING,
NULL);
// Making sure the iterator is deallocated when the function returns, with
// or w/o an exception
auto iter_dealloc = finally([&]() { NpyIter_Deallocate(iter); });
if (iter == NULL) {
return NULL;
}

iternext = NpyIter_GetIterNext(iter, NULL);
NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
if (iternext == NULL) {
return NULL;
}
dataptr = NpyIter_GetDataPtrArray(iter);
strideptr = NpyIter_GetInnerStrideArray(iter);
innersizeptr = NpyIter_GetInnerLoopSizePtr(iter);
char **dataptr = NpyIter_GetDataPtrArray(iter);
npy_intp *strideptr = NpyIter_GetInnerStrideArray(iter);
npy_intp *innersizeptr = NpyIter_GetInnerLoopSizePtr(iter);

// release the GIL
PyThreadState *_save;
Expand All @@ -85,15 +81,16 @@ unique(PyArrayObject *self)
}
} while(iternext(iter));

npy_intp dims[1] = {(npy_intp)hashset.size()};
PyArray_Descr *descr = PyArray_DESCR(self);
npy_intp length = hashset.size();

NPY_ALLOW_C_API;
PyArray_Descr *descr = PyArray_DESCR(self);
Py_INCREF(descr);
PyObject *res_obj = PyArray_NewFromDescr(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there were a C++ exception after this point, we would leak it... But I don't think that is possible, so let's not worry about it.

&PyArray_Type,
descr,
1, // ndim
dims, // shape
&length, // shape
NULL, // strides
NULL, // data
// This flag is needed to be able to call .sort on it.
Expand Down Expand Up @@ -143,46 +140,34 @@ std::unordered_map<int, function_type> unique_funcs = {
};


/**
* Python exposed implementation of `_unique_hash`.
*
* This is a C-only function that wraps code which may throw C++ exceptions
* in a try/catch block.
*
* @param arr NumPy array to find the unique values of.
* @return Base-class NumPy array with unique values, `NotImplemented` if the
* type is unsupported or `NULL` with an error set.
*/
extern "C" NPY_NO_EXPORT PyObject *
array__unique_hash(PyObject *NPY_UNUSED(dummy), PyObject *args)
array__unique_hash(PyObject *NPY_UNUSED(module), PyObject *arr_obj)
{
/* This is called from Python space, and expects a single numpy array as input.

It then returns a numpy array containing the unique values of the input array.

If the input array is not supported, it returns None.
*/
// this is to allow grabbing the GIL before raising a python exception.


PyArrayObject *self = NULL;
PyObject *res = NULL;
if (!PyArg_ParseTuple(args, "O&", PyArray_Converter, &self)) {
if (!PyArray_Check(arr_obj)) {
PyErr_SetString(PyExc_TypeError,
"_unique_hash() requires a NumPy array input.");
return NULL;
}
// Making sure the DECREF is called when the function returns, with
// or w/o an exception
auto self_decref = finally([&]() { Py_XDECREF(self); });
PyArrayObject *arr = (PyArrayObject *)arr_obj;

try {
/* Handle zero-sized arrays specially */
if (PyArray_SIZE(self) == 0) {
return PyArray_NewLikeArray(
self,
NPY_ANYORDER,
NULL, // descr (use prototype's descr)
0 // subok (function always returns base-class)
);
}

auto type = PyArray_TYPE(self);
auto type = PyArray_TYPE(arr);
// we only support data types present in our unique_funcs map
if (unique_funcs.find(type) == unique_funcs.end()) {
Py_RETURN_NOTIMPLEMENTED;
}

res = unique_funcs[type](self);
return res;
return unique_funcs[type](arr);
}
catch (const std::bad_alloc &e) {
PyErr_NoMemory();
Expand Down
12 changes: 6 additions & 6 deletions numpy/lib/_arraysetops_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

"""
import functools
from types import NotImplementedType
import warnings
from typing import NamedTuple

Expand Down Expand Up @@ -375,7 +374,7 @@ def _unique1d(ar, return_index=False, return_inverse=False,
conv = _array_converter(ar)
ar_, = conv

if type(hash_unique := _unique_hash(ar_)) != NotImplementedType:
if (hash_unique := _unique_hash(ar_)) is not NotImplemented:
if sorted:
hash_unique.sort()
# We wrap the result back in case it was a subclass of numpy.ndarray.
Expand Down Expand Up @@ -492,7 +491,7 @@ def unique_all(x):
return_index=True,
return_inverse=True,
return_counts=True,
equal_nan=False
equal_nan=False,
)
return UniqueAllResult(*result)

Expand Down Expand Up @@ -544,7 +543,7 @@ def unique_counts(x):
return_index=False,
return_inverse=False,
return_counts=True,
equal_nan=False
equal_nan=False,
)
return UniqueCountsResult(*result)

Expand Down Expand Up @@ -597,7 +596,7 @@ def unique_inverse(x):
return_index=False,
return_inverse=True,
return_counts=False,
equal_nan=False
equal_nan=False,
)
return UniqueInverseResult(*result)

Expand Down Expand Up @@ -641,7 +640,8 @@ def unique_values(x):
return_index=False,
return_inverse=False,
return_counts=False,
equal_nan=False
equal_nan=False,
sorted=False,
)


Expand Down
Loading