From c6e393887acb5f13020f81031b07a484e3c8473c Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Fri, 3 Mar 2023 15:24:10 +0500 Subject: [PATCH 1/4] MAINT replace cnp.ndarray with memory views in _fast_dict --- sklearn/utils/_fast_dict.pyx | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/sklearn/utils/_fast_dict.pyx b/sklearn/utils/_fast_dict.pyx index 74aaa16b020eb..41b944d75ab11 100644 --- a/sklearn/utils/_fast_dict.pyx +++ b/sklearn/utils/_fast_dict.pyx @@ -12,13 +12,6 @@ from libcpp.map cimport map as cpp_map import numpy as np -# Import the C-level symbols of numpy -cimport numpy as cnp - -# Numpy must be initialized. When using numpy from C or Cython you must -# _always_ do that, or you will have segfaults -cnp.import_array() - #DTYPE = np.float64 #ctypedef cnp.float64_t DTYPE_t @@ -35,8 +28,11 @@ cnp.import_array() cdef class IntFloatDict: - def __init__(self, cnp.ndarray[ITYPE_t, ndim=1] keys, - cnp.ndarray[DTYPE_t, ndim=1] values): + def __init__( + self, + ITYPE_t[:] keys, + DTYPE_t[:] values, + ): cdef int i cdef int size = values.size # Should check that sizes for keys and values are equal, and @@ -91,10 +87,8 @@ cdef class IntFloatDict: The values of the data points """ cdef int size = self.my_map.size() - cdef cnp.ndarray[ITYPE_t, ndim=1] keys = np.empty(size, - dtype=np.intp) - cdef cnp.ndarray[DTYPE_t, ndim=1] values = np.empty(size, - dtype=np.float64) + cdef ITYPE_t[:] keys = np.empty(size, dtype=np.intp) + cdef DTYPE_t[:] values = np.empty(size, dtype=np.float64) self._to_arrays(keys, values) return keys, values From 3d975547c1a8ea5d0e536501f40bd28d7371e7fd Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Fri, 3 Mar 2023 18:00:02 +0500 Subject: [PATCH 2/4] Remove the method to_arrays which was not used anywhere --- sklearn/utils/_fast_dict.pyx | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/sklearn/utils/_fast_dict.pyx b/sklearn/utils/_fast_dict.pyx index 41b944d75ab11..b959cdbabf05e 100644 --- a/sklearn/utils/_fast_dict.pyx +++ b/sklearn/utils/_fast_dict.pyx @@ -75,23 +75,6 @@ cdef class IntFloatDict: value = values[idx] yield key, value - def to_arrays(self): - """Return the key, value representation of the IntFloatDict - object. - - Returns - ======= - keys : ndarray, shape (n_items, ), dtype=int - The indices of the data points - values : ndarray, shape (n_items, ), dtype=float - The values of the data points - """ - cdef int size = self.my_map.size() - cdef ITYPE_t[:] keys = np.empty(size, dtype=np.intp) - cdef DTYPE_t[:] values = np.empty(size, dtype=np.float64) - self._to_arrays(keys, values) - return keys, values - cdef _to_arrays(self, ITYPE_t [:] keys, DTYPE_t [:] values): # Internal version of to_arrays that takes already-initialized arrays cdef cpp_map[ITYPE_t, DTYPE_t].iterator it = self.my_map.begin() From e7946fd7f6bb4f2b48f7bb7a54d37c175247067d Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Fri, 3 Mar 2023 21:04:41 +0500 Subject: [PATCH 3/4] Add the to_arrays method back, fix its return values and add a test --- sklearn/utils/_fast_dict.pyx | 17 +++++++++++++++++ sklearn/utils/tests/test_fast_dict.py | 14 ++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/sklearn/utils/_fast_dict.pyx b/sklearn/utils/_fast_dict.pyx index b959cdbabf05e..4521d0ef08768 100644 --- a/sklearn/utils/_fast_dict.pyx +++ b/sklearn/utils/_fast_dict.pyx @@ -75,6 +75,23 @@ cdef class IntFloatDict: value = values[idx] yield key, value + def to_arrays(self): + """Return the key, value representation of the IntFloatDict + object. + + Returns + ======= + keys : ndarray, shape (n_items, ), dtype=int + The indices of the data points + values : ndarray, shape (n_items, ), dtype=float + The values of the data points + """ + cdef int size = self.my_map.size() + cdef ITYPE_t[:] keys = np.empty(size, dtype=np.intp) + cdef DTYPE_t[:] values = np.empty(size, dtype=np.float64) + self._to_arrays(keys, values) + return np.asarray(keys), np.asarray(values) + cdef _to_arrays(self, ITYPE_t [:] keys, DTYPE_t [:] values): # Internal version of to_arrays that takes already-initialized arrays cdef cpp_map[ITYPE_t, DTYPE_t].iterator it = self.my_map.begin() diff --git a/sklearn/utils/tests/test_fast_dict.py b/sklearn/utils/tests/test_fast_dict.py index 050df133a2d24..0d026da52e5ec 100644 --- a/sklearn/utils/tests/test_fast_dict.py +++ b/sklearn/utils/tests/test_fast_dict.py @@ -1,6 +1,7 @@ """ Test fast_dict. """ import numpy as np +from numpy.testing import assert_array_equal, assert_allclose from sklearn.utils._fast_dict import IntFloatDict, argmin @@ -29,3 +30,16 @@ def test_int_float_dict_argmin(): values = np.arange(100, dtype=np.float64) d = IntFloatDict(keys, values) assert argmin(d) == (0, 0) + + +def test_to_arrays(): + keys_in = np.array([1, 2, 3], dtype=np.intp) + values_in = np.array([4, 5, 6], dtype=np.float64) + + d = IntFloatDict(keys_in, values_in) + keys_out, values_out = d.to_arrays() + + assert keys_out.dtype == keys_in.dtype + assert values_in.dtype == values_out.dtype + assert_array_equal(keys_out, keys_in) + assert_allclose(values_out, values_in) From 6534ee177cb8c68f267b8921c0896a74af2ee397 Mon Sep 17 00:00:00 2001 From: OmarManzoor Date: Sat, 4 Mar 2023 12:30:57 +0500 Subject: [PATCH 4/4] Address PR suggestions --- sklearn/utils/_fast_dict.pyx | 6 +++--- sklearn/utils/tests/test_fast_dict.py | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/_fast_dict.pyx b/sklearn/utils/_fast_dict.pyx index 4521d0ef08768..5fe642b14c626 100644 --- a/sklearn/utils/_fast_dict.pyx +++ b/sklearn/utils/_fast_dict.pyx @@ -87,10 +87,10 @@ cdef class IntFloatDict: The values of the data points """ cdef int size = self.my_map.size() - cdef ITYPE_t[:] keys = np.empty(size, dtype=np.intp) - cdef DTYPE_t[:] values = np.empty(size, dtype=np.float64) + keys = np.empty(size, dtype=np.intp) + values = np.empty(size, dtype=np.float64) self._to_arrays(keys, values) - return np.asarray(keys), np.asarray(values) + return keys, values cdef _to_arrays(self, ITYPE_t [:] keys, DTYPE_t [:] values): # Internal version of to_arrays that takes already-initialized arrays diff --git a/sklearn/utils/tests/test_fast_dict.py b/sklearn/utils/tests/test_fast_dict.py index 0d026da52e5ec..96c14068f0db1 100644 --- a/sklearn/utils/tests/test_fast_dict.py +++ b/sklearn/utils/tests/test_fast_dict.py @@ -33,6 +33,8 @@ def test_int_float_dict_argmin(): def test_to_arrays(): + # Test that an IntFloatDict is converted into arrays + # of keys and values correctly keys_in = np.array([1, 2, 3], dtype=np.intp) values_in = np.array([4, 5, 6], dtype=np.float64)