Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit cb85791

Browse files
committed
Adapt cython submodule for heaps
Move neighbors.NeighborsHeap's code and _typedefs under sklearn.utils, as cyclic imports are currently happening between sklearn.neighbors and sklearn.metrics. Also, using integral in some cases gave unexpected results. Occurrences were changed to use np.intp, as exposed by utils._typedefs.ITYPE_t (we don't need signed integer).
1 parent 03e516f commit cb85791

File tree

13 files changed

+250
-275
lines changed

13 files changed

+250
-275
lines changed

sklearn/metrics/_argkmin_fast.pyx

Lines changed: 69 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ cimport numpy as np
1313
from libc.math cimport floor, sqrt
1414
from libc.stdlib cimport free, malloc
1515

16-
from cython cimport floating, integral
16+
from cython cimport floating
1717
from cython.parallel cimport parallel, prange
1818

1919
DEF CHUNK_SIZE = 256 # number of vectors
@@ -22,7 +22,6 @@ DEF MIN_CHUNK_SAMPLES = 20
2222

2323
DEF FLOAT_INF = 1e36
2424

25-
from ..neighbors._neighbors_heap cimport _simultaneous_sort, _push
2625
from ..utils._cython_blas cimport (
2726
BLAS_Order,
2827
BLAS_Trans,
@@ -32,7 +31,11 @@ from ..utils._cython_blas cimport (
3231
Trans,
3332
_gemm,
3433
)
34+
35+
from ..utils._heap cimport _simultaneous_sort, _push
3536
from ..utils._openmp_helpers import _openmp_effective_n_threads
37+
from ..utils._typedefs cimport ITYPE_t
38+
from ..utils._typedefs import ITYPE
3639

3740

3841
### argkmin helpers
@@ -43,10 +46,10 @@ cdef void _argkmin_on_chunk(
4346
floating[::1] Y_sq_norms, # IN
4447
floating *dist_middle_terms, # IN
4548
floating *heaps_red_distances, # IN/OUT
46-
integral *heaps_indices, # IN/OUT
47-
integral k, # IN
49+
ITYPE_t *heaps_indices, # IN/OUT
50+
ITYPE_t k, # IN
4851
# ID of the first element of Y_c
49-
integral Y_idx_offset,
52+
ITYPE_t Y_idx_offset,
5053
) nogil:
5154
"""
5255
Critical part of the computation of pairwise distances.
@@ -55,7 +58,7 @@ cdef void _argkmin_on_chunk(
5558
on the gemm-trick.
5659
"""
5760
cdef:
58-
integral i, j
61+
ITYPE_t i, j
5962
# Instead of computing the full pairwise squared distances matrix,
6063
# ||X_c - Y_c||² = ||X_c||² - 2 X_c.Y_c^T + ||Y_c||²,
6164
# we only need to store the - 2 X_c.Y_c^T + ||Y_c||²
@@ -91,44 +94,44 @@ cdef int _argkmin_on_X(
9194
floating[:, ::1] X, # IN
9295
floating[:, ::1] Y, # IN
9396
floating[::1] Y_sq_norms, # IN
94-
integral chunk_size, # IN
95-
integral effective_n_threads, # IN
96-
integral[:, ::1] argkmin_indices, # OUT
97+
ITYPE_t chunk_size, # IN
98+
ITYPE_t effective_n_threads, # IN
99+
ITYPE_t[:, ::1] argkmin_indices, # OUT
97100
floating[:, ::1] argkmin_red_distances, # OUT
98101
) nogil:
99102
"""Computes the argkmin of each vector (row) of X on Y
100103
by parallelising computation on chunks of X.
101104
"""
102105
cdef:
103-
integral k = argkmin_indices.shape[1]
104-
integral d = X.shape[1]
105-
integral sf = sizeof(floating)
106-
integral si = sizeof(integral)
107-
integral n_samples_chunk = max(MIN_CHUNK_SAMPLES, chunk_size)
108-
109-
integral n_train = Y.shape[0]
110-
integral Y_n_samples_chunk = min(n_train, n_samples_chunk)
111-
integral Y_n_full_chunks = n_train / Y_n_samples_chunk
112-
integral Y_n_samples_rem = n_train % Y_n_samples_chunk
113-
114-
integral n_test = X.shape[0]
115-
integral X_n_samples_chunk = min(n_test, n_samples_chunk)
116-
integral X_n_full_chunks = n_test // X_n_samples_chunk
117-
integral X_n_samples_rem = n_test % X_n_samples_chunk
106+
ITYPE_t k = argkmin_indices.shape[1]
107+
ITYPE_t d = X.shape[1]
108+
ITYPE_t sf = sizeof(floating)
109+
ITYPE_t si = sizeof(ITYPE_t)
110+
ITYPE_t n_samples_chunk = max(MIN_CHUNK_SAMPLES, chunk_size)
111+
112+
ITYPE_t n_train = Y.shape[0]
113+
ITYPE_t Y_n_samples_chunk = min(n_train, n_samples_chunk)
114+
ITYPE_t Y_n_full_chunks = n_train / Y_n_samples_chunk
115+
ITYPE_t Y_n_samples_rem = n_train % Y_n_samples_chunk
116+
117+
ITYPE_t n_test = X.shape[0]
118+
ITYPE_t X_n_samples_chunk = min(n_test, n_samples_chunk)
119+
ITYPE_t X_n_full_chunks = n_test // X_n_samples_chunk
120+
ITYPE_t X_n_samples_rem = n_test % X_n_samples_chunk
118121

119122
# Counting remainder chunk in total number of chunks
120-
integral Y_n_chunks = Y_n_full_chunks + (
123+
ITYPE_t Y_n_chunks = Y_n_full_chunks + (
121124
n_train != (Y_n_full_chunks * Y_n_samples_chunk)
122125
)
123126

124-
integral X_n_chunks = X_n_full_chunks + (
127+
ITYPE_t X_n_chunks = X_n_full_chunks + (
125128
n_test != (X_n_full_chunks * X_n_samples_chunk)
126129
)
127130

128-
integral num_threads = min(Y_n_chunks, effective_n_threads)
131+
ITYPE_t num_threads = min(Y_n_chunks, effective_n_threads)
129132

130-
integral Y_start, Y_end, X_start, X_end
131-
integral X_chunk_idx, Y_chunk_idx, idx, jdx
133+
ITYPE_t Y_start, Y_end, X_start, X_end
134+
ITYPE_t X_chunk_idx, Y_chunk_idx, idx, jdx
132135

133136
floating *dist_middle_terms_chunks
134137
floating *heaps_red_distances_chunks
@@ -190,9 +193,9 @@ cdef int _argkmin_on_Y(
190193
floating[:, ::1] X, # IN
191194
floating[:, ::1] Y, # IN
192195
floating[::1] Y_sq_norms, # IN
193-
integral chunk_size, # IN
194-
integral effective_n_threads, # IN
195-
integral[:, ::1] argkmin_indices, # OUT
196+
ITYPE_t chunk_size, # IN
197+
ITYPE_t effective_n_threads, # IN
198+
ITYPE_t[:, ::1] argkmin_indices, # OUT
196199
floating[:, ::1] argkmin_red_distances, # OUT
197200
) nogil:
198201
"""Computes the argkmin of each vector (row) of X on Y
@@ -203,43 +206,43 @@ cdef int _argkmin_on_Y(
203206
most contexts.
204207
"""
205208
cdef:
206-
integral k = argkmin_indices.shape[1]
207-
integral d = X.shape[1]
208-
integral sf = sizeof(floating)
209-
integral si = sizeof(integral)
210-
integral n_samples_chunk = max(MIN_CHUNK_SAMPLES, chunk_size)
211-
212-
integral n_train = Y.shape[0]
213-
integral Y_n_samples_chunk = min(n_train, n_samples_chunk)
214-
integral Y_n_full_chunks = n_train / Y_n_samples_chunk
215-
integral Y_n_samples_rem = n_train % Y_n_samples_chunk
216-
217-
integral n_test = X.shape[0]
218-
integral X_n_samples_chunk = min(n_test, n_samples_chunk)
219-
integral X_n_full_chunks = n_test // X_n_samples_chunk
220-
integral X_n_samples_rem = n_test % X_n_samples_chunk
209+
ITYPE_t k = argkmin_indices.shape[1]
210+
ITYPE_t d = X.shape[1]
211+
ITYPE_t sf = sizeof(floating)
212+
ITYPE_t si = sizeof(ITYPE_t)
213+
ITYPE_t n_samples_chunk = max(MIN_CHUNK_SAMPLES, chunk_size)
214+
215+
ITYPE_t n_train = Y.shape[0]
216+
ITYPE_t Y_n_samples_chunk = min(n_train, n_samples_chunk)
217+
ITYPE_t Y_n_full_chunks = n_train / Y_n_samples_chunk
218+
ITYPE_t Y_n_samples_rem = n_train % Y_n_samples_chunk
219+
220+
ITYPE_t n_test = X.shape[0]
221+
ITYPE_t X_n_samples_chunk = min(n_test, n_samples_chunk)
222+
ITYPE_t X_n_full_chunks = n_test // X_n_samples_chunk
223+
ITYPE_t X_n_samples_rem = n_test % X_n_samples_chunk
221224

222225
# Counting remainder chunk in total number of chunks
223-
integral Y_n_chunks = Y_n_full_chunks + (
226+
ITYPE_t Y_n_chunks = Y_n_full_chunks + (
224227
n_train != (Y_n_full_chunks * Y_n_samples_chunk)
225228
)
226229

227-
integral X_n_chunks = X_n_full_chunks + (
230+
ITYPE_t X_n_chunks = X_n_full_chunks + (
228231
n_test != (X_n_full_chunks * X_n_samples_chunk)
229232
)
230233

231-
integral num_threads = min(Y_n_chunks, effective_n_threads)
234+
ITYPE_t num_threads = min(Y_n_chunks, effective_n_threads)
232235

233-
integral Y_start, Y_end, X_start, X_end
234-
integral X_chunk_idx, Y_chunk_idx, idx, jdx
236+
ITYPE_t Y_start, Y_end, X_start, X_end
237+
ITYPE_t X_chunk_idx, Y_chunk_idx, idx, jdx
235238

236239
floating *dist_middle_terms_chunks
237240
floating *heaps_red_distances_chunks
238241

239242
# As chunks of X are shared across threads, so must their
240243
# heaps. To solve this, each thread has its own locals
241244
# heaps which are then synchronised back in the main ones.
242-
integral *heaps_indices_chunks
245+
ITYPE_t *heaps_indices_chunks
243246

244247
for X_chunk_idx in range(X_n_chunks):
245248
X_start = X_chunk_idx * X_n_samples_chunk
@@ -256,7 +259,7 @@ cdef int _argkmin_on_Y(
256259
Y_n_samples_chunk * X_n_samples_chunk * sf)
257260
heaps_red_distances_chunks = <floating*> malloc(
258261
X_n_samples_chunk * k * sf)
259-
heaps_indices_chunks = <integral*> malloc(
262+
heaps_indices_chunks = <ITYPE_t*> malloc(
260263
X_n_samples_chunk * k * sf)
261264

262265
# Initialising heaps (memset can't be used here)
@@ -318,13 +321,13 @@ cdef int _argkmin_on_Y(
318321
cdef inline floating _euclidean_dist(
319322
floating[:, ::1] X,
320323
floating[:, ::1] Y,
321-
integral i,
322-
integral j,
324+
ITYPE_t i,
325+
ITYPE_t j,
323326
) nogil:
324327
cdef:
325328
floating dist = 0
326-
integral k
327-
integral upper_unrolled_idx = (X.shape[1] // 4) * 4
329+
ITYPE_t k
330+
ITYPE_t upper_unrolled_idx = (X.shape[1] // 4) * 4
328331

329332
# Unrolling loop to help with vectorisation
330333
for k in range(0, upper_unrolled_idx, 4):
@@ -341,8 +344,8 @@ cdef inline floating _euclidean_dist(
341344
cdef int _exact_euclidean_dist(
342345
floating[:, ::1] X, # IN
343346
floating[:, ::1] Y, # IN
344-
integral[:, ::1] Y_indices, # IN
345-
integral effective_n_threads, # IN
347+
ITYPE_t[:, ::1] Y_indices, # IN
348+
ITYPE_t effective_n_threads, # IN
346349
floating[:, ::1] distances, # OUT
347350
) nogil:
348351
"""
@@ -356,7 +359,7 @@ cdef int _exact_euclidean_dist(
356359
but we use a function to have a cdef nogil context.
357360
"""
358361
cdef:
359-
integral i, k
362+
ITYPE_t i, k
360363

361364
for i in prange(X.shape[0], schedule='static',
362365
nogil=True, num_threads=effective_n_threads):
@@ -370,8 +373,8 @@ cdef int _exact_euclidean_dist(
370373
def _argkmin(
371374
floating[:, ::1] X,
372375
floating[:, ::1] Y,
373-
integral k,
374-
integral chunk_size = CHUNK_SIZE,
376+
ITYPE_t k,
377+
ITYPE_t chunk_size = CHUNK_SIZE,
375378
str strategy = "auto",
376379
bint return_distance = False,
377380
):
@@ -419,13 +422,13 @@ def _argkmin(
419422
int_dtype = np.intp
420423
float_dtype = np.float32 if floating is float else np.float64
421424
cdef:
422-
integral[:, ::1] argkmin_indices = np.full((X.shape[0], k), 0,
423-
dtype=int_dtype)
425+
ITYPE_t[:, ::1] argkmin_indices = np.full((X.shape[0], k), 0,
426+
dtype=ITYPE)
424427
floating[:, ::1] argkmin_distances = np.full((X.shape[0], k),
425428
FLOAT_INF,
426429
dtype=float_dtype)
427430
floating[::1] Y_sq_norms = np.einsum('ij,ij->i', Y, Y)
428-
integral effective_n_threads = _openmp_effective_n_threads()
431+
ITYPE_t effective_n_threads = _openmp_effective_n_threads()
429432

430433
if strategy == 'auto':
431434
# This is a simple heuristic whose constant for the

0 commit comments

Comments
 (0)