Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit a943f79

Browse files
committed
ENH Add the fused CSR dense case for Euclidean Specializations
1 parent 2459331 commit a943f79

File tree

6 files changed

+235
-59
lines changed

6 files changed

+235
-59
lines changed

sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}):
6161

6262
No instance should directly be created outside of this class method.
6363
"""
64-
if (
65-
metric in ("euclidean", "sqeuclidean")
66-
and not (issparse(X) ^ issparse(Y)) # "^" is the XOR operator
67-
):
64+
if metric in ("euclidean", "sqeuclidean"):
6865
# Specialized implementation of ArgKmin for the Euclidean distance
6966
# for the dense-dense, sparse-sparse and sparse-dense cases.
7067
# This implementation computes the distances by chunk using

sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -119,26 +119,7 @@ def is_valid_sparse_matrix(X):
119119
and metric in cls.valid_metrics()
120120
)
121121

122-
# The other joblib-based back-end might be more efficient on fused sparse-dense
123-
# datasets' pairs on metric="(sq)euclidean" for some configurations because it
124-
# uses the Squared Euclidean matrix decomposition, i.e.:
125-
#
126-
# ||X_c_i - Y_c_j||² = ||X_c_i||² - 2 X_c_i.Y_c_j^T + ||Y_c_j||²
127-
#
128-
# calling efficient sparse-dense routines for matrix and vectors multiplication
129-
# implemented in SciPy we do not use yet here.
130-
# See: https://github.com/scikit-learn/scikit-learn/pull/23585#issuecomment-1247996669 # noqa
131-
# TODO: implement specialisation for (sq)euclidean on fused sparse-dense
132-
# using sparse-dense routines for matrix-vector multiplications.
133-
# Currently, only dense-dense and sparse-sparse are optimized for
134-
# the Euclidean case.
135-
fused_sparse_dense_euclidean_case_guard = not (
136-
(is_valid_sparse_matrix(X) ^ is_valid_sparse_matrix(Y)) # "^" is XOR
137-
and isinstance(metric, str)
138-
and "euclidean" in metric
139-
)
140-
141-
return is_usable and fused_sparse_dense_euclidean_case_guard
122+
return is_usable
142123

143124
@classmethod
144125
@abstractmethod

sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd.tp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,4 +186,41 @@ cdef class SparseSparseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{nam
186186
) nogil
187187

188188

189+
cdef class SparseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_suffix}}):
190+
cdef:
191+
const DTYPE_t[:] X_data
192+
const SPARSE_INDEX_TYPE_t[:] X_indices
193+
const SPARSE_INDEX_TYPE_t[:] X_indptr
194+
195+
const DTYPE_t[:, ::1] Y
196+
197+
bint c_ordered_middle_term
198+
199+
cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks(
200+
self,
201+
ITYPE_t X_start,
202+
ITYPE_t X_end,
203+
ITYPE_t Y_start,
204+
ITYPE_t Y_end,
205+
ITYPE_t thread_num
206+
) nogil
207+
208+
cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks(
209+
self,
210+
ITYPE_t X_start,
211+
ITYPE_t X_end,
212+
ITYPE_t Y_start,
213+
ITYPE_t Y_end,
214+
ITYPE_t thread_num
215+
) nogil
216+
217+
cdef DTYPE_t * _compute_dist_middle_terms(
218+
self,
219+
ITYPE_t X_start,
220+
ITYPE_t X_end,
221+
ITYPE_t Y_start,
222+
ITYPE_t Y_end,
223+
ITYPE_t thread_num,
224+
) nogil
225+
189226
{{endfor}}

sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp

Lines changed: 144 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,37 @@ cdef void _middle_term_sparse_sparse_64(
7878
if X_i_col_idx == Y_j_col_idx:
7979
D[k] += -2 * X_data[X_i_ptr] * Y_data[Y_j_ptr]
8080

81+
# TODO: compare this routine with the similar ones in SciPy, especially
82+
# `csr_matvecs` which might implement a better algorithm.
83+
# See: https://github.com/scipy/scipy/blob/e58292e066ba2cb2f3d1e0563ca9314ff1f4f311/scipy/sparse/sparsetools/csr.h#L1139-L1175 # noqa
84+
cdef void _middle_term_sparse_dense_64(
85+
const DTYPE_t[:] X_data,
86+
const SPARSE_INDEX_TYPE_t[:] X_indices,
87+
const SPARSE_INDEX_TYPE_t[:] X_indptr,
88+
ITYPE_t X_start,
89+
ITYPE_t X_end,
90+
const DTYPE_t[:, ::1] Y,
91+
ITYPE_t Y_start,
92+
ITYPE_t Y_end,
93+
bint c_ordered_middle_term,
94+
DTYPE_t * D,
95+
) nogil:
96+
# This routine assumes that D points to the first element of a
97+
# zeroed buffer of length at least equal to n_X × n_Y, conceptually
98+
# representing a 2-d C-ordered array.
99+
cdef:
100+
ITYPE_t i, j, k
101+
ITYPE_t n_X = X_end - X_start
102+
ITYPE_t n_Y = Y_end - Y_start
103+
ITYPE_t X_i_col_idx, X_i_ptr, Y_j_col_idx, Y_j_ptr
104+
105+
for i in range(n_X):
106+
for j in range(n_Y):
107+
k = i * n_Y + j if c_ordered_middle_term else j * n_X + i
108+
for X_i_ptr in range(X_indptr[X_start+i], X_indptr[X_start+i+1]):
109+
X_i_col_idx = X_indices[X_i_ptr]
110+
D[k] += -2 * X_data[X_i_ptr] * Y[Y_start + j, X_i_col_idx]
111+
81112

82113
{{for name_suffix, upcast_to_float64, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}
83114

@@ -111,7 +142,7 @@ cdef class MiddleTermComputer{{name_suffix}}:
111142
n_features,
112143
chunk_size,
113144
) -> MiddleTermComputer{{name_suffix}}:
114-
"""Return the DatasetsPair implementation for the given arguments.
145+
"""Return the MiddleTermComputer implementation for the given arguments.
115146

116147
Parameters
117148
----------
@@ -151,12 +182,34 @@ cdef class MiddleTermComputer{{name_suffix}}:
151182
n_features,
152183
chunk_size,
153184
)
154-
185+
if X_is_sparse and not Y_is_sparse:
186+
return SparseDenseMiddleTermComputer{{name_suffix}}(
187+
X,
188+
# TODO: remove cast
189+
Y.astype(np.float64, copy=False),
190+
effective_n_threads,
191+
chunks_n_threads,
192+
dist_middle_terms_chunks_size,
193+
n_features,
194+
chunk_size,
195+
c_ordered_middle_term=True
196+
)
197+
if not X_is_sparse and Y_is_sparse:
198+
return SparseDenseMiddleTermComputer{{name_suffix}}(
199+
Y,
200+
# TODO: remove cast
201+
X.astype(np.float64, copy=False),
202+
effective_n_threads,
203+
chunks_n_threads,
204+
dist_middle_terms_chunks_size,
205+
n_features,
206+
chunk_size,
207+
c_ordered_middle_term=False,
208+
)
155209
raise NotImplementedError(
156-
"X and Y must be both CSR sparse matrices or both numpy arrays."
210+
"X and Y must be CSR sparse matrices or numpy arrays."
157211
)
158212

159-
160213
@classmethod
161214
def unpack_csr_matrix(cls, X: csr_matrix):
162215
"""Ensure that the CSR matrix is indexed with SPARSE_INDEX_TYPE."""
@@ -494,5 +547,92 @@ cdef class SparseSparseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{nam
494547

495548
return dist_middle_terms
496549

550+
cdef class SparseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_suffix}}):
551+
"""Middle term of the Euclidean distance between chunks of a CSR matrix and a np.ndarray.
552+
553+
The logic of the computation is wrapped in the routine _middle_term_sparse_dense_64.
554+
This routine iterates over the data, indices and indptr arrays of the sparse matrix
554+
without densifying it.
556+
"""
557+
558+
def __init__(
559+
self,
560+
X,
561+
Y,
562+
ITYPE_t effective_n_threads,
563+
ITYPE_t chunks_n_threads,
564+
ITYPE_t dist_middle_terms_chunks_size,
565+
ITYPE_t n_features,
566+
ITYPE_t chunk_size,
567+
bint c_ordered_middle_term,
568+
):
569+
super().__init__(
570+
effective_n_threads,
571+
chunks_n_threads,
572+
dist_middle_terms_chunks_size,
573+
n_features,
574+
chunk_size,
575+
)
576+
self.X_data, self.X_indices, self.X_indptr = self.unpack_csr_matrix(X)
577+
self.Y = Y
578+
self.c_ordered_middle_term = c_ordered_middle_term
579+
580+
cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks(
581+
self,
582+
ITYPE_t X_start,
583+
ITYPE_t X_end,
584+
ITYPE_t Y_start,
585+
ITYPE_t Y_end,
586+
ITYPE_t thread_num,
587+
) nogil:
588+
# Flush the thread dist_middle_terms_chunks to 0.0
589+
fill(
590+
self.dist_middle_terms_chunks[thread_num].begin(),
591+
self.dist_middle_terms_chunks[thread_num].end(),
592+
0.0,
593+
)
594+
595+
cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks(
596+
self,
597+
ITYPE_t X_start,
598+
ITYPE_t X_end,
599+
ITYPE_t Y_start,
600+
ITYPE_t Y_end,
601+
ITYPE_t thread_num,
602+
) nogil:
603+
# Flush the thread dist_middle_terms_chunks to 0.0
604+
fill(
605+
self.dist_middle_terms_chunks[thread_num].begin(),
606+
self.dist_middle_terms_chunks[thread_num].end(),
607+
0.0,
608+
)
609+
610+
cdef DTYPE_t * _compute_dist_middle_terms(
611+
self,
612+
ITYPE_t X_start,
613+
ITYPE_t X_end,
614+
ITYPE_t Y_start,
615+
ITYPE_t Y_end,
616+
ITYPE_t thread_num,
617+
) nogil:
618+
cdef:
619+
DTYPE_t *dist_middle_terms = (
620+
self.dist_middle_terms_chunks[thread_num].data()
621+
)
622+
623+
_middle_term_sparse_dense_64(
624+
self.X_data,
625+
self.X_indices,
626+
self.X_indptr,
627+
X_start if self.c_ordered_middle_term else Y_start,
628+
X_end if self.c_ordered_middle_term else Y_end,
629+
self.Y,
630+
Y_start if self.c_ordered_middle_term else X_start,
631+
Y_end if self.c_ordered_middle_term else X_end,
632+
self.c_ordered_middle_term,
633+
dist_middle_terms,
634+
)
635+
636+
return dist_middle_terms
497637

498638
{{endfor}}

sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}})
8282

8383
No instance should directly be created outside of this class method.
8484
"""
85-
if (
86-
metric in ("euclidean", "sqeuclidean")
87-
and not (issparse(X) ^ issparse(Y)) # "^" is XOR
88-
):
85+
if metric in ("euclidean", "sqeuclidean"):
8986
# Specialized implementation of RadiusNeighbors for the Euclidean
9087
# distance for the dense-dense, sparse-sparse and sparse-dense cases.
9188
# This implementation computes the distances by chunk using

sklearn/metrics/tests/test_pairwise_distances_reduction.py

Lines changed: 51 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import itertools
21
import re
32
from collections import defaultdict
43

@@ -552,15 +551,11 @@ def test_pairwise_distances_reduction_is_usable_for():
552551
np.asfortranarray(X), Y, metric
553552
)
554553

555-
# We prefer not to use those implementations for fused sparse-dense when
556-
# metric="(sq)euclidean" because it's not yet the most efficient one on
557-
# all configurations of datasets.
558-
# See: https://github.com/scikit-learn/scikit-learn/pull/23585#issuecomment-1247996669 # noqa
559-
# TODO: implement specialisation for (sq)euclidean on fused sparse-dense
560-
# using sparse-dense routines for matrix-vector multiplications.
561-
assert not BaseDistancesReductionDispatcher.is_usable_for(
562-
X_csr, Y, metric="euclidean"
554+
assert BaseDistancesReductionDispatcher.is_usable_for(X_csr, Y, metric="euclidean")
555+
assert BaseDistancesReductionDispatcher.is_usable_for(
556+
X, Y_csr, metric="sqeuclidean"
563557
)
558+
564559
assert BaseDistancesReductionDispatcher.is_usable_for(
565560
X_csr, Y_csr, metric="sqeuclidean"
566561
)
@@ -848,24 +843,53 @@ def test_format_agnosticism(
848843
**compute_parameters,
849844
)
850845

851-
for _X, _Y in itertools.product((X, X_csr), (Y, Y_csr)):
852-
if _X is X and _Y is Y:
853-
continue
854-
dist, indices = Dispatcher.compute(
855-
_X,
856-
_Y,
857-
parameter,
858-
chunk_size=50,
859-
return_distance=True,
860-
**compute_parameters,
861-
)
862-
ASSERT_RESULT[(Dispatcher, dtype)](
863-
dist_dense,
864-
dist,
865-
indices_dense,
866-
indices,
867-
**check_parameters,
868-
)
846+
dist, indices = Dispatcher.compute(
847+
X_csr,
848+
Y_csr,
849+
parameter,
850+
chunk_size=50,
851+
return_distance=True,
852+
**compute_parameters,
853+
)
854+
ASSERT_RESULT[(Dispatcher, dtype)](
855+
dist_dense,
856+
dist,
857+
indices_dense,
858+
indices,
859+
**check_parameters,
860+
)
861+
862+
dist, indices = Dispatcher.compute(
863+
X_csr,
864+
Y,
865+
parameter,
866+
chunk_size=50,
867+
return_distance=True,
868+
**compute_parameters,
869+
)
870+
ASSERT_RESULT[(Dispatcher, dtype)](
871+
dist_dense,
872+
dist,
873+
indices_dense,
874+
indices,
875+
**check_parameters,
876+
)
877+
878+
dist, indices = Dispatcher.compute(
879+
X,
880+
Y_csr,
881+
parameter,
882+
chunk_size=50,
883+
return_distance=True,
884+
**compute_parameters,
885+
)
886+
ASSERT_RESULT[(Dispatcher, dtype)](
887+
dist_dense,
888+
dist,
889+
indices_dense,
890+
indices,
891+
**check_parameters,
892+
)
869893

870894

871895
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)