Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 67ea720

Browse files
ENH Add the fused CSR dense case for Euclidean Specializations (#25044)
Signed-off-by: Julien Jerphanion <[email protected]> Co-authored-by: Vincent M <[email protected]>
1 parent e0033b0 commit 67ea720

File tree

7 files changed

+240
-41
lines changed

7 files changed

+240
-41
lines changed

doc/whats_new/v1.3.rst

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,42 @@ Changes impacting all modules
8181
by :user:`John Pangas <jpangas>`, :user:`Rahil Parikh <rprkh>` ,
8282
and :user:`Alex Buzenet <albuzenet>`.
8383

84+
- |Enhancement| Added a multi-threaded Cython routine to compute squared
85+
Euclidean distances (sometimes followed by a fused reduction operation) for a
86+
pair of datasets consisting of a sparse CSR matrix and a dense NumPy array.
87+
88+
This can improve the performance of the following functions and estimators:
89+
90+
- :func:`sklearn.metrics.pairwise_distances_argmin`
91+
- :func:`sklearn.metrics.pairwise_distances_argmin_min`
92+
- :class:`sklearn.cluster.AffinityPropagation`
93+
- :class:`sklearn.cluster.Birch`
94+
- :class:`sklearn.cluster.MeanShift`
95+
- :class:`sklearn.cluster.OPTICS`
96+
- :class:`sklearn.cluster.SpectralClustering`
97+
- :func:`sklearn.feature_selection.mutual_info_regression`
98+
- :class:`sklearn.neighbors.KNeighborsClassifier`
99+
- :class:`sklearn.neighbors.KNeighborsRegressor`
100+
- :class:`sklearn.neighbors.RadiusNeighborsClassifier`
101+
- :class:`sklearn.neighbors.RadiusNeighborsRegressor`
102+
- :class:`sklearn.neighbors.LocalOutlierFactor`
103+
- :class:`sklearn.neighbors.NearestNeighbors`
104+
- :class:`sklearn.manifold.Isomap`
105+
- :class:`sklearn.manifold.LocallyLinearEmbedding`
106+
- :class:`sklearn.manifold.TSNE`
107+
- :func:`sklearn.manifold.trustworthiness`
108+
- :class:`sklearn.semi_supervised.LabelPropagation`
109+
- :class:`sklearn.semi_supervised.LabelSpreading`
110+
111+
A typical example of this performance improvement happens when passing a sparse
112+
CSR matrix to the `predict` or `transform` method of estimators that rely on
113+
a dense NumPy representation to store their fitted parameters (or the reverse).
114+
115+
For instance, :meth:`sklearn.neighbors.NearestNeighbors.kneighbors` is now up to 2 times faster
116+
for this case on commonly available laptops.
117+
118+
:pr:`25044` by :user:`Julien Jerphanion <jjerphan>`.
119+
84120
Changelog
85121
---------
86122

sklearn/metrics/_pairwise_distances_reduction/_argkmin.pyx.tp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,7 @@ cdef class ArgKmin{{name_suffix}}(BaseDistancesReduction{{name_suffix}}):
6161

6262
No instance should directly be created outside of this class method.
6363
"""
64-
if (
65-
metric in ("euclidean", "sqeuclidean")
66-
and not (issparse(X) ^ issparse(Y)) # "^" is the XOR operator
67-
):
64+
if metric in ("euclidean", "sqeuclidean"):
6865
# Specialized implementation of ArgKmin for the Euclidean distance
6966
# for every dense/sparse combination of X and Y.
7067
# This implementation computes the distances by chunk using

sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -119,26 +119,7 @@ def is_valid_sparse_matrix(X):
119119
and metric in cls.valid_metrics()
120120
)
121121

122-
# The other joblib-based back-end might be more efficient on fused sparse-dense
123-
# datasets' pairs on metric="(sq)euclidean" for some configurations because it
124-
# uses the Squared Euclidean matrix decomposition, i.e.:
125-
#
126-
# ||X_c_i - Y_c_j||² = ||X_c_i||² - 2 X_c_i.Y_c_j^T + ||Y_c_j||²
127-
#
128-
# calling efficient sparse-dense routines for matrix and vectors multiplication
129-
# implemented in SciPy we do not use yet here.
130-
# See: https://github.com/scikit-learn/scikit-learn/pull/23585#issuecomment-1247996669 # noqa
131-
# TODO: implement specialisation for (sq)euclidean on fused sparse-dense
132-
# using sparse-dense routines for matrix-vector multiplications.
133-
# Currently, only dense-dense and sparse-sparse are optimized for
134-
# the Euclidean case.
135-
fused_sparse_dense_euclidean_case_guard = not (
136-
(is_valid_sparse_matrix(X) ^ is_valid_sparse_matrix(Y)) # "^" is XOR
137-
and isinstance(metric, str)
138-
and "euclidean" in metric
139-
)
140-
141-
return is_usable and fused_sparse_dense_euclidean_case_guard
122+
return is_usable
142123

143124
@classmethod
144125
@abstractmethod

sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pxd.tp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,4 +186,45 @@ cdef class SparseSparseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{nam
186186
) noexcept nogil
187187

188188

189+
cdef class SparseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_suffix}}):
190+
cdef:
191+
const DTYPE_t[:] X_data
192+
const SPARSE_INDEX_TYPE_t[:] X_indices
193+
const SPARSE_INDEX_TYPE_t[:] X_indptr
194+
195+
const {{INPUT_DTYPE_t}}[:, ::1] Y
196+
197+
# We treat the dense-sparse case with the sparse-dense case by simply
198+
# treating the dist_middle_terms as F-ordered and by swapping arguments.
199+
# This attribute is meant to encode the case and adapt the logic
200+
# accordingly.
201+
bint c_ordered_middle_term
202+
203+
cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks(
204+
self,
205+
ITYPE_t X_start,
206+
ITYPE_t X_end,
207+
ITYPE_t Y_start,
208+
ITYPE_t Y_end,
209+
ITYPE_t thread_num
210+
) noexcept nogil
211+
212+
cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks(
213+
self,
214+
ITYPE_t X_start,
215+
ITYPE_t X_end,
216+
ITYPE_t Y_start,
217+
ITYPE_t Y_end,
218+
ITYPE_t thread_num
219+
) noexcept nogil
220+
221+
cdef DTYPE_t * _compute_dist_middle_terms(
222+
self,
223+
ITYPE_t X_start,
224+
ITYPE_t X_end,
225+
ITYPE_t Y_start,
226+
ITYPE_t Y_end,
227+
ITYPE_t thread_num,
228+
) noexcept nogil
229+
189230
{{endfor}}

sklearn/metrics/_pairwise_distances_reduction/_middle_term_computer.pyx.tp

Lines changed: 155 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,34 @@ cdef void _middle_term_sparse_sparse_64(
7373

7474
{{for name_suffix, upcast_to_float64, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}
7575

76+
cdef void _middle_term_sparse_dense_{{name_suffix}}(
77+
const DTYPE_t[:] X_data,
78+
const SPARSE_INDEX_TYPE_t[:] X_indices,
79+
const SPARSE_INDEX_TYPE_t[:] X_indptr,
80+
ITYPE_t X_start,
81+
ITYPE_t X_end,
82+
const {{INPUT_DTYPE_t}}[:, ::1] Y,
83+
ITYPE_t Y_start,
84+
ITYPE_t Y_end,
85+
bint c_ordered_middle_term,
86+
DTYPE_t * dist_middle_terms,
87+
) nogil:
88+
# This routine assumes that dist_middle_terms is a pointer to the first element
89+
# of a buffer filled with zeros of length at least equal to n_X × n_Y, conceptually
90+
# representing a 2-d C-ordered or F-ordered array.
91+
cdef:
92+
ITYPE_t i, j, k
93+
ITYPE_t n_X = X_end - X_start
94+
ITYPE_t n_Y = Y_end - Y_start
95+
ITYPE_t X_i_col_idx, X_i_ptr, Y_j_col_idx, Y_j_ptr
96+
97+
for i in range(n_X):
98+
for j in range(n_Y):
99+
k = i * n_Y + j if c_ordered_middle_term else j * n_X + i
100+
for X_i_ptr in range(X_indptr[X_start+i], X_indptr[X_start+i+1]):
101+
X_i_col_idx = X_indices[X_i_ptr]
102+
dist_middle_terms[k] += -2 * X_data[X_i_ptr] * Y[Y_start + j, X_i_col_idx]
103+
76104

77105
cdef class MiddleTermComputer{{name_suffix}}:
78106
"""Helper class to compute a Euclidean distance matrix in chunks.
@@ -103,7 +131,7 @@ cdef class MiddleTermComputer{{name_suffix}}:
103131
n_features,
104132
chunk_size,
105133
) -> MiddleTermComputer{{name_suffix}}:
106-
"""Return the DatasetsPair implementation for the given arguments.
134+
"""Return the MiddleTermComputer implementation for the given arguments.
107135

108136
Parameters
109137
----------
@@ -143,12 +171,39 @@ cdef class MiddleTermComputer{{name_suffix}}:
143171
n_features,
144172
chunk_size,
145173
)
146-
174+
if X_is_sparse and not Y_is_sparse:
175+
return SparseDenseMiddleTermComputer{{name_suffix}}(
176+
X,
177+
Y,
178+
effective_n_threads,
179+
chunks_n_threads,
180+
dist_middle_terms_chunks_size,
181+
n_features,
182+
chunk_size,
183+
c_ordered_middle_term=True
184+
)
185+
if not X_is_sparse and Y_is_sparse:
186+
# NOTE: The Dense-Sparse case is implemented via the Sparse-Dense case.
187+
#
188+
# To do so:
189+
# - X (dense) and Y (sparse) are swapped
190+
# - the distance middle term is seen as F-ordered for consistency
191+
# (c_ordered_middle_term = False)
192+
return SparseDenseMiddleTermComputer{{name_suffix}}(
193+
# Mind that X and Y are swapped here.
194+
Y,
195+
X,
196+
effective_n_threads,
197+
chunks_n_threads,
198+
dist_middle_terms_chunks_size,
199+
n_features,
200+
chunk_size,
201+
c_ordered_middle_term=False,
202+
)
147203
raise NotImplementedError(
148-
"X and Y must be both CSR sparse matrices or both numpy arrays."
204+
"X and Y must be CSR sparse matrices or numpy arrays."
149205
)
150206

151-
152207
@classmethod
153208
def unpack_csr_matrix(cls, X: csr_matrix):
154209
"""Ensure that the CSR matrix is indexed with SPARSE_INDEX_TYPE."""
@@ -486,5 +541,101 @@ cdef class SparseSparseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{nam
486541

487542
return dist_middle_terms
488543

544+
cdef class SparseDenseMiddleTermComputer{{name_suffix}}(MiddleTermComputer{{name_suffix}}):
545+
"""Middle term of the Euclidean distance between chunks of a CSR matrix and a np.ndarray.
546+
547+
The logic of the computation is wrapped in the routine _middle_term_sparse_dense_{{name_suffix}}.
548+
This routine iterates over the data, indices and indptr arrays of the sparse matrix
549+
without densifying it.
550+
"""
551+
552+
def __init__(
553+
self,
554+
X,
555+
Y,
556+
ITYPE_t effective_n_threads,
557+
ITYPE_t chunks_n_threads,
558+
ITYPE_t dist_middle_terms_chunks_size,
559+
ITYPE_t n_features,
560+
ITYPE_t chunk_size,
561+
bint c_ordered_middle_term,
562+
):
563+
super().__init__(
564+
effective_n_threads,
565+
chunks_n_threads,
566+
dist_middle_terms_chunks_size,
567+
n_features,
568+
chunk_size,
569+
)
570+
self.X_data, self.X_indices, self.X_indptr = self.unpack_csr_matrix(X)
571+
self.Y = Y
572+
self.c_ordered_middle_term = c_ordered_middle_term
573+
574+
cdef void _parallel_on_X_pre_compute_and_reduce_distances_on_chunks(
575+
self,
576+
ITYPE_t X_start,
577+
ITYPE_t X_end,
578+
ITYPE_t Y_start,
579+
ITYPE_t Y_end,
580+
ITYPE_t thread_num,
581+
) noexcept nogil:
582+
# Fill the thread's dist_middle_terms_chunks with 0.0 before
583+
# computing its elements in _compute_dist_middle_terms.
584+
fill(
585+
self.dist_middle_terms_chunks[thread_num].begin(),
586+
self.dist_middle_terms_chunks[thread_num].end(),
587+
0.0,
588+
)
589+
590+
cdef void _parallel_on_Y_pre_compute_and_reduce_distances_on_chunks(
591+
self,
592+
ITYPE_t X_start,
593+
ITYPE_t X_end,
594+
ITYPE_t Y_start,
595+
ITYPE_t Y_end,
596+
ITYPE_t thread_num,
597+
) noexcept nogil:
598+
# Fill the thread's dist_middle_terms_chunks with 0.0 before
599+
# computing its elements in _compute_dist_middle_terms.
600+
fill(
601+
self.dist_middle_terms_chunks[thread_num].begin(),
602+
self.dist_middle_terms_chunks[thread_num].end(),
603+
0.0,
604+
)
605+
606+
cdef DTYPE_t * _compute_dist_middle_terms(
607+
self,
608+
ITYPE_t X_start,
609+
ITYPE_t X_end,
610+
ITYPE_t Y_start,
611+
ITYPE_t Y_end,
612+
ITYPE_t thread_num,
613+
) noexcept nogil:
614+
cdef:
615+
DTYPE_t *dist_middle_terms = (
616+
self.dist_middle_terms_chunks[thread_num].data()
617+
)
618+
619+
# For the dense-sparse case, we use the sparse-dense case
620+
# with dist_middle_terms seen as F-ordered.
621+
# Hence we swap the X and Y chunk bounds (start and end) here.
622+
if not self.c_ordered_middle_term:
623+
X_start, Y_start = Y_start, X_start
624+
X_end, Y_end = Y_end, X_end
625+
626+
_middle_term_sparse_dense_{{name_suffix}}(
627+
self.X_data,
628+
self.X_indices,
629+
self.X_indptr,
630+
X_start,
631+
X_end,
632+
self.Y,
633+
Y_start,
634+
Y_end,
635+
self.c_ordered_middle_term,
636+
dist_middle_terms,
637+
)
638+
639+
return dist_middle_terms
489640

490641
{{endfor}}

sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,7 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}})
8282

8383
No instance should directly be created outside of this class method.
8484
"""
85-
if (
86-
metric in ("euclidean", "sqeuclidean")
87-
and not (issparse(X) ^ issparse(Y)) # "^" is XOR
88-
):
85+
if metric in ("euclidean", "sqeuclidean"):
8986
# Specialized implementation of RadiusNeighbors for the Euclidean
9087
# distance for every dense/sparse combination of X and Y.
9188
# This implementation computes the distances by chunk using

sklearn/metrics/tests/test_pairwise_distances_reduction.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -553,15 +553,11 @@ def test_pairwise_distances_reduction_is_usable_for():
553553
np.asfortranarray(X), Y, metric
554554
)
555555

556-
# We prefer not to use those implementations for fused sparse-dense when
557-
# metric="(sq)euclidean" because it's not yet the most efficient one on
558-
# all configurations of datasets.
559-
# See: https://github.com/scikit-learn/scikit-learn/pull/23585#issuecomment-1247996669 # noqa
560-
# TODO: implement specialisation for (sq)euclidean on fused sparse-dense
561-
# using sparse-dense routines for matrix-vector multiplications.
562-
assert not BaseDistancesReductionDispatcher.is_usable_for(
563-
X_csr, Y, metric="euclidean"
556+
assert BaseDistancesReductionDispatcher.is_usable_for(X_csr, Y, metric="euclidean")
557+
assert BaseDistancesReductionDispatcher.is_usable_for(
558+
X, Y_csr, metric="sqeuclidean"
564559
)
560+
565561
assert BaseDistancesReductionDispatcher.is_usable_for(
566562
X_csr, Y_csr, metric="sqeuclidean"
567563
)
@@ -1060,7 +1056,7 @@ def test_pairwise_distances_argkmin(
10601056
row_idx, argkmin_indices_ref[row_idx]
10611057
]
10621058

1063-
for _X, _Y in [(X, Y), (X_csr, Y_csr)]:
1059+
for _X, _Y in itertools.product((X, X_csr), (Y, Y_csr)):
10641060
argkmin_distances, argkmin_indices = ArgKmin.compute(
10651061
_X,
10661062
_Y,

0 commit comments

Comments
 (0)