From a349f975b100343c879619596374254a8fa91d57 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Sat, 5 Aug 2023 14:32:48 +0200 Subject: [PATCH 1/5] MAINT Make `ArgKminClassMode` accept sparse datasets Signed-off-by: Julien Jerphanion --- sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py index e23da467d723a..3fbc5a5fbb3c2 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py +++ b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py @@ -474,9 +474,6 @@ def is_usable_for(cls, X, Y, metric) -> bool: """ return ( ArgKmin.is_usable_for(X, Y, metric) - # TODO: Support CSR matrices. - and not issparse(X) - and not issparse(Y) # TODO: implement Euclidean specialization with GEMM. and metric not in ("euclidean", "sqeuclidean") ) From 876f21c30344cace1a058c02ad65f255f6a55010 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Tue, 8 Aug 2023 18:40:15 +0200 Subject: [PATCH 2/5] Override valid_metrics over is_usable_for Signed-off-by: Julien Jerphanion Co-authored-by: Meekail Zain --- .../_dispatcher.py | 34 +++++-------------- 1 file changed, 8 insertions(+), 26 deletions(-) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py index 3fbc5a5fbb3c2..f8c14ed233705 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py +++ b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py @@ -451,32 +451,14 @@ class ArgKminClassMode(BaseDistancesReductionDispatcher): """ @classmethod - def is_usable_for(cls, X, Y, metric) -> bool: - """Return True if the dispatcher can be used for the given parameters. - - Parameters - ---------- - X : ndarray of shape (n_samples_X, n_features) - The input array to be labelled. 
- - Y : ndarray of shape (n_samples_Y, n_features) - The input array whose labels are provided through the `Y_labels` - parameter. - - metric : str, default='euclidean' - The distance metric to use. For a list of available metrics, see - the documentation of :class:`~sklearn.metrics.DistanceMetric`. - Currently does not support `'precomputed'`. - - Returns - ------- - True if the PairwiseDistancesReduction can be used, else False. - """ - return ( - ArgKmin.is_usable_for(X, Y, metric) - # TODO: implement Euclidean specialization with GEMM. - and metric not in ("euclidean", "sqeuclidean") - ) + def valid_metrics(cls) -> List[str]: + excluded = { + # Euclidean is technically usable for ArgKminClassMode but it would not be competitive. + # TODO: implement Euclidean specialization using GEMM. + "euclidean" + "sqeuclidean", + } + return list(set(BaseDistancesReductionDispatcher.valid_metrics()) - excluded) @classmethod def compute( From 4200088045c55cd49f0dc519f091bd903d0e4397 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Sun, 13 Aug 2023 20:32:53 +0200 Subject: [PATCH 3/5] docs: Add changelog entry Signed-off-by: Julien Jerphanion --- doc/whats_new/v1.4.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 3508d85fdcbff..5323610c39697 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -160,6 +160,11 @@ Changelog :mod:`sklearn.neighbors` ........................ +- |Efficiency| :meth:`sklearn.neighbors.KNeighborsClassifier.predict` and + :meth:`sklearn.neighbors.KNeighborsClassifier.predict_proba` now efficiently support + pairs of dense and sparse datasets. + :pr:`27018` by :user:`Julien Jerphanion <jjerphan>`. + - |Fix| Neighbors based estimators now correctly work when `metric="minkowski"` and the metric parameter `p` is in the range `0 < p < 1`, regardless of the `dtype` of `X`. :pr:`26760` by :user:`Shreesha Kumar Bhat `. 
From 72d1945e8cb103916029fee617c1320fa158fe47 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Sun, 13 Aug 2023 20:45:15 +0200 Subject: [PATCH 4/5] Add a smol comma Signed-off-by: Julien Jerphanion --- sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py index f8c14ed233705..0025f1892bb74 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py +++ b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py @@ -455,7 +455,7 @@ def valid_metrics(cls) -> List[str]: excluded = { # Euclidean is technically usable for ArgKminClassMode but it would not be competitive. # TODO: implement Euclidean specialization using GEMM. - "euclidean" + "euclidean", "sqeuclidean", } return list(set(BaseDistancesReductionDispatcher.valid_metrics()) - excluded) From 0e8ee6b9c95ac8dbc58c5cdf5ccba86c6ef39670 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Mon, 14 Aug 2023 08:12:15 +0200 Subject: [PATCH 5/5] DOC Shorten long line comment Signed-off-by: Julien Jerphanion --- sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py index 0025f1892bb74..937a6fd083260 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py +++ b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py @@ -453,7 +453,8 @@ class ArgKminClassMode(BaseDistancesReductionDispatcher): @classmethod def valid_metrics(cls) -> List[str]: excluded = { - # Euclidean is technically usable for ArgKminClassMode but it would not be competitive. + # Euclidean is technically usable for ArgKminClassMode + # but its current implementation would not be competitive. 
# TODO: implement Euclidean specialization using GEMM. "euclidean", "sqeuclidean",