ENH: RadiusNeighborsRegressor speedup #24053


Open: wants to merge 3 commits into main
44 changes: 41 additions & 3 deletions asv_benchmarks/benchmarks/neighbors.py
@@ -1,8 +1,8 @@
-from sklearn.neighbors import KNeighborsClassifier
+from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsRegressor
 
 from .common import Benchmark, Estimator, Predictor
-from .datasets import _20newsgroups_lowdim_dataset
-from .utils import make_gen_classif_scorers
+from .datasets import _20newsgroups_lowdim_dataset, _synth_regression_dataset
+from .utils import make_gen_classif_scorers, make_gen_reg_scorers
 
 
 class KNeighborsClassifierBenchmark(Predictor, Estimator, Benchmark):
@@ -37,3 +37,41 @@ def make_estimator(self, params):
 
     def make_scorers(self):
         make_gen_classif_scorers(self)
+
+
+class RadiusNeighborsRegressorBenchmark(Predictor, Estimator, Benchmark):
+    """
+    Benchmarks for RadiusNeighborsRegressor
+    """
+
+    param_names = ["algorithm", "dimension", "n_jobs"]
+    params = (
+        ["brute", "kd_tree", "ball_tree"],
+        ["very-low", "low", "high"],
+        Benchmark.n_jobs_vals,
+    )
+    dim_to_number = {"very-low": 3, "low": 20, "high": 200}
+
+    def setup_cache(self):
+        super().setup_cache()
+
+    def make_data(self, params):
+        algorithm, dimension, n_jobs = params
+
+        n_features = self.dim_to_number[dimension]
+
+        data = _synth_regression_dataset(n_samples=10000, n_features=n_features)
Review comment (Member):
A similar improvement has been tried in #23721, but it suffered from memory overconsumption as n_samples got larger (see #23721 (review)).

Is it possible to report ASV results for n_samples=int(1e6) or even n_samples=int(1e7)?
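A minimal sketch of such a stress test, for illustration only: make_regression stands in for the benchmark helper _synth_regression_dataset, and all sizes here are assumptions, not results from this PR.

    # Illustrative memory stress test at the suggested n_samples; not part of this PR.
    from sklearn.datasets import make_regression
    from sklearn.neighbors import RadiusNeighborsRegressor

    X, y = make_regression(n_samples=int(1e6), n_features=3, random_state=0)
    reg = RadiusNeighborsRegressor(algorithm="ball_tree", radius=0.05).fit(X, y)
    # Peak memory during predict() is the quantity of interest: the intermediate
    # neighbor structures (and the CSR matrix) grow with the total neighbor count.
    y_pred = reg.predict(X[:10_000])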


+
+        return data
+
+    def make_estimator(self, params):
+        algorithm, dimension, n_jobs = params
+
+        estimator = RadiusNeighborsRegressor(
+            algorithm=algorithm, n_jobs=n_jobs, radius=0.05
Review comment (Member):
Due to the curse of dimensionality, the radius is notably small for dim_to_number >= 20. Is it possible to scale it by log(dim_to_number) or to define it as the 10th percentile of the observed distances?
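For concreteness, a sketch of the percentile idea; the 1-NN distance proxy and all names below are assumptions, not part of the PR.

    # Pick the radius as the 10th percentile of observed 1-NN distances so it
    # adapts to the dimension, rather than hard-coding radius=0.05.
    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.neighbors import NearestNeighbors

    X, _ = make_regression(n_samples=10_000, n_features=20, random_state=0)
    dist, _ = NearestNeighbors(n_neighbors=2).fit(X).kneighbors(X)
    radius = np.quantile(dist[:, 1], 0.10)  # column 0 is each point's self-distance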

+        )
+
+        return estimator
+
+    def make_scorers(self):
+        make_gen_reg_scorers(self)
37 changes: 21 additions & 16 deletions sklearn/neighbors/_regression.py
@@ -13,6 +13,7 @@
 import warnings
 
 import numpy as np
+from scipy.sparse import csr_matrix
 
 from ._base import _get_weights
 from ._base import NeighborsBase, KNeighborsMixin, RadiusNeighborsMixin
@@ -466,25 +467,29 @@ def predict(self, X):
         if _y.ndim == 1:
             _y = _y.reshape((-1, 1))
 
-        empty_obs = np.full_like(_y[0], np.nan)
+        # converting weights to sparse matrix
+        n_samples = len(neigh_ind)
+        n_neigh = np.asarray([len(ind) for ind in neigh_ind])
 
-        if weights is None:
-            y_pred = np.array(
-                [
-                    np.mean(_y[ind, :], axis=0) if len(ind) else empty_obs
-                    for (i, ind) in enumerate(neigh_ind)
-                ]
-            )
+        neigh_dst = np.concatenate(neigh_ind, axis=0)
+        neigh_src = np.repeat(np.arange(n_samples), repeats=n_neigh)
Review comment (Member):
This matrix might have a significant memory footprint.
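A back-of-the-envelope estimate of that footprint, under assumed sizes (float64 data and int32 indices, SciPy's index dtype for matrices this small in nnz):

    # Rough CSR size: nnz values (float64) + nnz column indices (int32)
    # + (n_samples + 1) row pointers (int32); nnz = total number of neighbors.
    n_samples, avg_neighbors = 1_000_000, 50   # assumed workload, not measured
    nnz = n_samples * avg_neighbors
    csr_bytes = nnz * 8 + nnz * 4 + (n_samples + 1) * 4
    print(f"~{csr_bytes / 1e9:.1f} GB")        # ~0.6 GB under these assumptions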


+        if weights is None:
+            weights = np.ones(len(neigh_src), dtype=bool)
         else:
-            y_pred = np.array(
-                [
-                    np.average(_y[ind, :], axis=0, weights=weights[i])
-                    if len(ind)
-                    else empty_obs
-                    for (i, ind) in enumerate(neigh_ind)
-                ]
-            )
+            weights = np.concatenate(weights, axis=0)
+
+        weights = csr_matrix(
+            (weights, (neigh_src, neigh_dst)),
+            shape=(n_samples, len(_y)),
+        )
+
+        # normalization factor so weights sum = 1
+        norm_factor = weights.sum(axis=1)
+        y_pred = weights @ _y
+        with np.errstate(divide="ignore"):
+            # normalizing and assigning NaN to lonely samples
+            y_pred = np.where(norm_factor > 0, y_pred / norm_factor, np.nan)

         if np.any(np.isnan(y_pred)):
             empty_warning_msg = (
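For readers skimming the diff: the change replaces the per-query Python loop with a single sparse matrix product. A self-contained sketch of the same technique on toy data; the function name is hypothetical, though the variable names mirror the diff.

    import numpy as np
    from scipy.sparse import csr_matrix

    def radius_average(neigh_ind, y):
        """Average y over each query's neighborhood with one CSR matmul."""
        n_queries = len(neigh_ind)
        n_neigh = np.asarray([len(ind) for ind in neigh_ind])
        cols = np.concatenate(neigh_ind)                 # neighbor (training) indices
        rows = np.repeat(np.arange(n_queries), n_neigh)  # owning query of each entry
        w = csr_matrix(
            (np.ones(len(rows)), (rows, cols)), shape=(n_queries, len(y))
        )
        norm = np.asarray(w.sum(axis=1))                 # neighbor count per query
        pred = w @ y.reshape(len(y), -1)                 # sums over neighborhoods
        with np.errstate(divide="ignore", invalid="ignore"):
            # empty neighborhoods (norm == 0) become NaN, as in the PR
            return np.where(norm > 0, pred / norm, np.nan)

    y = np.array([1.0, 2.0, 3.0, 4.0])
    neigh_ind = [np.array([0, 1]), np.array([], dtype=int), np.array([3])]
    print(radius_average(neigh_ind, y))  # [[1.5], [nan], [4.0]]

One CSR build plus one matmul delegates all per-neighborhood work to compiled SciPy code, which is where the speedup comes from; the trade-off, as the reviewer notes above, is that the matrix holds one entry per (query, neighbor) pair.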