From 73ae8b2802ebe26b1e6901bb50b2462dc365d254 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Thu, 3 Mar 2022 13:59:35 +0100
Subject: [PATCH 1/5] TST Adapt test_neighbors.py to test implementations on
 32-bit datasets

---
 sklearn/neighbors/tests/test_neighbors.py | 233 +++++++++++++---------
 1 file changed, 140 insertions(+), 93 deletions(-)

diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py
index a1e0b01ef3eeb..fe436d9e28667 100644
--- a/sklearn/neighbors/tests/test_neighbors.py
+++ b/sklearn/neighbors/tests/test_neighbors.py
@@ -66,6 +66,7 @@
     set.intersection(*map(set, neighbors.VALID_METRICS.values()))
 )
 P = (1, 2, 3, 4, np.inf)
+DTYPES = (np.float64, np.float32)
 JOBLIB_BACKENDS = list(joblib.parallel.BACKENDS.keys())
 
 # Filter deprecation warnings.
@@ -125,6 +126,9 @@ def _weight_func(dist):
     return retval**2
 
 
+WEIGHTS = ["uniform", "distance", _weight_func]
+
+
 @pytest.mark.parametrize(
     "n_samples, n_features, n_query_pts, n_neighbors",
     [
@@ -133,6 +137,7 @@ def _weight_func(dist):
     ],
 )
 @pytest.mark.parametrize("query_is_train", [False, True])
+@pytest.mark.parametrize("dtype", DTYPES)
 @pytest.mark.parametrize("metric", COMMON_VALID_METRICS)
 def test_unsupervised_kneighbors(
     n_samples,
@@ -140,6 +145,7 @@ def test_unsupervised_kneighbors(
     n_query_pts,
     n_neighbors,
     query_is_train,
+    dtype,
     metric,
 ):
     # The different algorithms must return identical results
@@ -148,9 +154,11 @@ def test_unsupervised_kneighbors(
 
     # Redefining the rng locally to use the same generated X
     local_rng = np.random.RandomState(0)
-    X = local_rng.rand(n_samples, n_features)
+    X = local_rng.rand(n_samples, n_features).astype(dtype)
 
-    query = X if query_is_train else local_rng.rand(n_query_pts, n_features)
+    query = (
+        X if query_is_train else local_rng.rand(n_query_pts, n_features).astype(dtype)
+    )
 
     results_nodist = []
     results = []
@@ -207,6 +215,7 @@ def test_unsupervised_kneighbors(
 )
 @pytest.mark.parametrize("metric", COMMON_VALID_METRICS)
 @pytest.mark.parametrize("n_neighbors, radius", [(1, 100), (50, 500), (100, 1000)])
+@pytest.mark.parametrize("dtype", DTYPES)
 @pytest.mark.parametrize(
     "NeighborsMixinSubclass",
     [
@@ -223,6 +232,7 @@ def test_neigh_predictions_algorithm_agnosticity(
     metric,
     n_neighbors,
     radius,
+    dtype,
     NeighborsMixinSubclass,
 ):
     # The different algorithms must return identical predictions results
@@ -230,10 +240,10 @@ def test_neigh_predictions_algorithm_agnosticity(
 
     # Redefining the rng locally to use the same generated X
     local_rng = np.random.RandomState(0)
-    X = local_rng.rand(n_samples, n_features)
+    X = local_rng.rand(n_samples, n_features).astype(dtype)
     y = local_rng.randint(3, size=n_samples)
 
-    query = local_rng.rand(n_query_pts, n_features)
+    query = local_rng.rand(n_query_pts, n_features).astype(dtype)
 
     predict_results = []
 
@@ -260,6 +270,8 @@ def test_neigh_predictions_algorithm_agnosticity(
                 f"The '{algorithm}' and '{next_algorithm}' "
                 "algorithms return different predictions."
             ),
+            rtol=1e-7,
+            atol=1e-7,
         )
 
 
@@ -271,10 +283,11 @@ def test_neigh_predictions_algorithm_agnosticity(
         neighbors.NearestNeighbors,
     ],
 )
-def test_unsupervised_inputs(KNeighborsMixinSubclass):
+@pytest.mark.parametrize("dtype", DTYPES)
+def test_unsupervised_inputs(KNeighborsMixinSubclass, dtype):
     # Test unsupervised inputs for neighbors estimators
 
-    X = rng.random_sample((10, 3))
+    X = rng.random_sample((10, 3)).astype(dtype)
     y = rng.randint(3, size=10)
     nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1)
     nbrs_fid.fit(X)
@@ -508,15 +521,16 @@ def test_precomputed_cross_validation():
         assert_array_equal(metric_score, precomp_score)
 
 
+@pytest.mark.parametrize("dtype", DTYPES)
 def test_unsupervised_radius_neighbors(
-    n_samples=20, n_features=5, n_query_pts=2, radius=0.5, random_state=0
+    dtype, n_samples=20, n_features=5, n_query_pts=2, radius=0.5, random_state=0
 ):
     # Test unsupervised radius-based query
     rng = np.random.RandomState(random_state)
 
-    X = rng.rand(n_samples, n_features)
+    X = rng.rand(n_samples, n_features).astype(dtype)
 
-    test = rng.rand(n_query_pts, n_features)
+    test = rng.rand(n_query_pts, n_features).astype(dtype)
 
     for p in P:
         results = []
@@ -552,38 +566,45 @@ def test_unsupervised_radius_neighbors(
             )
 
 
+@pytest.mark.parametrize("dtype", DTYPES)
+@pytest.mark.parametrize("algorithm", ALGORITHMS)
+@pytest.mark.parametrize("weights", WEIGHTS)
 def test_kneighbors_classifier(
-    n_samples=40, n_features=5, n_test_pts=10, n_neighbors=5, random_state=0
+    dtype,
+    algorithm,
+    weights,
+    n_samples=40,
+    n_features=5,
+    n_test_pts=10,
+    n_neighbors=5,
+    random_state=0,
 ):
     # Test k-neighbors classification
     rng = np.random.RandomState(random_state)
-    X = 2 * rng.rand(n_samples, n_features) - 1
+    X = 2 * rng.rand(n_samples, n_features).astype(dtype) - 1
     y = ((X**2).sum(axis=1) < 0.5).astype(int)
     y_str = y.astype(str)
 
-    weight_func = _weight_func
-
-    for algorithm in ALGORITHMS:
-        for weights in ["uniform", "distance", weight_func]:
-            knn = neighbors.KNeighborsClassifier(
-                n_neighbors=n_neighbors, weights=weights, algorithm=algorithm
-            )
-            knn.fit(X, y)
-            epsilon = 1e-5 * (2 * rng.rand(1, n_features) - 1)
-            y_pred = knn.predict(X[:n_test_pts] + epsilon)
-            assert_array_equal(y_pred, y[:n_test_pts])
-            # Test prediction with y_str
-            knn.fit(X, y_str)
-            y_pred = knn.predict(X[:n_test_pts] + epsilon)
-            assert_array_equal(y_pred, y_str[:n_test_pts])
+    knn = neighbors.KNeighborsClassifier(
+        n_neighbors=n_neighbors, weights=weights, algorithm=algorithm
+    )
+    knn.fit(X, y)
+    epsilon = 1e-5 * (2 * rng.rand(1, n_features) - 1)
+    y_pred = knn.predict(X[:n_test_pts] + epsilon)
+    assert_array_equal(y_pred, y[:n_test_pts])
+    # Test prediction with y_str
+    knn.fit(X, y_str)
+    y_pred = knn.predict(X[:n_test_pts] + epsilon)
+    assert_array_equal(y_pred, y_str[:n_test_pts])
 
 
+@pytest.mark.parametrize("dtype", DTYPES)
 def test_kneighbors_classifier_float_labels(
-    n_samples=40, n_features=5, n_test_pts=10, n_neighbors=5, random_state=0
+    dtype, n_samples=40, n_features=5, n_test_pts=10, n_neighbors=5, random_state=0
 ):
     # Test k-neighbors classification
     rng = np.random.RandomState(random_state)
-    X = 2 * rng.rand(n_samples, n_features) - 1
+    X = 2 * rng.rand(n_samples, n_features).astype(dtype) - 1
     y = ((X**2).sum(axis=1) < 0.5).astype(int)
 
     knn = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors)
@@ -593,9 +614,12 @@ def test_kneighbors_classifier_float_labels(
     assert_array_equal(y_pred, y[:n_test_pts])
 
 
-def test_kneighbors_classifier_predict_proba():
+@pytest.mark.parametrize("dtype", DTYPES)
+def test_kneighbors_classifier_predict_proba(dtype):
     # Test KNeighborsClassifier.predict_proba() method
-    X = np.array([[0, 2, 0], [0, 2, 1], [2, 0, 0], [2, 2, 0], [0, 0, 2], [0, 0, 1]])
+    X = np.array(
+        [[0, 2, 0], [0, 2, 1], [2, 0, 0], [2, 2, 0], [0, 0, 2], [0, 0, 1]]
+    ).astype(dtype)
     y = np.array([4, 4, 5, 5, 1, 1])
     cls = neighbors.KNeighborsClassifier(n_neighbors=3, p=1)  # cityblock dist
     cls.fit(X, y)
@@ -623,29 +647,35 @@ def test_kneighbors_classifier_predict_proba():
     assert_array_almost_equal(real_prob, y_prob)
 
 
+@pytest.mark.parametrize("dtype", DTYPES)
+@pytest.mark.parametrize("algorithm", ALGORITHMS)
+@pytest.mark.parametrize("weights", WEIGHTS)
 def test_radius_neighbors_classifier(
-    n_samples=40, n_features=5, n_test_pts=10, radius=0.5, random_state=0
+    dtype,
+    algorithm,
+    weights,
+    n_samples=40,
+    n_features=5,
+    n_test_pts=10,
+    radius=0.5,
+    random_state=0,
 ):
     # Test radius-based classification
     rng = np.random.RandomState(random_state)
-    X = 2 * rng.rand(n_samples, n_features) - 1
+    X = 2 * rng.rand(n_samples, n_features).astype(dtype) - 1
     y = ((X**2).sum(axis=1) < 0.5).astype(int)
     y_str = y.astype(str)
 
-    weight_func = _weight_func
-
-    for algorithm in ALGORITHMS:
-        for weights in ["uniform", "distance", weight_func]:
-            neigh = neighbors.RadiusNeighborsClassifier(
-                radius=radius, weights=weights, algorithm=algorithm
-            )
-            neigh.fit(X, y)
-            epsilon = 1e-5 * (2 * rng.rand(1, n_features) - 1)
-            y_pred = neigh.predict(X[:n_test_pts] + epsilon)
-            assert_array_equal(y_pred, y[:n_test_pts])
-            neigh.fit(X, y_str)
-            y_pred = neigh.predict(X[:n_test_pts] + epsilon)
-            assert_array_equal(y_pred, y_str[:n_test_pts])
+    neigh = neighbors.RadiusNeighborsClassifier(
+        radius=radius, weights=weights, algorithm=algorithm
+    )
+    neigh.fit(X, y)
+    epsilon = 1e-5 * (2 * rng.rand(1, n_features) - 1)
+    y_pred = neigh.predict(X[:n_test_pts] + epsilon)
+    assert_array_equal(y_pred, y[:n_test_pts])
+    neigh.fit(X, y_str)
+    y_pred = neigh.predict(X[:n_test_pts] + epsilon)
+    assert_array_equal(y_pred, y_str[:n_test_pts])
 
 
 # TODO: Remove in v1.2
@@ -663,65 +693,67 @@ def test_radius_neighbors_classifier_kwargs_is_deprecated():
         neighbors.RadiusNeighborsClassifier(**extra_kwargs)
 
 
-def test_radius_neighbors_classifier_when_no_neighbors():
+@pytest.mark.parametrize("dtype", DTYPES)
+@pytest.mark.parametrize("algorithm", ALGORITHMS)
+@pytest.mark.parametrize("weights", WEIGHTS)
+@pytest.mark.parametrize("outlier_label", [0, -1, None])
+def test_radius_neighbors_classifier_when_no_neighbors(
+    dtype, algorithm, weights, outlier_label
+):
     # Test radius-based classifier when no neighbors found.
     # In this case it should rise an informative exception
 
-    X = np.array([[1.0, 1.0], [2.0, 2.0]])
+    X = np.array([[1.0, 1.0], [2.0, 2.0]]).astype(dtype)
     y = np.array([1, 2])
     radius = 0.1
 
-    z1 = np.array([[1.01, 1.01], [2.01, 2.01]])  # no outliers
-    z2 = np.array([[1.01, 1.01], [1.4, 1.4]])  # one outlier
+    z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(dtype)  # no outliers
+    z2 = np.array([[1.01, 1.01], [1.4, 1.4]]).astype(dtype)  # one outlier
 
-    weight_func = _weight_func
-
-    for outlier_label in [0, -1, None]:
-        for algorithm in ALGORITHMS:
-            for weights in ["uniform", "distance", weight_func]:
-                rnc = neighbors.RadiusNeighborsClassifier
-                clf = rnc(
-                    radius=radius,
-                    weights=weights,
-                    algorithm=algorithm,
-                    outlier_label=outlier_label,
-                )
-                clf.fit(X, y)
-                assert_array_equal(np.array([1, 2]), clf.predict(z1))
-                if outlier_label is None:
-                    with pytest.raises(ValueError):
-                        clf.predict(z2)
+    rnc = neighbors.RadiusNeighborsClassifier
+    clf = rnc(
+        radius=radius,
+        weights=weights,
+        algorithm=algorithm,
+        outlier_label=outlier_label,
+    )
+    clf.fit(X, y)
+    assert_array_equal(np.array([1, 2]), clf.predict(z1))
+    if outlier_label is None:
+        with pytest.raises(ValueError):
+            clf.predict(z2)
 
 
-def test_radius_neighbors_classifier_outlier_labeling():
+@pytest.mark.parametrize("dtype", DTYPES)
+@pytest.mark.parametrize("algorithm", ALGORITHMS)
+@pytest.mark.parametrize("weights", WEIGHTS)
+def test_radius_neighbors_classifier_outlier_labeling(dtype, algorithm, weights):
     # Test radius-based classifier when no neighbors found and outliers
     # are labeled.
 
-    X = np.array([[1.0, 1.0], [2.0, 2.0], [0.99, 0.99], [0.98, 0.98], [2.01, 2.01]])
+    X = np.array(
+        [[1.0, 1.0], [2.0, 2.0], [0.99, 0.99], [0.98, 0.98], [2.01, 2.01]]
+    ).astype(dtype)
     y = np.array([1, 2, 1, 1, 2])
     radius = 0.1
 
-    z1 = np.array([[1.01, 1.01], [2.01, 2.01]])  # no outliers
-    z2 = np.array([[1.4, 1.4], [1.01, 1.01], [2.01, 2.01]])  # one outlier
+    z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(dtype)  # no outliers
+    z2 = np.array([[1.4, 1.4], [1.01, 1.01], [2.01, 2.01]]).astype(dtype)  # one outlier
     correct_labels1 = np.array([1, 2])
     correct_labels2 = np.array([-1, 1, 2])
     outlier_proba = np.array([0, 0])
 
-    weight_func = _weight_func
-
-    for algorithm in ALGORITHMS:
-        for weights in ["uniform", "distance", weight_func]:
-            clf = neighbors.RadiusNeighborsClassifier(
-                radius=radius, weights=weights, algorithm=algorithm, outlier_label=-1
-            )
-            clf.fit(X, y)
-            assert_array_equal(correct_labels1, clf.predict(z1))
-            assert_array_equal(correct_labels2, clf.predict(z2))
-            assert_array_equal(outlier_proba, clf.predict_proba(z2)[0])
+    clf = neighbors.RadiusNeighborsClassifier(
+        radius=radius, weights=weights, algorithm=algorithm, outlier_label=-1
+    )
+    clf.fit(X, y)
+    assert_array_equal(correct_labels1, clf.predict(z1))
+    assert_array_equal(correct_labels2, clf.predict(z2))
+    assert_array_equal(outlier_proba, clf.predict_proba(z2)[0])
 
     # test outlier_labeling of using predict_proba()
     RNC = neighbors.RadiusNeighborsClassifier
-    X = np.array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]])
+    X = np.array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]).astype(dtype)
     y = np.array([0, 2, 2, 1, 1, 1, 3, 3, 3, 3])
 
     # test outlier_label scalar verification
@@ -1450,13 +1482,14 @@ def test_neighbors_badargs():
         - set(["pyfunc", *BOOL_METRICS])
     ),
 )
+@pytest.mark.parametrize("dtype", DTYPES)
 def test_neighbors_metrics(
-    metric, n_samples=20, n_features=3, n_query_pts=2, n_neighbors=5
+    dtype, metric, n_samples=20, n_features=3, n_query_pts=2, n_neighbors=5
 ):
     # Test computing the neighbors for various metrics
     algorithms = ["brute", "ball_tree", "kd_tree"]
-    X_train = rng.rand(n_samples, n_features)
-    X_test = rng.rand(n_query_pts, n_features)
+    X_train = rng.rand(n_samples, n_features).astype(dtype)
+    X_test = rng.rand(n_query_pts, n_features).astype(dtype)
 
     metric_params_list = _generate_test_params_for(metric, n_features)
 
@@ -1496,7 +1529,12 @@ def test_neighbors_metrics(
                 and algorithm == "brute"
                 and sp_version >= parse_version("1.6.0")
             ):
-                ExceptionToAssert = FutureWarning
+                if dtype == np.float64:
+                    # Warning from sklearn.metrics._dist_metrics.WMinkowskiDistance
+                    ExceptionToAssert = FutureWarning
+                if dtype == np.float32:
+                    # Warning from Scipy
+                    ExceptionToAssert = DeprecationWarning
 
             with pytest.warns(ExceptionToAssert):
                 results[algorithm] = neigh.kneighbors(X_test, return_distance=True)
@@ -1521,12 +1559,13 @@ def test_neighbors_metrics(
 @pytest.mark.parametrize(
     "metric", sorted(set(neighbors.VALID_METRICS["brute"]) - set(["precomputed"]))
 )
+@pytest.mark.parametrize("dtype", DTYPES)
 def test_kneighbors_brute_backend(
-    metric, n_samples=2000, n_features=30, n_query_pts=100, n_neighbors=5
+    dtype, metric, n_samples=2000, n_features=30, n_query_pts=100, n_neighbors=5
 ):
     # Both backend for the 'brute' algorithm of kneighbors must give identical results.
-    X_train = rng.rand(n_samples, n_features)
-    X_test = rng.rand(n_query_pts, n_features)
+    X_train = rng.rand(n_samples, n_features).astype(dtype)
+    X_test = rng.rand(n_query_pts, n_features).astype(dtype)
 
     # Haversine distance only accepts 2D data
     if metric == "haversine":
@@ -1539,7 +1578,12 @@ def test_kneighbors_brute_backend(
     # wminkoski is deprecated in SciPy 1.6.0 and removed in 1.8.0
     ExceptionToAssert = None
     if metric == "wminkowski" and sp_version >= parse_version("1.6.0"):
-        ExceptionToAssert = FutureWarning
+        if dtype == np.float64:
+            # Warning from sklearn.metrics._dist_metrics.WMinkowskiDistance
+            ExceptionToAssert = FutureWarning
+        if dtype == np.float32:
+            # Warning from Scipy
+            ExceptionToAssert = DeprecationWarning
 
     for metric_params in metric_params_list:
         p = metric_params.pop("p", 2)
@@ -1593,8 +1637,11 @@ def custom_metric(x1, x2):
 # TODO: Remove filterwarnings in 1.3 when wminkowski is removed
 @pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
 @pytest.mark.parametrize("metric", neighbors.VALID_METRICS["brute"])
-def test_valid_brute_metric_for_auto_algorithm(metric, n_samples=20, n_features=12):
-    X = rng.rand(n_samples, n_features)
+@pytest.mark.parametrize("dtype", DTYPES)
+def test_valid_brute_metric_for_auto_algorithm(
+    dtype, metric, n_samples=20, n_features=12
+):
+    X = rng.rand(n_samples, n_features).astype(dtype)
     Xcsr = csr_matrix(X)
 
     metric_params_list = _generate_test_params_for(metric, n_features)

From 7e04e7b9a5b8a9ae307219db67e7395ddb7eb3f8 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Thu, 17 Mar 2022 18:53:10 +0100
Subject: [PATCH 2/5] TST Use global_dtype

---
 sklearn/neighbors/tests/test_neighbors.py | 172 +++++++++++-----------
 1 file changed, 83 insertions(+), 89 deletions(-)

diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py
index cca85c5ca9bd3..4b83815cb1b10 100644
--- a/sklearn/neighbors/tests/test_neighbors.py
+++ b/sklearn/neighbors/tests/test_neighbors.py
@@ -42,7 +42,6 @@
 from sklearn.pipeline import make_pipeline
 from sklearn.utils._testing import (
     assert_allclose,
-    assert_array_almost_equal,
     assert_array_equal,
 )
 from sklearn.utils._testing import ignore_warnings
@@ -72,7 +71,6 @@
     set.intersection(*map(set, neighbors.VALID_METRICS.values()))
 )
 P = (1, 2, 3, 4, np.inf)
-DTYPES = (np.float64, np.float32)
 JOBLIB_BACKENDS = list(joblib.parallel.BACKENDS.keys())
 
 # Filter deprecation warnings.
@@ -143,15 +141,14 @@ def _weight_func(dist):
     ],
 )
 @pytest.mark.parametrize("query_is_train", [False, True])
-@pytest.mark.parametrize("dtype", DTYPES)
 @pytest.mark.parametrize("metric", COMMON_VALID_METRICS)
 def test_unsupervised_kneighbors(
+    global_dtype,
     n_samples,
     n_features,
     n_query_pts,
     n_neighbors,
     query_is_train,
-    dtype,
     metric,
 ):
     # The different algorithms must return identical results
@@ -160,10 +157,12 @@ def test_unsupervised_kneighbors(
 
     # Redefining the rng locally to use the same generated X
     local_rng = np.random.RandomState(0)
-    X = local_rng.rand(n_samples, n_features).astype(dtype)
+    X = local_rng.rand(n_samples, n_features).astype(global_dtype)
 
     query = (
-        X if query_is_train else local_rng.rand(n_query_pts, n_features).astype(dtype)
+        X
+        if query_is_train
+        else local_rng.rand(n_query_pts, n_features).astype(global_dtype)
     )
 
     results_nodist = []
@@ -221,7 +220,6 @@ def test_unsupervised_kneighbors(
 )
 @pytest.mark.parametrize("metric", COMMON_VALID_METRICS)
 @pytest.mark.parametrize("n_neighbors, radius", [(1, 100), (50, 500), (100, 1000)])
-@pytest.mark.parametrize("dtype", DTYPES)
 @pytest.mark.parametrize(
     "NeighborsMixinSubclass",
     [
@@ -232,13 +230,13 @@ def test_unsupervised_kneighbors(
     ],
 )
 def test_neigh_predictions_algorithm_agnosticity(
+    global_dtype,
     n_samples,
     n_features,
     n_query_pts,
     metric,
     n_neighbors,
     radius,
-    dtype,
     NeighborsMixinSubclass,
 ):
     # The different algorithms must return identical predictions results
@@ -246,10 +244,10 @@ def test_neigh_predictions_algorithm_agnosticity(
 
     # Redefining the rng locally to use the same generated X
     local_rng = np.random.RandomState(0)
-    X = local_rng.rand(n_samples, n_features).astype(dtype)
+    X = local_rng.rand(n_samples, n_features).astype(global_dtype)
     y = local_rng.randint(3, size=n_samples)
 
-    query = local_rng.rand(n_query_pts, n_features).astype(dtype)
+    query = local_rng.rand(n_query_pts, n_features).astype(global_dtype)
 
     predict_results = []
 
@@ -289,11 +287,10 @@ def test_neigh_predictions_algorithm_agnosticity(
         neighbors.NearestNeighbors,
     ],
 )
-@pytest.mark.parametrize("dtype", DTYPES)
-def test_unsupervised_inputs(KNeighborsMixinSubclass, dtype):
+def test_unsupervised_inputs(global_dtype, KNeighborsMixinSubclass):
     # Test unsupervised inputs for neighbors estimators
 
-    X = rng.random_sample((10, 3)).astype(dtype)
+    X = rng.random_sample((10, 3)).astype(global_dtype)
     y = rng.randint(3, size=10)
     nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1)
     nbrs_fid.fit(X)
@@ -307,8 +304,8 @@ def test_unsupervised_inputs(KNeighborsMixinSubclass, dtype):
 
         dist2, ind2 = nbrs.kneighbors(X)
 
-        assert_array_almost_equal(dist1, dist2)
-        assert_array_almost_equal(ind1, ind2)
+        assert_allclose(dist1, dist2)
+        assert_allclose(ind1, ind2)
 
 
 def test_n_neighbors_datatype():
@@ -359,8 +356,8 @@ def check_precomputed(make_train_test, estimators):
         )
         nbrs_D.fit(DXX)
         dist_D, ind_D = getattr(nbrs_D, method)(DYX)
-        assert_array_almost_equal(dist_X, dist_D)
-        assert_array_almost_equal(ind_X, ind_D)
+        assert_allclose(dist_X, dist_D)
+        assert_allclose(ind_X, ind_D)
 
         # Check auto works too
         nbrs_D = neighbors.NearestNeighbors(
@@ -368,14 +365,14 @@ def check_precomputed(make_train_test, estimators):
         )
         nbrs_D.fit(DXX)
         dist_D, ind_D = getattr(nbrs_D, method)(DYX)
-        assert_array_almost_equal(dist_X, dist_D)
-        assert_array_almost_equal(ind_X, ind_D)
+        assert_allclose(dist_X, dist_D)
+        assert_allclose(ind_X, ind_D)
 
         # Check X=None in prediction
         dist_X, ind_X = getattr(nbrs_X, method)(None)
         dist_D, ind_D = getattr(nbrs_D, method)(None)
-        assert_array_almost_equal(dist_X, dist_D)
-        assert_array_almost_equal(ind_X, ind_D)
+        assert_allclose(dist_X, dist_D)
+        assert_allclose(ind_X, ind_D)
 
         # Must raise a ValueError if the matrix is not of correct shape
         with pytest.raises(ValueError):
@@ -388,7 +385,7 @@ def check_precomputed(make_train_test, estimators):
         pred_X = est.fit(X, target).predict(Y)
         est.metric = "precomputed"
         pred_D = est.fit(DXX, target).predict(DYX)
-        assert_array_almost_equal(pred_X, pred_D)
+        assert_allclose(pred_X, pred_D)
 
 
 def test_precomputed_dense():
@@ -527,16 +524,15 @@ def test_precomputed_cross_validation():
         assert_array_equal(metric_score, precomp_score)
 
 
-@pytest.mark.parametrize("dtype", DTYPES)
 def test_unsupervised_radius_neighbors(
-    dtype, n_samples=20, n_features=5, n_query_pts=2, radius=0.5, random_state=0
+    global_dtype, n_samples=20, n_features=5, n_query_pts=2, radius=0.5, random_state=0
 ):
     # Test unsupervised radius-based query
     rng = np.random.RandomState(random_state)
 
-    X = rng.rand(n_samples, n_features).astype(dtype)
+    X = rng.rand(n_samples, n_features).astype(global_dtype)
 
-    test = rng.rand(n_query_pts, n_features).astype(dtype)
+    test = rng.rand(n_query_pts, n_features).astype(global_dtype)
 
     for p in P:
         results = []
@@ -557,26 +553,23 @@ def test_unsupervised_radius_neighbors(
                 i1[:] = i1[j]
             results.append((dist, ind))
 
-            assert_array_almost_equal(
-                np.concatenate(list(ind)), np.concatenate(list(ind1))
-            )
+            assert_allclose(np.concatenate(list(ind)), np.concatenate(list(ind1)))
 
         for i in range(len(results) - 1):
-            assert_array_almost_equal(
+            assert_allclose(
                 np.concatenate(list(results[i][0])),
                 np.concatenate(list(results[i + 1][0])),
             ),
-            assert_array_almost_equal(
+            assert_allclose(
                 np.concatenate(list(results[i][1])),
                 np.concatenate(list(results[i + 1][1])),
             )
 
 
-@pytest.mark.parametrize("dtype", DTYPES)
 @pytest.mark.parametrize("algorithm", ALGORITHMS)
 @pytest.mark.parametrize("weights", WEIGHTS)
 def test_kneighbors_classifier(
-    dtype,
+    global_dtype,
     algorithm,
     weights,
     n_samples=40,
@@ -587,7 +580,7 @@ def test_kneighbors_classifier(
 ):
     # Test k-neighbors classification
     rng = np.random.RandomState(random_state)
-    X = 2 * rng.rand(n_samples, n_features).astype(dtype) - 1
+    X = 2 * rng.rand(n_samples, n_features).astype(global_dtype) - 1
     y = ((X**2).sum(axis=1) < 0.5).astype(int)
     y_str = y.astype(str)
 
@@ -604,13 +597,17 @@ def test_kneighbors_classifier(
     assert_array_equal(y_pred, y_str[:n_test_pts])
 
 
-@pytest.mark.parametrize("dtype", DTYPES)
 def test_kneighbors_classifier_float_labels(
-    dtype, n_samples=40, n_features=5, n_test_pts=10, n_neighbors=5, random_state=0
+    global_dtype,
+    n_samples=40,
+    n_features=5,
+    n_test_pts=10,
+    n_neighbors=5,
+    random_state=0,
 ):
     # Test k-neighbors classification
     rng = np.random.RandomState(random_state)
-    X = 2 * rng.rand(n_samples, n_features).astype(dtype) - 1
+    X = 2 * rng.rand(n_samples, n_features).astype(global_dtype) - 1
     y = ((X**2).sum(axis=1) < 0.5).astype(int)
 
     knn = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors)
@@ -620,12 +617,11 @@ def test_kneighbors_classifier_float_labels(
     assert_array_equal(y_pred, y[:n_test_pts])
 
 
-@pytest.mark.parametrize("dtype", DTYPES)
-def test_kneighbors_classifier_predict_proba(dtype):
+def test_kneighbors_classifier_predict_proba(global_dtype):
     # Test KNeighborsClassifier.predict_proba() method
     X = np.array(
         [[0, 2, 0], [0, 2, 1], [2, 0, 0], [2, 2, 0], [0, 0, 2], [0, 0, 1]]
-    ).astype(dtype)
+    ).astype(global_dtype)
     y = np.array([4, 4, 5, 5, 1, 1])
     cls = neighbors.KNeighborsClassifier(n_neighbors=3, p=1)  # cityblock dist
     cls.fit(X, y)
@@ -650,14 +646,13 @@ def test_kneighbors_classifier_predict_proba(dtype):
     cls.fit(X, y)
     y_prob = cls.predict_proba(np.array([[0, 2, 0], [2, 2, 2]]))
     real_prob = np.array([[0, 1, 0], [0, 0.4, 0.6]])
-    assert_array_almost_equal(real_prob, y_prob)
+    assert_allclose(real_prob, y_prob)
 
 
-@pytest.mark.parametrize("dtype", DTYPES)
 @pytest.mark.parametrize("algorithm", ALGORITHMS)
 @pytest.mark.parametrize("weights", WEIGHTS)
 def test_radius_neighbors_classifier(
-    dtype,
+    global_dtype,
     algorithm,
     weights,
     n_samples=40,
@@ -668,7 +663,7 @@ def test_radius_neighbors_classifier(
 ):
     # Test radius-based classification
     rng = np.random.RandomState(random_state)
-    X = 2 * rng.rand(n_samples, n_features).astype(dtype) - 1
+    X = 2 * rng.rand(n_samples, n_features).astype(global_dtype) - 1
     y = ((X**2).sum(axis=1) < 0.5).astype(int)
     y_str = y.astype(str)
 
@@ -699,22 +694,21 @@ def test_radius_neighbors_classifier_kwargs_is_deprecated():
         neighbors.RadiusNeighborsClassifier(**extra_kwargs)
 
 
-@pytest.mark.parametrize("dtype", DTYPES)
 @pytest.mark.parametrize("algorithm", ALGORITHMS)
 @pytest.mark.parametrize("weights", WEIGHTS)
 @pytest.mark.parametrize("outlier_label", [0, -1, None])
 def test_radius_neighbors_classifier_when_no_neighbors(
-    dtype, algorithm, weights, outlier_label
+    global_dtype, algorithm, weights, outlier_label
 ):
     # Test radius-based classifier when no neighbors found.
     # In this case it should rise an informative exception
 
-    X = np.array([[1.0, 1.0], [2.0, 2.0]]).astype(dtype)
+    X = np.array([[1.0, 1.0], [2.0, 2.0]]).astype(global_dtype)
     y = np.array([1, 2])
     radius = 0.1
 
-    z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(dtype)  # no outliers
-    z2 = np.array([[1.01, 1.01], [1.4, 1.4]]).astype(dtype)  # one outlier
+    z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(global_dtype)  # no outliers
+    z2 = np.array([[1.01, 1.01], [1.4, 1.4]]).astype(global_dtype)  # one outlier
 
     rnc = neighbors.RadiusNeighborsClassifier
     clf = rnc(
@@ -730,21 +724,22 @@ def test_radius_neighbors_classifier_when_no_neighbors(
             clf.predict(z2)
 
 
-@pytest.mark.parametrize("dtype", DTYPES)
 @pytest.mark.parametrize("algorithm", ALGORITHMS)
 @pytest.mark.parametrize("weights", WEIGHTS)
-def test_radius_neighbors_classifier_outlier_labeling(dtype, algorithm, weights):
+def test_radius_neighbors_classifier_outlier_labeling(global_dtype, algorithm, weights):
     # Test radius-based classifier when no neighbors found and outliers
     # are labeled.
 
     X = np.array(
         [[1.0, 1.0], [2.0, 2.0], [0.99, 0.99], [0.98, 0.98], [2.01, 2.01]]
-    ).astype(dtype)
+    ).astype(global_dtype)
     y = np.array([1, 2, 1, 1, 2])
     radius = 0.1
 
-    z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(dtype)  # no outliers
-    z2 = np.array([[1.4, 1.4], [1.01, 1.01], [2.01, 2.01]]).astype(dtype)  # one outlier
+    z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(global_dtype)  # no outliers
+    z2 = np.array([[1.4, 1.4], [1.01, 1.01], [2.01, 2.01]]).astype(
+        global_dtype
+    )  # one outlier
     correct_labels1 = np.array([1, 2])
     correct_labels2 = np.array([-1, 1, 2])
     outlier_proba = np.array([0, 0])
@@ -759,7 +754,9 @@ def test_radius_neighbors_classifier_outlier_labeling(dtype, algorithm, weights)
 
     # test outlier_labeling of using predict_proba()
     RNC = neighbors.RadiusNeighborsClassifier
-    X = np.array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]).astype(dtype)
+    X = np.array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]).astype(
+        global_dtype
+    )
     y = np.array([0, 2, 2, 1, 1, 1, 3, 3, 3, 3])
 
     # test outlier_label scalar verification
@@ -896,7 +893,7 @@ def test_neighbors_regressors_zero_distance():
                 radius=radius, weights=weights, algorithm=algorithm
             )
             rnn.fit(X, y)
-            assert_array_almost_equal(rnn_correct_labels, rnn.predict(z))
+            assert_allclose(rnn_correct_labels, rnn.predict(z))
 
         for weights, corr_labels in zip(
             ["uniform", "distance"], [knn_correct_unif, knn_correct_dist]
@@ -905,7 +902,7 @@ def test_neighbors_regressors_zero_distance():
                 n_neighbors=2, weights=weights, algorithm=algorithm
             )
             knn.fit(X, y)
-            assert_array_almost_equal(corr_labels, knn.predict(z))
+            assert_allclose(corr_labels, knn.predict(z))
 
 
 def test_radius_neighbors_boundary_handling():
@@ -1037,7 +1034,7 @@ def test_RadiusNeighborsClassifier_multioutput():
         y_pred_mo = rnn_mo.predict(X_test)
 
         assert y_pred_mo.shape == y_test.shape
-        assert_array_almost_equal(y_pred_mo, y_pred_so)
+        assert_allclose(y_pred_mo, y_pred_so)
 
 
 def test_kneighbors_classifier_sparse(
@@ -1094,14 +1091,14 @@ def test_KNeighborsClassifier_multioutput():
         y_pred_mo = knn_mo.predict(X_test)
 
         assert y_pred_mo.shape == y_test.shape
-        assert_array_almost_equal(y_pred_mo, y_pred_so)
+        assert_allclose(y_pred_mo, y_pred_so)
 
         # Check proba
         y_pred_proba_mo = knn_mo.predict_proba(X_test)
         assert len(y_pred_proba_mo) == n_output
 
         for proba_mo, proba_so in zip(y_pred_proba_mo, y_pred_proba_so):
-            assert_array_almost_equal(proba_mo, proba_so)
+            assert_allclose(proba_mo, proba_so)
 
 
 def test_kneighbors_regressor(
@@ -1150,7 +1147,7 @@ def test_KNeighborsRegressor_multioutput_uniform_weight():
 
         assert y_pred.shape == y_test.shape
         assert y_pred_idx.shape == y_test.shape
-        assert_array_almost_equal(y_pred, y_pred_idx)
+        assert_allclose(y_pred, y_pred_idx)
 
 
 def test_kneighbors_regressor_multioutput(
@@ -1242,7 +1239,7 @@ def test_RadiusNeighborsRegressor_multioutput_with_uniform_weight():
 
         assert y_pred_idx.shape == y_test.shape
         assert y_pred.shape == y_test.shape
-        assert_array_almost_equal(y_pred, y_pred_idx)
+        assert_allclose(y_pred, y_pred_idx)
 
 
 def test_RadiusNeighborsRegressor_multioutput(
@@ -1349,7 +1346,7 @@ def test_kneighbors_graph():
     assert_array_equal(A.toarray(), np.eye(A.shape[0]))
 
     A = neighbors.kneighbors_graph(X, 1, mode="distance")
-    assert_array_almost_equal(
+    assert_allclose(
         A.toarray(), [[0.00, 1.01, 0.0], [1.01, 0.0, 0.0], [0.00, 1.40716026, 0.0]]
     )
 
@@ -1358,7 +1355,7 @@ def test_kneighbors_graph():
     assert_array_equal(A.toarray(), [[1.0, 1.0, 0.0], [1.0, 1.0, 0.0], [0.0, 1.0, 1.0]])
 
     A = neighbors.kneighbors_graph(X, 2, mode="distance")
-    assert_array_almost_equal(
+    assert_allclose(
         A.toarray(),
         [
             [0.0, 1.01, 2.23606798],
@@ -1369,7 +1366,7 @@ def test_kneighbors_graph():
 
     # n_neighbors = 3
     A = neighbors.kneighbors_graph(X, 3, mode="connectivity", include_self=True)
-    assert_array_almost_equal(A.toarray(), [[1, 1, 1], [1, 1, 1], [1, 1, 1]])
+    assert_allclose(A.toarray(), [[1, 1, 1], [1, 1, 1], [1, 1, 1]])
 
 
 @pytest.mark.parametrize("n_neighbors", [1, 2, 3])
@@ -1381,7 +1378,7 @@ def test_kneighbors_graph_sparse(n_neighbors, mode, seed=36):
     X = rng.randn(10, 10)
     Xcsr = csr_matrix(X)
 
-    assert_array_almost_equal(
+    assert_allclose(
         neighbors.kneighbors_graph(X, n_neighbors, mode=mode).toarray(),
         neighbors.kneighbors_graph(Xcsr, n_neighbors, mode=mode).toarray(),
     )
@@ -1395,7 +1392,7 @@ def test_radius_neighbors_graph():
     assert_array_equal(A.toarray(), [[1.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0]])
 
     A = neighbors.radius_neighbors_graph(X, 1.5, mode="distance")
-    assert_array_almost_equal(
+    assert_allclose(
         A.toarray(), [[0.0, 1.01, 0.0], [1.01, 0.0, 1.40716026], [0.0, 1.40716026, 0.0]]
     )
 
@@ -1409,7 +1406,7 @@ def test_radius_neighbors_graph_sparse(n_neighbors, mode, seed=36):
     X = rng.randn(10, 10)
     Xcsr = csr_matrix(X)
 
-    assert_array_almost_equal(
+    assert_allclose(
         neighbors.radius_neighbors_graph(X, n_neighbors, mode=mode).toarray(),
         neighbors.radius_neighbors_graph(Xcsr, n_neighbors, mode=mode).toarray(),
     )
@@ -1488,14 +1485,13 @@ def test_neighbors_badargs():
         - set(["pyfunc", *BOOL_METRICS])
     ),
 )
-@pytest.mark.parametrize("dtype", DTYPES)
 def test_neighbors_metrics(
-    dtype, metric, n_samples=20, n_features=3, n_query_pts=2, n_neighbors=5
+    global_dtype, metric, n_samples=20, n_features=3, n_query_pts=2, n_neighbors=5
 ):
     # Test computing the neighbors for various metrics
     algorithms = ["brute", "ball_tree", "kd_tree"]
-    X_train = rng.rand(n_samples, n_features).astype(dtype)
-    X_test = rng.rand(n_query_pts, n_features).astype(dtype)
+    X_train = rng.rand(n_samples, n_features).astype(global_dtype)
+    X_test = rng.rand(n_query_pts, n_features).astype(global_dtype)
 
     metric_params_list = _generate_test_params_for(metric, n_features)
 
@@ -1535,10 +1531,10 @@ def test_neighbors_metrics(
                 and algorithm == "brute"
                 and sp_version >= parse_version("1.6.0")
             ):
-                if dtype == np.float64:
+                if global_dtype == np.float64:
                     # Warning from sklearn.metrics._dist_metrics.WMinkowskiDistance
                     ExceptionToAssert = FutureWarning
-                if dtype == np.float32:
+                if global_dtype == np.float32:
                     # Warning from Scipy
                     ExceptionToAssert = DeprecationWarning
 
@@ -1565,13 +1561,12 @@ def test_neighbors_metrics(
 @pytest.mark.parametrize(
     "metric", sorted(set(neighbors.VALID_METRICS["brute"]) - set(["precomputed"]))
 )
-@pytest.mark.parametrize("dtype", DTYPES)
 def test_kneighbors_brute_backend(
-    dtype, metric, n_samples=2000, n_features=30, n_query_pts=100, n_neighbors=5
+    global_dtype, metric, n_samples=2000, n_features=30, n_query_pts=100, n_neighbors=5
 ):
     # Both backend for the 'brute' algorithm of kneighbors must give identical results.
-    X_train = rng.rand(n_samples, n_features).astype(dtype)
-    X_test = rng.rand(n_query_pts, n_features).astype(dtype)
+    X_train = rng.rand(n_samples, n_features).astype(global_dtype)
+    X_test = rng.rand(n_query_pts, n_features).astype(global_dtype)
 
     # Haversine distance only accepts 2D data
     if metric == "haversine":
@@ -1584,10 +1579,10 @@ def test_kneighbors_brute_backend(
     # wminkoski is deprecated in SciPy 1.6.0 and removed in 1.8.0
     ExceptionToAssert = None
     if metric == "wminkowski" and sp_version >= parse_version("1.6.0"):
-        if dtype == np.float64:
+        if global_dtype == np.float64:
             # Warning from sklearn.metrics._dist_metrics.WMinkowskiDistance
             ExceptionToAssert = FutureWarning
-        if dtype == np.float32:
+        if global_dtype == np.float32:
             # Warning from Scipy
             ExceptionToAssert = DeprecationWarning
 
@@ -1637,17 +1632,16 @@ def custom_metric(x1, x2):
     dist1, ind1 = nbrs1.kneighbors(X)
     dist2, ind2 = nbrs2.kneighbors(X)
 
-    assert_array_almost_equal(dist1, dist2)
+    assert_allclose(dist1, dist2)
 
 
 # TODO: Remove filterwarnings in 1.3 when wminkowski is removed
 @pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn")
 @pytest.mark.parametrize("metric", neighbors.VALID_METRICS["brute"])
-@pytest.mark.parametrize("dtype", DTYPES)
 def test_valid_brute_metric_for_auto_algorithm(
-    dtype, metric, n_samples=20, n_features=12
+    global_dtype, metric, n_samples=20, n_features=12
 ):
-    X = rng.rand(n_samples, n_features).astype(dtype)
+    X = rng.rand(n_samples, n_features).astype(global_dtype)
     Xcsr = csr_matrix(X)
 
     metric_params_list = _generate_test_params_for(metric, n_features)
@@ -1892,9 +1886,9 @@ def test_same_knn_parallel(algorithm):
     graph_parallel = clf.kneighbors_graph(X_test, mode="distance").toarray()
 
     assert_array_equal(y, y_parallel)
-    assert_array_almost_equal(dist, dist_parallel)
+    assert_allclose(dist, dist_parallel)
     assert_array_equal(ind, ind_parallel)
-    assert_array_almost_equal(graph, graph_parallel)
+    assert_allclose(graph, graph_parallel)
 
 
 @pytest.mark.parametrize("algorithm", ALGORITHMS)
@@ -1918,9 +1912,9 @@ def test_same_radius_neighbors_parallel(algorithm):
 
     assert_array_equal(y, y_parallel)
     for i in range(len(dist)):
-        assert_array_almost_equal(dist[i], dist_parallel[i])
+        assert_allclose(dist[i], dist_parallel[i])
         assert_array_equal(ind[i], ind_parallel[i])
-    assert_array_almost_equal(graph, graph_parallel)
+    assert_allclose(graph, graph_parallel)
 
 
 @pytest.mark.parametrize("backend", JOBLIB_BACKENDS)
@@ -2054,7 +2048,7 @@ def test_pipeline_with_nearest_neighbors_transformer():
 
         y_pred_chain = reg_chain.fit(X, y).predict(X2)
         y_pred_compact = reg_compact.fit(X, y).predict(X2)
-        assert_array_almost_equal(y_pred_chain, y_pred_compact)
+        assert_allclose(y_pred_chain, y_pred_compact)
 
 
 @pytest.mark.parametrize(

From 35ff90893cf86e8ccc69242c4bef6e804ed39552 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Tue, 29 Mar 2022 13:03:35 +0200
Subject: [PATCH 3/5] Apply review comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Jérémie du Boisberranger
---
 sklearn/neighbors/tests/test_neighbors.py | 25 ++++++++---------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py
index 4b83815cb1b10..cfe67a4a799d0 100644
--- a/sklearn/neighbors/tests/test_neighbors.py
+++ b/sklearn/neighbors/tests/test_neighbors.py
@@ -274,8 +274,6 @@ def test_neigh_predictions_algorithm_agnosticity(
                 f"The '{algorithm}' and '{next_algorithm}' "
                 "algorithms return different predictions."
             ),
-            rtol=1e-7,
-            atol=1e-7,
         )
 
 
@@ -305,7 +303,7 @@ def test_unsupervised_inputs(global_dtype, KNeighborsMixinSubclass):
         dist2, ind2 = nbrs.kneighbors(X)
 
         assert_allclose(dist1, dist2)
-        assert_allclose(ind1, ind2)
+        assert_array_equal(ind1, ind2)
 
 
 def test_n_neighbors_datatype():
@@ -357,7 +355,7 @@ def check_precomputed(make_train_test, estimators):
         nbrs_D.fit(DXX)
         dist_D, ind_D = getattr(nbrs_D, method)(DYX)
         assert_allclose(dist_X, dist_D)
-        assert_allclose(ind_X, ind_D)
+        assert_array_equal(ind_X, ind_D)
 
         # Check auto works too
         nbrs_D = neighbors.NearestNeighbors(
@@ -366,13 +364,13 @@ def check_precomputed(make_train_test, estimators):
         nbrs_D.fit(DXX)
         dist_D, ind_D = getattr(nbrs_D, method)(DYX)
         assert_allclose(dist_X, dist_D)
-        assert_allclose(ind_X, ind_D)
+        assert_array_equal(ind_X, ind_D)
 
         # Check X=None in prediction
         dist_X, ind_X = getattr(nbrs_X, method)(None)
         dist_D, ind_D = getattr(nbrs_D, method)(None)
         assert_allclose(dist_X, dist_D)
-        assert_allclose(ind_X, ind_D)
+        assert_array_equal(ind_X, ind_D)
 
         # Must raise a ValueError if the matrix is not of correct shape
         with pytest.raises(ValueError):
@@ -750,7 +748,7 @@ def test_radius_neighbors_classifier_outlier_labeling(global_dtype, algorithm, w
     clf.fit(X, y)
     assert_array_equal(correct_labels1, clf.predict(z1))
     assert_array_equal(correct_labels2, clf.predict(z2))
-    assert_array_equal(outlier_proba, clf.predict_proba(z2)[0])
+    assert_allclose(outlier_proba, clf.predict_proba(z2)[0])
 
     # test outlier_labeling of using predict_proba()
     RNC = neighbors.RadiusNeighborsClassifier
@@ -1034,7 +1032,7 @@ def test_RadiusNeighborsClassifier_multioutput():
         y_pred_mo = rnn_mo.predict(X_test)
 
         assert y_pred_mo.shape == y_test.shape
-        assert_allclose(y_pred_mo, y_pred_so)
+        assert_array_equal(y_pred_mo, y_pred_so)
 
 
 def test_kneighbors_classifier_sparse(
@@ -1091,14 +1089,14 @@ def test_KNeighborsClassifier_multioutput():
         y_pred_mo = knn_mo.predict(X_test)
 
         assert y_pred_mo.shape == y_test.shape
-        assert_allclose(y_pred_mo, y_pred_so)
+        assert_array_equal(y_pred_mo, y_pred_so)
 
         # Check proba
         y_pred_proba_mo = knn_mo.predict_proba(X_test)
         assert len(y_pred_proba_mo) == n_output
 
         for proba_mo, proba_so in zip(y_pred_proba_mo, y_pred_proba_so):
-            assert_allclose(proba_mo, proba_so)
+            assert_array_equal(proba_mo, proba_so)
 
 
 def test_kneighbors_regressor(
@@ -1531,12 +1529,7 @@ def test_neighbors_metrics(
                 and algorithm == "brute"
                 and sp_version >= parse_version("1.6.0")
             ):
-                if global_dtype == np.float64:
-                    # Warning from sklearn.metrics._dist_metrics.WMinkowskiDistance
-                    ExceptionToAssert = FutureWarning
-                if global_dtype == np.float32:
-                    # Warning from Scipy
-                    ExceptionToAssert = DeprecationWarning
+                ExceptionToAssert = FutureWarning
 
             with pytest.warns(ExceptionToAssert):
                 results[algorithm] = neigh.kneighbors(X_test, return_distance=True)

From 382a47294633bedd6114f8c80649914bb009ffb2 Mon Sep 17 00:00:00 2001
From: Julien Jerphanion <git@jjerphan.xyz>
Date: Wed, 30 Mar 2022 10:15:54 +0200
Subject: [PATCH 4/5] Don't copy on same dtype

---
 sklearn/neighbors/tests/test_neighbors.py | 52 +++++++++++++----------
 1 file changed, 29 insertions(+), 23 deletions(-)

diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py
index cfe67a4a799d0..d662075c62ea0 100644
--- a/sklearn/neighbors/tests/test_neighbors.py
+++ b/sklearn/neighbors/tests/test_neighbors.py
@@ -157,12 +157,12 @@ def test_unsupervised_kneighbors(
 
     # Redefining the rng locally to use the same generated X
     local_rng = np.random.RandomState(0)
-    X = local_rng.rand(n_samples, n_features).astype(global_dtype)
+    X = local_rng.rand(n_samples, n_features).astype(global_dtype, copy=False)
 
     query = (
         X
         if query_is_train
-        else local_rng.rand(n_query_pts, n_features).astype(global_dtype)
+        else local_rng.rand(n_query_pts, n_features).astype(global_dtype, copy=False)
     )
 
     results_nodist = []
@@ -244,10 +244,10 @@ def test_neigh_predictions_algorithm_agnosticity(
 
     # Redefining the rng locally to use the same generated X
     local_rng = np.random.RandomState(0)
-    X = local_rng.rand(n_samples, n_features).astype(global_dtype)
+    X = local_rng.rand(n_samples, n_features).astype(global_dtype, copy=False)
     y = local_rng.randint(3, size=n_samples)
 
-    query = local_rng.rand(n_query_pts, n_features).astype(global_dtype)
+    query = local_rng.rand(n_query_pts, n_features).astype(global_dtype, copy=False)
 
     predict_results = []
 
@@ -288,7 +288,7 @@ def test_neigh_predictions_algorithm_agnosticity(
 def test_unsupervised_inputs(global_dtype, KNeighborsMixinSubclass):
     # Test unsupervised inputs for neighbors estimators
 
-    X = rng.random_sample((10, 3)).astype(global_dtype)
+    X = rng.random_sample((10, 3)).astype(global_dtype, copy=False)
     y = rng.randint(3, size=10)
     nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1)
     nbrs_fid.fit(X)
@@ -528,9 +528,9 @@ def test_unsupervised_radius_neighbors(
     # Test unsupervised radius-based query
     rng = np.random.RandomState(random_state)
 
-    X = rng.rand(n_samples, n_features).astype(global_dtype)
+    X = rng.rand(n_samples, n_features).astype(global_dtype, copy=False)
 
-    test = rng.rand(n_query_pts, n_features).astype(global_dtype)
+    test = rng.rand(n_query_pts, n_features).astype(global_dtype, copy=False)
 
     for p in P:
         results = []
@@ -578,7 +578,7 @@ def test_kneighbors_classifier(
 ):
     # Test k-neighbors classification
     rng = np.random.RandomState(random_state)
-    X = 2 * rng.rand(n_samples, n_features).astype(global_dtype) - 1
+    X = 2 * rng.rand(n_samples, n_features).astype(global_dtype, copy=False) - 1
     y = ((X**2).sum(axis=1) < 0.5).astype(int)
     y_str = y.astype(str)
 
@@ -605,7 +605,7 @@ def test_kneighbors_classifier_float_labels(
 ):
     # Test k-neighbors classification
     rng = np.random.RandomState(random_state)
-    X = 2 * rng.rand(n_samples, n_features).astype(global_dtype) - 1
+    X = 2 * rng.rand(n_samples, n_features).astype(global_dtype, copy=False) - 1
     y = ((X**2).sum(axis=1) < 0.5).astype(int)
 
     knn = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors)
@@ -619,7 +619,7 @@ def test_kneighbors_classifier_predict_proba(global_dtype):
     # Test KNeighborsClassifier.predict_proba() method
     X = np.array(
         [[0, 2, 0], [0, 2, 1], [2, 0, 0], [2, 2, 0], [0, 0, 2], [0, 0, 1]]
-    ).astype(global_dtype)
+    ).astype(global_dtype, copy=False)
     y = np.array([4, 4, 5, 5, 1, 1])
     cls = neighbors.KNeighborsClassifier(n_neighbors=3, p=1)  # cityblock dist
     cls.fit(X, y)
@@ -661,7 +661,7 @@ def test_radius_neighbors_classifier(
 ):
     # Test radius-based classification
     rng = np.random.RandomState(random_state)
-    X = 2 * rng.rand(n_samples, n_features).astype(global_dtype) - 1
+    X = 2 * rng.rand(n_samples, n_features).astype(global_dtype, copy=False) - 1
     y = ((X**2).sum(axis=1) < 0.5).astype(int)
     y_str = y.astype(str)
 
@@ -701,12 +701,16 @@ def test_radius_neighbors_classifier_when_no_neighbors(
     # Test radius-based classifier when no neighbors found.
     # In this case it should rise an informative exception
 
-    X = np.array([[1.0, 1.0], [2.0, 2.0]]).astype(global_dtype)
+    X = np.array([[1.0, 1.0], [2.0, 2.0]]).astype(global_dtype, copy=False)
     y = np.array([1, 2])
     radius = 0.1
 
-    z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(global_dtype)  # no outliers
-    z2 = np.array([[1.01, 1.01], [1.4, 1.4]]).astype(global_dtype)  # one outlier
+    z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(
+        global_dtype, copy=False
+    )  # no outliers
+    z2 = np.array([[1.01, 1.01], [1.4, 1.4]]).astype(
+        global_dtype, copy=False
+    )  # one outlier
 
     rnc = neighbors.RadiusNeighborsClassifier
     clf = rnc(
@@ -730,13 +734,15 @@ def test_radius_neighbors_classifier_outlier_labeling(global_dtype, algorithm, w
 
     X = np.array(
         [[1.0, 1.0], [2.0, 2.0], [0.99, 0.99], [0.98, 0.98], [2.01, 2.01]]
-    ).astype(global_dtype)
+    ).astype(global_dtype, copy=False)
     y = np.array([1, 2, 1, 1, 2])
     radius = 0.1
 
-    z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(global_dtype)  # no outliers
+    z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(
+        global_dtype, copy=False
+    )  # no outliers
     z2 = np.array([[1.4, 1.4], [1.01, 1.01], [2.01, 2.01]]).astype(
-        global_dtype
+        global_dtype, copy=False
     )  # one outlier
     correct_labels1 = np.array([1, 2])
     correct_labels2 = np.array([-1, 1, 2])
@@ -753,7 +759,7 @@ def test_radius_neighbors_classifier_outlier_labeling(global_dtype, algorithm, w
     # test outlier_labeling of using predict_proba()
     RNC = neighbors.RadiusNeighborsClassifier
     X = np.array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]).astype(
-        global_dtype
+        global_dtype, copy=False
     )
     y = np.array([0, 2, 2, 1, 1, 1, 3, 3, 3, 3])
 
@@ -1488,8 +1494,8 @@ def test_neighbors_metrics(
 ):
     # Test computing the neighbors for various metrics
     algorithms = ["brute", "ball_tree", "kd_tree"]
-    X_train = rng.rand(n_samples, n_features).astype(global_dtype)
-    X_test = rng.rand(n_query_pts, n_features).astype(global_dtype)
+    X_train = rng.rand(n_samples, n_features).astype(global_dtype, copy=False)
+    X_test = rng.rand(n_query_pts, n_features).astype(global_dtype, copy=False)
 
     metric_params_list = _generate_test_params_for(metric, n_features)
 
@@ -1558,8 +1564,8 @@ def test_kneighbors_brute_backend(
     global_dtype, metric, n_samples=2000, n_features=30, n_query_pts=100, n_neighbors=5
 ):
     # Both backend for the 'brute' algorithm of kneighbors must give identical results.
-    X_train = rng.rand(n_samples, n_features).astype(global_dtype)
-    X_test = rng.rand(n_query_pts, n_features).astype(global_dtype)
+    X_train = rng.rand(n_samples, n_features).astype(global_dtype, copy=False)
+    X_test = rng.rand(n_query_pts, n_features).astype(global_dtype, copy=False)
 
     # Haversine distance only accepts 2D data
     if metric == "haversine":
@@ -1634,7 +1640,7 @@ def custom_metric(x1, x2):
 def test_valid_brute_metric_for_auto_algorithm(
     global_dtype, metric, n_samples=20, n_features=12
 ):
-    X = rng.rand(n_samples, n_features).astype(global_dtype)
+    X = rng.rand(n_samples, n_features).astype(global_dtype, copy=False)
     Xcsr = csr_matrix(X)
 
     metric_params_list = _generate_test_params_for(metric, n_features)

From 0a90e35dd786b43009a573c4d6f59686c57710f3 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Wed, 30 Mar 2022 11:04:47 +0200
Subject: [PATCH 5/5] Pass dtype=global_dtype directly to np.array([...]) + fix
 warnings in tests

---
 sklearn/neighbors/tests/test_neighbors.py | 42 +++++++++++------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py
index d662075c62ea0..2bbfc9dbfa07b 100644
--- a/sklearn/neighbors/tests/test_neighbors.py
+++ b/sklearn/neighbors/tests/test_neighbors.py
@@ -662,7 +662,7 @@ def test_radius_neighbors_classifier(
     # Test radius-based classification
     rng = np.random.RandomState(random_state)
     X = 2 * rng.rand(n_samples, n_features).astype(global_dtype, copy=False) - 1
-    y = ((X**2).sum(axis=1) < 0.5).astype(int)
+    y = ((X**2).sum(axis=1) < radius).astype(int)
     y_str = y.astype(str)
 
     neigh = neighbors.RadiusNeighborsClassifier(
@@ -701,16 +701,15 @@ def test_radius_neighbors_classifier_when_no_neighbors(
     # Test radius-based classifier when no neighbors found.
     # In this case it should rise an informative exception
 
-    X = np.array([[1.0, 1.0], [2.0, 2.0]]).astype(global_dtype, copy=False)
+    X = np.array([[1.0, 1.0], [2.0, 2.0]], dtype=global_dtype)
     y = np.array([1, 2])
     radius = 0.1
 
-    z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(
-        global_dtype, copy=False
-    )  # no outliers
-    z2 = np.array([[1.01, 1.01], [1.4, 1.4]]).astype(
-        global_dtype, copy=False
-    )  # one outlier
+    # no outliers
+    z1 = np.array([[1.01, 1.01], [2.01, 2.01]], dtype=global_dtype)
+
+    # one outlier
+    z2 = np.array([[1.01, 1.01], [1.4, 1.4]], dtype=global_dtype)
 
     rnc = neighbors.RadiusNeighborsClassifier
     clf = rnc(
@@ -733,17 +732,18 @@ def test_radius_neighbors_classifier_outlier_labeling(global_dtype, algorithm, w
     # are labeled.
 
     X = np.array(
-        [[1.0, 1.0], [2.0, 2.0], [0.99, 0.99], [0.98, 0.98], [2.01, 2.01]]
-    ).astype(global_dtype, copy=False)
+        [[1.0, 1.0], [2.0, 2.0], [0.99, 0.99], [0.98, 0.98], [2.01, 2.01]],
+        dtype=global_dtype,
+    )
     y = np.array([1, 2, 1, 1, 2])
     radius = 0.1
 
-    z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(
-        global_dtype, copy=False
-    )  # no outliers
-    z2 = np.array([[1.4, 1.4], [1.01, 1.01], [2.01, 2.01]]).astype(
-        global_dtype, copy=False
-    )  # one outlier
+    # no outliers
+    z1 = np.array([[1.01, 1.01], [2.01, 2.01]], dtype=global_dtype)
+
+    # one outlier
+    z2 = np.array([[1.4, 1.4], [1.01, 1.01], [2.01, 2.01]], dtype=global_dtype)
+
     correct_labels1 = np.array([1, 2])
     correct_labels2 = np.array([-1, 1, 2])
     outlier_proba = np.array([0, 0])
@@ -753,14 +753,14 @@ def test_radius_neighbors_classifier_outlier_labeling(global_dtype, algorithm, w
     )
     clf.fit(X, y)
     assert_array_equal(correct_labels1, clf.predict(z1))
-    assert_array_equal(correct_labels2, clf.predict(z2))
-    assert_allclose(outlier_proba, clf.predict_proba(z2)[0])
+    with pytest.warns(UserWarning, match="Outlier label -1 is not in training classes"):
+        assert_array_equal(correct_labels2, clf.predict(z2))
+    with pytest.warns(UserWarning, match="Outlier label -1 is not in training classes"):
+        assert_allclose(outlier_proba, clf.predict_proba(z2)[0])
 
     # test outlier_labeling of using predict_proba()
     RNC = neighbors.RadiusNeighborsClassifier
-    X = np.array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]).astype(
-        global_dtype, copy=False
-    )
+    X = np.array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]], dtype=global_dtype)
     y = np.array([0, 2, 2, 1, 1, 1, 3, 3, 3, 3])
 
     # test outlier_label scalar verification