From 73ae8b2802ebe26b1e6901bb50b2462dc365d254 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 3 Mar 2022 13:59:35 +0100 Subject: [PATCH 1/5] TST Adapt test_neighbors.py to test implementations on 32bit datasets --- sklearn/neighbors/tests/test_neighbors.py | 233 +++++++++++++--------- 1 file changed, 140 insertions(+), 93 deletions(-) diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index a1e0b01ef3eeb..fe436d9e28667 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -66,6 +66,7 @@ set.intersection(*map(set, neighbors.VALID_METRICS.values())) ) P = (1, 2, 3, 4, np.inf) +DTYPES = (np.float64, np.float32) JOBLIB_BACKENDS = list(joblib.parallel.BACKENDS.keys()) # Filter deprecation warnings. @@ -125,6 +126,9 @@ def _weight_func(dist): return retval**2 +WEIGHTS = ["uniform", "distance", _weight_func] + + @pytest.mark.parametrize( "n_samples, n_features, n_query_pts, n_neighbors", [ @@ -133,6 +137,7 @@ def _weight_func(dist): ], ) @pytest.mark.parametrize("query_is_train", [False, True]) +@pytest.mark.parametrize("dtype", DTYPES) @pytest.mark.parametrize("metric", COMMON_VALID_METRICS) def test_unsupervised_kneighbors( n_samples, @@ -140,6 +145,7 @@ def test_unsupervised_kneighbors( n_query_pts, n_neighbors, query_is_train, + dtype, metric, ): # The different algorithms must return identical results @@ -148,9 +154,11 @@ def test_unsupervised_kneighbors( # Redefining the rng locally to use the same generated X local_rng = np.random.RandomState(0) - X = local_rng.rand(n_samples, n_features) + X = local_rng.rand(n_samples, n_features).astype(dtype) - query = X if query_is_train else local_rng.rand(n_query_pts, n_features) + query = ( + X if query_is_train else local_rng.rand(n_query_pts, n_features).astype(dtype) + ) results_nodist = [] results = [] @@ -207,6 +215,7 @@ def test_unsupervised_kneighbors( ) @pytest.mark.parametrize("metric", COMMON_VALID_METRICS) @pytest.mark.parametrize("n_neighbors, radius", [(1, 100), (50, 500), (100, 1000)]) +@pytest.mark.parametrize("dtype", DTYPES) @pytest.mark.parametrize( "NeighborsMixinSubclass", [ @@ -223,6 +232,7 @@ def test_neigh_predictions_algorithm_agnosticity( metric, n_neighbors, radius, + dtype, NeighborsMixinSubclass, ): # The different algorithms must return identical predictions results @@ -230,10 +240,10 @@ def test_neigh_predictions_algorithm_agnosticity( # Redefining the rng locally to use the same generated X local_rng = np.random.RandomState(0) - X = local_rng.rand(n_samples, n_features) + X = local_rng.rand(n_samples, n_features).astype(dtype) y = local_rng.randint(3, size=n_samples) - query = local_rng.rand(n_query_pts, n_features) + query = local_rng.rand(n_query_pts, n_features).astype(dtype) predict_results = [] @@ -260,6 +270,8 @@ def test_neigh_predictions_algorithm_agnosticity( f"The '{algorithm}' and '{next_algorithm}' " "algorithms return different predictions." ), + rtol=1e-7, + atol=1e-7, ) @@ -271,10 +283,11 @@ def test_neigh_predictions_algorithm_agnosticity( neighbors.NearestNeighbors, ], ) -def test_unsupervised_inputs(KNeighborsMixinSubclass): +@pytest.mark.parametrize("dtype", DTYPES) +def test_unsupervised_inputs(KNeighborsMixinSubclass, dtype): # Test unsupervised inputs for neighbors estimators - X = rng.random_sample((10, 3)) + X = rng.random_sample((10, 3)).astype(dtype) y = rng.randint(3, size=10) nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1) nbrs_fid.fit(X) @@ -508,15 +521,16 @@ def test_precomputed_cross_validation(): assert_array_equal(metric_score, precomp_score) +@pytest.mark.parametrize("dtype", DTYPES) def test_unsupervised_radius_neighbors( - n_samples=20, n_features=5, n_query_pts=2, radius=0.5, random_state=0 + dtype, n_samples=20, n_features=5, n_query_pts=2, radius=0.5, random_state=0 ): # Test unsupervised radius-based query rng = np.random.RandomState(random_state) - X = rng.rand(n_samples, n_features) + X = rng.rand(n_samples, n_features).astype(dtype) - test = rng.rand(n_query_pts, n_features) + test = rng.rand(n_query_pts, n_features).astype(dtype) for p in P: results = [] @@ -552,38 +566,45 @@ def test_unsupervised_radius_neighbors( ) +@pytest.mark.parametrize("dtype", DTYPES) +@pytest.mark.parametrize("algorithm", ALGORITHMS) +@pytest.mark.parametrize("weights", WEIGHTS) def test_kneighbors_classifier( - n_samples=40, n_features=5, n_test_pts=10, n_neighbors=5, random_state=0 + dtype, + algorithm, + weights, + n_samples=40, + n_features=5, + n_test_pts=10, + n_neighbors=5, + random_state=0, ): # Test k-neighbors classification rng = np.random.RandomState(random_state) - X = 2 * rng.rand(n_samples, n_features) - 1 + X = 2 * rng.rand(n_samples, n_features).astype(dtype) - 1 y = ((X**2).sum(axis=1) < 0.5).astype(int) y_str = y.astype(str) - weight_func = _weight_func - - for algorithm in ALGORITHMS: - for weights in ["uniform", "distance", weight_func]: - knn = neighbors.KNeighborsClassifier( - n_neighbors=n_neighbors, weights=weights, algorithm=algorithm - ) - knn.fit(X, y) - epsilon = 1e-5 * (2 * rng.rand(1, n_features) - 1) - y_pred = knn.predict(X[:n_test_pts] + epsilon) - assert_array_equal(y_pred, y[:n_test_pts]) - # Test prediction with y_str - knn.fit(X, y_str) - y_pred = knn.predict(X[:n_test_pts] + epsilon) - assert_array_equal(y_pred, y_str[:n_test_pts]) + knn = neighbors.KNeighborsClassifier( + n_neighbors=n_neighbors, weights=weights, algorithm=algorithm + ) + knn.fit(X, y) + epsilon = 1e-5 * (2 * rng.rand(1, n_features) - 1) + y_pred = knn.predict(X[:n_test_pts] + epsilon) + assert_array_equal(y_pred, y[:n_test_pts]) + # Test prediction with y_str + knn.fit(X, y_str) + y_pred = knn.predict(X[:n_test_pts] + epsilon) + assert_array_equal(y_pred, y_str[:n_test_pts]) +@pytest.mark.parametrize("dtype", DTYPES) def test_kneighbors_classifier_float_labels( - n_samples=40, n_features=5, n_test_pts=10, n_neighbors=5, random_state=0 + dtype, n_samples=40, n_features=5, n_test_pts=10, n_neighbors=5, random_state=0 ): # Test k-neighbors classification rng = np.random.RandomState(random_state) - X = 2 * rng.rand(n_samples, n_features) - 1 + X = 2 * rng.rand(n_samples, n_features).astype(dtype) - 1 y = ((X**2).sum(axis=1) < 0.5).astype(int) knn = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors) @@ -593,9 +614,12 @@ def test_kneighbors_classifier_float_labels( assert_array_equal(y_pred, y[:n_test_pts]) -def test_kneighbors_classifier_predict_proba(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_kneighbors_classifier_predict_proba(dtype): # Test KNeighborsClassifier.predict_proba() method - X = np.array([[0, 2, 0], [0, 2, 1], [2, 0, 0], [2, 2, 0], [0, 0, 2], [0, 0, 1]]) + X = np.array( + [[0, 2, 0], [0, 2, 1], [2, 0, 0], [2, 2, 0], [0, 0, 2], [0, 0, 1]] + ).astype(dtype) y = np.array([4, 4, 5, 5, 1, 1]) cls = neighbors.KNeighborsClassifier(n_neighbors=3, p=1) # cityblock dist cls.fit(X, y) @@ -623,29 +647,35 @@ def test_kneighbors_classifier_predict_proba(): assert_array_almost_equal(real_prob, y_prob) +@pytest.mark.parametrize("dtype", DTYPES) +@pytest.mark.parametrize("algorithm", ALGORITHMS) +@pytest.mark.parametrize("weights", WEIGHTS) def test_radius_neighbors_classifier( - n_samples=40, n_features=5, n_test_pts=10, radius=0.5, random_state=0 + dtype, + algorithm, + weights, + n_samples=40, + n_features=5, + n_test_pts=10, + radius=0.5, + random_state=0, ): # Test radius-based classification rng = np.random.RandomState(random_state) - X = 2 * rng.rand(n_samples, n_features) - 1 + X = 2 * rng.rand(n_samples, n_features).astype(dtype) - 1 y = ((X**2).sum(axis=1) < 0.5).astype(int) y_str = y.astype(str) - weight_func = _weight_func - - for algorithm in ALGORITHMS: - for weights in ["uniform", "distance", weight_func]: - neigh = neighbors.RadiusNeighborsClassifier( - radius=radius, weights=weights, algorithm=algorithm - ) - neigh.fit(X, y) - epsilon = 1e-5 * (2 * rng.rand(1, n_features) - 1) - y_pred = neigh.predict(X[:n_test_pts] + epsilon) - assert_array_equal(y_pred, y[:n_test_pts]) - neigh.fit(X, y_str) - y_pred = neigh.predict(X[:n_test_pts] + epsilon) - assert_array_equal(y_pred, y_str[:n_test_pts]) + neigh = neighbors.RadiusNeighborsClassifier( + radius=radius, weights=weights, algorithm=algorithm + ) + neigh.fit(X, y) + epsilon = 1e-5 * (2 * rng.rand(1, n_features) - 1) + y_pred = neigh.predict(X[:n_test_pts] + epsilon) + assert_array_equal(y_pred, y[:n_test_pts]) + neigh.fit(X, y_str) + y_pred = neigh.predict(X[:n_test_pts] + epsilon) + assert_array_equal(y_pred, y_str[:n_test_pts]) # TODO: Remove in v1.2 @@ -663,65 +693,67 @@ def test_radius_neighbors_classifier_kwargs_is_deprecated(): neighbors.RadiusNeighborsClassifier(**extra_kwargs) -def test_radius_neighbors_classifier_when_no_neighbors(): +@pytest.mark.parametrize("dtype", DTYPES) +@pytest.mark.parametrize("algorithm", ALGORITHMS) +@pytest.mark.parametrize("weights", WEIGHTS) +@pytest.mark.parametrize("outlier_label", [0, -1, None]) +def test_radius_neighbors_classifier_when_no_neighbors( + dtype, algorithm, weights, outlier_label +): # Test radius-based classifier when no neighbors found. # In this case it should rise an informative exception - X = np.array([[1.0, 1.0], [2.0, 2.0]]) + X = np.array([[1.0, 1.0], [2.0, 2.0]]).astype(dtype) y = np.array([1, 2]) radius = 0.1 - z1 = np.array([[1.01, 1.01], [2.01, 2.01]]) # no outliers - z2 = np.array([[1.01, 1.01], [1.4, 1.4]]) # one outlier + z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(dtype) # no outliers + z2 = np.array([[1.01, 1.01], [1.4, 1.4]]).astype(dtype) # one outlier - weight_func = _weight_func - - for outlier_label in [0, -1, None]: - for algorithm in ALGORITHMS: - for weights in ["uniform", "distance", weight_func]: - rnc = neighbors.RadiusNeighborsClassifier - clf = rnc( - radius=radius, - weights=weights, - algorithm=algorithm, - outlier_label=outlier_label, - ) - clf.fit(X, y) - assert_array_equal(np.array([1, 2]), clf.predict(z1)) - if outlier_label is None: - with pytest.raises(ValueError): - clf.predict(z2) + rnc = neighbors.RadiusNeighborsClassifier + clf = rnc( + radius=radius, + weights=weights, + algorithm=algorithm, + outlier_label=outlier_label, + ) + clf.fit(X, y) + assert_array_equal(np.array([1, 2]), clf.predict(z1)) + if outlier_label is None: + with pytest.raises(ValueError): + clf.predict(z2) -def test_radius_neighbors_classifier_outlier_labeling(): +@pytest.mark.parametrize("dtype", DTYPES) +@pytest.mark.parametrize("algorithm", ALGORITHMS) +@pytest.mark.parametrize("weights", WEIGHTS) +def test_radius_neighbors_classifier_outlier_labeling(dtype, algorithm, weights): # Test radius-based classifier when no neighbors found and outliers # are labeled. - X = np.array([[1.0, 1.0], [2.0, 2.0], [0.99, 0.99], [0.98, 0.98], [2.01, 2.01]]) + X = np.array( + [[1.0, 1.0], [2.0, 2.0], [0.99, 0.99], [0.98, 0.98], [2.01, 2.01]] + ).astype(dtype) y = np.array([1, 2, 1, 1, 2]) radius = 0.1 - z1 = np.array([[1.01, 1.01], [2.01, 2.01]]) # no outliers - z2 = np.array([[1.4, 1.4], [1.01, 1.01], [2.01, 2.01]]) # one outlier + z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(dtype) # no outliers + z2 = np.array([[1.4, 1.4], [1.01, 1.01], [2.01, 2.01]]).astype(dtype) # one outlier correct_labels1 = np.array([1, 2]) correct_labels2 = np.array([-1, 1, 2]) outlier_proba = np.array([0, 0]) - weight_func = _weight_func - - for algorithm in ALGORITHMS: - for weights in ["uniform", "distance", weight_func]: - clf = neighbors.RadiusNeighborsClassifier( - radius=radius, weights=weights, algorithm=algorithm, outlier_label=-1 - ) - clf.fit(X, y) - assert_array_equal(correct_labels1, clf.predict(z1)) - assert_array_equal(correct_labels2, clf.predict(z2)) - assert_array_equal(outlier_proba, clf.predict_proba(z2)[0]) + clf = neighbors.RadiusNeighborsClassifier( + radius=radius, weights=weights, algorithm=algorithm, outlier_label=-1 + ) + clf.fit(X, y) + assert_array_equal(correct_labels1, clf.predict(z1)) + assert_array_equal(correct_labels2, clf.predict(z2)) + assert_array_equal(outlier_proba, clf.predict_proba(z2)[0]) # test outlier_labeling of using predict_proba() RNC = neighbors.RadiusNeighborsClassifier - X = np.array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]) + X = np.array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]).astype(dtype) y = np.array([0, 2, 2, 1, 1, 1, 3, 3, 3, 3]) # test outlier_label scalar verification @@ -1450,13 +1482,14 @@ def test_neighbors_badargs(): - set(["pyfunc", *BOOL_METRICS]) ), ) +@pytest.mark.parametrize("dtype", DTYPES) def test_neighbors_metrics( - metric, n_samples=20, n_features=3, n_query_pts=2, n_neighbors=5 + dtype, metric, n_samples=20, n_features=3, n_query_pts=2, n_neighbors=5 ): # Test computing the neighbors for various metrics algorithms = ["brute", "ball_tree", "kd_tree"] - X_train = rng.rand(n_samples, n_features) - X_test = rng.rand(n_query_pts, n_features) + X_train = rng.rand(n_samples, n_features).astype(dtype) + X_test = rng.rand(n_query_pts, n_features).astype(dtype) metric_params_list = _generate_test_params_for(metric, n_features) @@ -1496,7 +1529,12 @@ def test_neighbors_metrics( and algorithm == "brute" and sp_version >= parse_version("1.6.0") ): - ExceptionToAssert = FutureWarning + if dtype == np.float64: + # Warning from sklearn.metrics._dist_metrics.WMinkowskiDistance + ExceptionToAssert = FutureWarning + if dtype == np.float32: + # Warning from Scipy + ExceptionToAssert = DeprecationWarning with pytest.warns(ExceptionToAssert): results[algorithm] = neigh.kneighbors(X_test, return_distance=True) @@ -1521,12 +1559,13 @@ def test_neighbors_metrics( @pytest.mark.parametrize( "metric", sorted(set(neighbors.VALID_METRICS["brute"]) - set(["precomputed"])) ) +@pytest.mark.parametrize("dtype", DTYPES) def test_kneighbors_brute_backend( - metric, n_samples=2000, n_features=30, n_query_pts=100, n_neighbors=5 + dtype, metric, n_samples=2000, n_features=30, n_query_pts=100, n_neighbors=5 ): # Both backend for the 'brute' algorithm of kneighbors must give identical results. - X_train = rng.rand(n_samples, n_features) - X_test = rng.rand(n_query_pts, n_features) + X_train = rng.rand(n_samples, n_features).astype(dtype) + X_test = rng.rand(n_query_pts, n_features).astype(dtype) # Haversine distance only accepts 2D data if metric == "haversine": @@ -1539,7 +1578,12 @@ def test_kneighbors_brute_backend( # wminkoski is deprecated in SciPy 1.6.0 and removed in 1.8.0 ExceptionToAssert = None if metric == "wminkowski" and sp_version >= parse_version("1.6.0"): - ExceptionToAssert = FutureWarning + if dtype == np.float64: + # Warning from sklearn.metrics._dist_metrics.WMinkowskiDistance + ExceptionToAssert = FutureWarning + if dtype == np.float32: + # Warning from Scipy + ExceptionToAssert = DeprecationWarning for metric_params in metric_params_list: p = metric_params.pop("p", 2) @@ -1593,8 +1637,11 @@ def custom_metric(x1, x2): # TODO: Remove filterwarnings in 1.3 when wminkowski is removed @pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn") @pytest.mark.parametrize("metric", neighbors.VALID_METRICS["brute"]) -def test_valid_brute_metric_for_auto_algorithm(metric, n_samples=20, n_features=12): - X = rng.rand(n_samples, n_features) +@pytest.mark.parametrize("dtype", DTYPES) +def test_valid_brute_metric_for_auto_algorithm( + dtype, metric, n_samples=20, n_features=12 +): + X = rng.rand(n_samples, n_features).astype(dtype) Xcsr = csr_matrix(X) metric_params_list = _generate_test_params_for(metric, n_features) From 7e04e7b9a5b8a9ae307219db67e7395ddb7eb3f8 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 17 Mar 2022 18:53:10 +0100 Subject: [PATCH 2/5] TST Use global_dtype --- sklearn/neighbors/tests/test_neighbors.py | 172 +++++++++++----------- 1 file changed, 83 insertions(+), 89 deletions(-) diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index cca85c5ca9bd3..4b83815cb1b10 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -42,7 +42,6 @@ from sklearn.pipeline import make_pipeline from sklearn.utils._testing import ( assert_allclose, - assert_array_almost_equal, assert_array_equal, ) from sklearn.utils._testing import ignore_warnings @@ -72,7 +71,6 @@ set.intersection(*map(set, neighbors.VALID_METRICS.values())) ) P = (1, 2, 3, 4, np.inf) -DTYPES = (np.float64, np.float32) JOBLIB_BACKENDS = list(joblib.parallel.BACKENDS.keys()) # Filter deprecation warnings. @@ -143,15 +141,14 @@ def _weight_func(dist): ], ) @pytest.mark.parametrize("query_is_train", [False, True]) -@pytest.mark.parametrize("dtype", DTYPES) @pytest.mark.parametrize("metric", COMMON_VALID_METRICS) def test_unsupervised_kneighbors( + global_dtype, n_samples, n_features, n_query_pts, n_neighbors, query_is_train, - dtype, metric, ): # The different algorithms must return identical results @@ -160,10 +157,12 @@ def test_unsupervised_kneighbors( # Redefining the rng locally to use the same generated X local_rng = np.random.RandomState(0) - X = local_rng.rand(n_samples, n_features).astype(dtype) + X = local_rng.rand(n_samples, n_features).astype(global_dtype) query = ( - X if query_is_train else local_rng.rand(n_query_pts, n_features).astype(dtype) + X + if query_is_train + else local_rng.rand(n_query_pts, n_features).astype(global_dtype) ) results_nodist = [] @@ -221,7 +220,6 @@ def test_unsupervised_kneighbors( ) @pytest.mark.parametrize("metric", COMMON_VALID_METRICS) @pytest.mark.parametrize("n_neighbors, radius", [(1, 100), (50, 500), (100, 1000)]) -@pytest.mark.parametrize("dtype", DTYPES) @pytest.mark.parametrize( "NeighborsMixinSubclass", [ @@ -232,13 +230,13 @@ def test_unsupervised_kneighbors( ], ) def test_neigh_predictions_algorithm_agnosticity( + global_dtype, n_samples, n_features, n_query_pts, metric, n_neighbors, radius, - dtype, NeighborsMixinSubclass, ): # The different algorithms must return identical predictions results @@ -246,10 +244,10 @@ def test_neigh_predictions_algorithm_agnosticity( # Redefining the rng locally to use the same generated X local_rng = np.random.RandomState(0) - X = local_rng.rand(n_samples, n_features).astype(dtype) + X = local_rng.rand(n_samples, n_features).astype(global_dtype) y = local_rng.randint(3, size=n_samples) - query = local_rng.rand(n_query_pts, n_features).astype(dtype) + query = local_rng.rand(n_query_pts, n_features).astype(global_dtype) predict_results = [] @@ -289,11 +287,10 @@ def test_neigh_predictions_algorithm_agnosticity( neighbors.NearestNeighbors, ], ) -@pytest.mark.parametrize("dtype", DTYPES) -def test_unsupervised_inputs(KNeighborsMixinSubclass, dtype): +def test_unsupervised_inputs(global_dtype, KNeighborsMixinSubclass): # Test unsupervised inputs for neighbors estimators - X = rng.random_sample((10, 3)).astype(dtype) + X = rng.random_sample((10, 3)).astype(global_dtype) y = rng.randint(3, size=10) nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1) nbrs_fid.fit(X) @@ -307,8 +304,8 @@ def test_unsupervised_inputs(KNeighborsMixinSubclass, dtype): dist2, ind2 = nbrs.kneighbors(X) - assert_array_almost_equal(dist1, dist2) - assert_array_almost_equal(ind1, ind2) + assert_allclose(dist1, dist2) + assert_allclose(ind1, ind2) def test_n_neighbors_datatype(): @@ -359,8 +356,8 @@ def check_precomputed(make_train_test, estimators): ) nbrs_D.fit(DXX) dist_D, ind_D = getattr(nbrs_D, method)(DYX) - assert_array_almost_equal(dist_X, dist_D) - assert_array_almost_equal(ind_X, ind_D) + assert_allclose(dist_X, dist_D) + assert_allclose(ind_X, ind_D) # Check auto works too nbrs_D = neighbors.NearestNeighbors( @@ -368,14 +365,14 @@ def check_precomputed(make_train_test, estimators): ) nbrs_D.fit(DXX) dist_D, ind_D = getattr(nbrs_D, method)(DYX) - assert_array_almost_equal(dist_X, dist_D) - assert_array_almost_equal(ind_X, ind_D) + assert_allclose(dist_X, dist_D) + assert_allclose(ind_X, ind_D) # Check X=None in prediction dist_X, ind_X = getattr(nbrs_X, method)(None) dist_D, ind_D = getattr(nbrs_D, method)(None) - assert_array_almost_equal(dist_X, dist_D) - assert_array_almost_equal(ind_X, ind_D) + assert_allclose(dist_X, dist_D) + assert_allclose(ind_X, ind_D) # Must raise a ValueError if the matrix is not of correct shape with pytest.raises(ValueError): @@ -388,7 +385,7 @@ def check_precomputed(make_train_test, estimators): pred_X = est.fit(X, target).predict(Y) est.metric = "precomputed" pred_D = est.fit(DXX, target).predict(DYX) - assert_array_almost_equal(pred_X, pred_D) + assert_allclose(pred_X, pred_D) def test_precomputed_dense(): @@ -527,16 +524,15 @@ def test_precomputed_cross_validation(): assert_array_equal(metric_score, precomp_score) -@pytest.mark.parametrize("dtype", DTYPES) def test_unsupervised_radius_neighbors( - dtype, n_samples=20, n_features=5, n_query_pts=2, radius=0.5, random_state=0 + global_dtype, n_samples=20, n_features=5, n_query_pts=2, radius=0.5, random_state=0 ): # Test unsupervised radius-based query rng = np.random.RandomState(random_state) - X = rng.rand(n_samples, n_features).astype(dtype) + X = rng.rand(n_samples, n_features).astype(global_dtype) - test = rng.rand(n_query_pts, n_features).astype(dtype) + test = rng.rand(n_query_pts, n_features).astype(global_dtype) for p in P: results = [] @@ -557,26 +553,23 @@ def test_unsupervised_radius_neighbors( i1[:] = i1[j] results.append((dist, ind)) - assert_array_almost_equal( - np.concatenate(list(ind)), np.concatenate(list(ind1)) - ) + assert_allclose(np.concatenate(list(ind)), np.concatenate(list(ind1))) for i in range(len(results) - 1): - assert_array_almost_equal( + assert_allclose( np.concatenate(list(results[i][0])), np.concatenate(list(results[i + 1][0])), ), - assert_array_almost_equal( + assert_allclose( np.concatenate(list(results[i][1])), np.concatenate(list(results[i + 1][1])), ) -@pytest.mark.parametrize("dtype", DTYPES) @pytest.mark.parametrize("algorithm", ALGORITHMS) @pytest.mark.parametrize("weights", WEIGHTS) def test_kneighbors_classifier( - dtype, + global_dtype, algorithm, weights, n_samples=40, @@ -587,7 +580,7 @@ def test_kneighbors_classifier( ): # Test k-neighbors classification rng = np.random.RandomState(random_state) - X = 2 * rng.rand(n_samples, n_features).astype(dtype) - 1 + X = 2 * rng.rand(n_samples, n_features).astype(global_dtype) - 1 y = ((X**2).sum(axis=1) < 0.5).astype(int) y_str = y.astype(str) @@ -604,13 +597,17 @@ def test_kneighbors_classifier( assert_array_equal(y_pred, y_str[:n_test_pts]) -@pytest.mark.parametrize("dtype", DTYPES) def test_kneighbors_classifier_float_labels( - dtype, n_samples=40, n_features=5, n_test_pts=10, n_neighbors=5, random_state=0 + global_dtype, + n_samples=40, + n_features=5, + n_test_pts=10, + n_neighbors=5, + random_state=0, ): # Test k-neighbors classification rng = np.random.RandomState(random_state) - X = 2 * rng.rand(n_samples, n_features).astype(dtype) - 1 + X = 2 * rng.rand(n_samples, n_features).astype(global_dtype) - 1 y = ((X**2).sum(axis=1) < 0.5).astype(int) knn = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors) @@ -620,12 +617,11 @@ def test_kneighbors_classifier_float_labels( assert_array_equal(y_pred, y[:n_test_pts]) -@pytest.mark.parametrize("dtype", DTYPES) -def test_kneighbors_classifier_predict_proba(dtype): +def test_kneighbors_classifier_predict_proba(global_dtype): # Test KNeighborsClassifier.predict_proba() method X = np.array( [[0, 2, 0], [0, 2, 1], [2, 0, 0], [2, 2, 0], [0, 0, 2], [0, 0, 1]] - ).astype(dtype) + ).astype(global_dtype) y = np.array([4, 4, 5, 5, 1, 1]) cls = neighbors.KNeighborsClassifier(n_neighbors=3, p=1) # cityblock dist cls.fit(X, y) @@ -650,14 +646,13 @@ def test_kneighbors_classifier_predict_proba(dtype): cls.fit(X, y) y_prob = cls.predict_proba(np.array([[0, 2, 0], [2, 2, 2]])) real_prob = np.array([[0, 1, 0], [0, 0.4, 0.6]]) - assert_array_almost_equal(real_prob, y_prob) + assert_allclose(real_prob, y_prob) -@pytest.mark.parametrize("dtype", DTYPES) @pytest.mark.parametrize("algorithm", ALGORITHMS) @pytest.mark.parametrize("weights", WEIGHTS) def test_radius_neighbors_classifier( - dtype, + global_dtype, algorithm, weights, n_samples=40, @@ -668,7 +663,7 @@ def test_radius_neighbors_classifier( ): # Test radius-based classification rng = np.random.RandomState(random_state) - X = 2 * rng.rand(n_samples, n_features).astype(dtype) - 1 + X = 2 * rng.rand(n_samples, n_features).astype(global_dtype) - 1 y = ((X**2).sum(axis=1) < 0.5).astype(int) y_str = y.astype(str) @@ -699,22 +694,21 @@ def test_radius_neighbors_classifier_kwargs_is_deprecated(): neighbors.RadiusNeighborsClassifier(**extra_kwargs) -@pytest.mark.parametrize("dtype", DTYPES) @pytest.mark.parametrize("algorithm", ALGORITHMS) @pytest.mark.parametrize("weights", WEIGHTS) @pytest.mark.parametrize("outlier_label", [0, -1, None]) def test_radius_neighbors_classifier_when_no_neighbors( - dtype, algorithm, weights, outlier_label + global_dtype, algorithm, weights, outlier_label ): # Test radius-based classifier when no neighbors found. # In this case it should rise an informative exception - X = np.array([[1.0, 1.0], [2.0, 2.0]]).astype(dtype) + X = np.array([[1.0, 1.0], [2.0, 2.0]]).astype(global_dtype) y = np.array([1, 2]) radius = 0.1 - z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(dtype) # no outliers - z2 = np.array([[1.01, 1.01], [1.4, 1.4]]).astype(dtype) # one outlier + z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(global_dtype) # no outliers + z2 = np.array([[1.01, 1.01], [1.4, 1.4]]).astype(global_dtype) # one outlier rnc = neighbors.RadiusNeighborsClassifier clf = rnc( @@ -730,21 +724,22 @@ def test_radius_neighbors_classifier_when_no_neighbors( clf.predict(z2) -@pytest.mark.parametrize("dtype", DTYPES) @pytest.mark.parametrize("algorithm", ALGORITHMS) @pytest.mark.parametrize("weights", WEIGHTS) -def test_radius_neighbors_classifier_outlier_labeling(dtype, algorithm, weights): +def test_radius_neighbors_classifier_outlier_labeling(global_dtype, algorithm, weights): # Test radius-based classifier when no neighbors found and outliers # are labeled. X = np.array( [[1.0, 1.0], [2.0, 2.0], [0.99, 0.99], [0.98, 0.98], [2.01, 2.01]] - ).astype(dtype) + ).astype(global_dtype) y = np.array([1, 2, 1, 1, 2]) radius = 0.1 - z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(dtype) # no outliers - z2 = np.array([[1.4, 1.4], [1.01, 1.01], [2.01, 2.01]]).astype(dtype) # one outlier + z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(global_dtype) # no outliers + z2 = np.array([[1.4, 1.4], [1.01, 1.01], [2.01, 2.01]]).astype( + global_dtype + ) # one outlier correct_labels1 = np.array([1, 2]) correct_labels2 = np.array([-1, 1, 2]) outlier_proba = np.array([0, 0]) @@ -759,7 +754,9 @@ def test_radius_neighbors_classifier_outlier_labeling(dtype, algorithm, weights) # test outlier_labeling of using predict_proba() RNC = neighbors.RadiusNeighborsClassifier - X = np.array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]).astype(dtype) + X = np.array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]).astype( + global_dtype + ) y = np.array([0, 2, 2, 1, 1, 1, 3, 3, 3, 3]) # test outlier_label scalar verification @@ -896,7 +893,7 @@ def test_neighbors_regressors_zero_distance(): radius=radius, weights=weights, algorithm=algorithm ) rnn.fit(X, y) - assert_array_almost_equal(rnn_correct_labels, rnn.predict(z)) + assert_allclose(rnn_correct_labels, rnn.predict(z)) for weights, corr_labels in zip( ["uniform", "distance"], [knn_correct_unif, knn_correct_dist] @@ -905,7 +902,7 @@ def test_neighbors_regressors_zero_distance(): n_neighbors=2, weights=weights, algorithm=algorithm ) knn.fit(X, y) - assert_array_almost_equal(corr_labels, knn.predict(z)) + assert_allclose(corr_labels, knn.predict(z)) def test_radius_neighbors_boundary_handling(): @@ -1037,7 +1034,7 @@ def test_RadiusNeighborsClassifier_multioutput(): y_pred_mo = rnn_mo.predict(X_test) assert y_pred_mo.shape == y_test.shape - assert_array_almost_equal(y_pred_mo, y_pred_so) + assert_allclose(y_pred_mo, y_pred_so) def test_kneighbors_classifier_sparse( @@ -1094,14 +1091,14 @@ def test_KNeighborsClassifier_multioutput(): y_pred_mo = knn_mo.predict(X_test) assert y_pred_mo.shape == y_test.shape - assert_array_almost_equal(y_pred_mo, y_pred_so) + assert_allclose(y_pred_mo, y_pred_so) # Check proba y_pred_proba_mo = knn_mo.predict_proba(X_test) assert len(y_pred_proba_mo) == n_output for proba_mo, proba_so in zip(y_pred_proba_mo, y_pred_proba_so): - assert_array_almost_equal(proba_mo, proba_so) + assert_allclose(proba_mo, proba_so) def test_kneighbors_regressor( @@ -1150,7 +1147,7 @@ def test_KNeighborsRegressor_multioutput_uniform_weight(): assert y_pred.shape == y_test.shape assert y_pred_idx.shape == y_test.shape - assert_array_almost_equal(y_pred, y_pred_idx) + assert_allclose(y_pred, y_pred_idx) def test_kneighbors_regressor_multioutput( @@ -1242,7 +1239,7 @@ def test_RadiusNeighborsRegressor_multioutput_with_uniform_weight(): assert y_pred_idx.shape == y_test.shape assert y_pred.shape == y_test.shape - assert_array_almost_equal(y_pred, y_pred_idx) + assert_allclose(y_pred, y_pred_idx) def test_RadiusNeighborsRegressor_multioutput( @@ -1349,7 +1346,7 @@ def test_kneighbors_graph(): assert_array_equal(A.toarray(), np.eye(A.shape[0])) A = neighbors.kneighbors_graph(X, 1, mode="distance") - assert_array_almost_equal( + assert_allclose( A.toarray(), [[0.00, 1.01, 0.0], [1.01, 0.0, 0.0], [0.00, 1.40716026, 0.0]] ) @@ -1358,7 +1355,7 @@ def test_kneighbors_graph(): assert_array_equal(A.toarray(), [[1.0, 1.0, 0.0], [1.0, 1.0, 0.0], [0.0, 1.0, 1.0]]) A = neighbors.kneighbors_graph(X, 2, mode="distance") - assert_array_almost_equal( + assert_allclose( A.toarray(), [ [0.0, 1.01, 2.23606798], @@ -1369,7 +1366,7 @@ def test_kneighbors_graph(): # n_neighbors = 3 A = neighbors.kneighbors_graph(X, 3, mode="connectivity", include_self=True) - assert_array_almost_equal(A.toarray(), [[1, 1, 1], [1, 1, 1], [1, 1, 1]]) + assert_allclose(A.toarray(), [[1, 1, 1], [1, 1, 1], [1, 1, 1]]) @pytest.mark.parametrize("n_neighbors", [1, 2, 3]) @@ -1381,7 +1378,7 @@ def test_kneighbors_graph_sparse(n_neighbors, mode, seed=36): X = rng.randn(10, 10) Xcsr = csr_matrix(X) - assert_array_almost_equal( + assert_allclose( neighbors.kneighbors_graph(X, n_neighbors, mode=mode).toarray(), neighbors.kneighbors_graph(Xcsr, n_neighbors, mode=mode).toarray(), ) @@ -1395,7 +1392,7 @@ def test_radius_neighbors_graph(): assert_array_equal(A.toarray(), [[1.0, 1.0, 0.0], [1.0, 1.0, 1.0], [0.0, 1.0, 1.0]]) A = neighbors.radius_neighbors_graph(X, 1.5, mode="distance") - assert_array_almost_equal( + assert_allclose( A.toarray(), [[0.0, 1.01, 0.0], [1.01, 0.0, 1.40716026], [0.0, 1.40716026, 0.0]] ) @@ -1409,7 +1406,7 @@ def test_radius_neighbors_graph_sparse(n_neighbors, mode, seed=36): X = rng.randn(10, 10) Xcsr = csr_matrix(X) - assert_array_almost_equal( + assert_allclose( neighbors.radius_neighbors_graph(X, n_neighbors, mode=mode).toarray(), neighbors.radius_neighbors_graph(Xcsr, n_neighbors, mode=mode).toarray(), ) @@ -1488,14 +1485,13 @@ def test_neighbors_badargs(): - set(["pyfunc", *BOOL_METRICS]) ), ) -@pytest.mark.parametrize("dtype", DTYPES) def test_neighbors_metrics( - dtype, metric, n_samples=20, n_features=3, n_query_pts=2, n_neighbors=5 + global_dtype, metric, n_samples=20, n_features=3, n_query_pts=2, n_neighbors=5 ): # Test computing the neighbors for various metrics algorithms = ["brute", "ball_tree", "kd_tree"] - X_train = rng.rand(n_samples, n_features).astype(dtype) - X_test = rng.rand(n_query_pts, n_features).astype(dtype) + X_train = rng.rand(n_samples, n_features).astype(global_dtype) + X_test = rng.rand(n_query_pts, n_features).astype(global_dtype) metric_params_list = _generate_test_params_for(metric, n_features) @@ -1535,10 +1531,10 @@ def test_neighbors_metrics( and algorithm == "brute" and sp_version >= parse_version("1.6.0") ): - if dtype == np.float64: + if global_dtype == np.float64: # Warning from sklearn.metrics._dist_metrics.WMinkowskiDistance ExceptionToAssert = FutureWarning - if dtype == np.float32: + if global_dtype == np.float32: # Warning from Scipy ExceptionToAssert = DeprecationWarning @@ -1565,13 +1561,12 @@ def test_neighbors_metrics( @pytest.mark.parametrize( "metric", sorted(set(neighbors.VALID_METRICS["brute"]) - set(["precomputed"])) ) -@pytest.mark.parametrize("dtype", DTYPES) def test_kneighbors_brute_backend( - dtype, metric, n_samples=2000, n_features=30, n_query_pts=100, n_neighbors=5 + global_dtype, metric, n_samples=2000, n_features=30, n_query_pts=100, n_neighbors=5 ): # Both backend for the 'brute' algorithm of kneighbors must give identical results. - X_train = rng.rand(n_samples, n_features).astype(dtype) - X_test = rng.rand(n_query_pts, n_features).astype(dtype) + X_train = rng.rand(n_samples, n_features).astype(global_dtype) + X_test = rng.rand(n_query_pts, n_features).astype(global_dtype) # Haversine distance only accepts 2D data if metric == "haversine": @@ -1584,10 +1579,10 @@ def test_kneighbors_brute_backend( # wminkoski is deprecated in SciPy 1.6.0 and removed in 1.8.0 ExceptionToAssert = None if metric == "wminkowski" and sp_version >= parse_version("1.6.0"): - if dtype == np.float64: + if global_dtype == np.float64: # Warning from sklearn.metrics._dist_metrics.WMinkowskiDistance ExceptionToAssert = FutureWarning - if dtype == np.float32: + if global_dtype == np.float32: # Warning from Scipy ExceptionToAssert = DeprecationWarning @@ -1637,17 +1632,16 @@ def custom_metric(x1, x2): dist1, ind1 = nbrs1.kneighbors(X) dist2, ind2 = nbrs2.kneighbors(X) - assert_array_almost_equal(dist1, dist2) + assert_allclose(dist1, dist2) # TODO: Remove filterwarnings in 1.3 when wminkowski is removed @pytest.mark.filterwarnings("ignore:WMinkowskiDistance:FutureWarning:sklearn") @pytest.mark.parametrize("metric", neighbors.VALID_METRICS["brute"]) -@pytest.mark.parametrize("dtype", DTYPES) def test_valid_brute_metric_for_auto_algorithm( - dtype, metric, n_samples=20, n_features=12 + global_dtype, metric, n_samples=20, n_features=12 ): - X = rng.rand(n_samples, n_features).astype(dtype) + X = rng.rand(n_samples, n_features).astype(global_dtype) Xcsr = csr_matrix(X) metric_params_list = _generate_test_params_for(metric, n_features) @@ -1892,9 +1886,9 @@ def test_same_knn_parallel(algorithm): graph_parallel = clf.kneighbors_graph(X_test, mode="distance").toarray() assert_array_equal(y, y_parallel) - assert_array_almost_equal(dist, dist_parallel) + assert_allclose(dist, dist_parallel) assert_array_equal(ind, ind_parallel) - assert_array_almost_equal(graph, graph_parallel) + assert_allclose(graph, graph_parallel) @pytest.mark.parametrize("algorithm", ALGORITHMS) @@ -1918,9 +1912,9 @@ def test_same_radius_neighbors_parallel(algorithm): assert_array_equal(y, y_parallel) for i in range(len(dist)): - assert_array_almost_equal(dist[i], dist_parallel[i]) + assert_allclose(dist[i], dist_parallel[i]) assert_array_equal(ind[i], ind_parallel[i]) - assert_array_almost_equal(graph, graph_parallel) + assert_allclose(graph, graph_parallel) @pytest.mark.parametrize("backend", JOBLIB_BACKENDS) @@ -2054,7 +2048,7 @@ def test_pipeline_with_nearest_neighbors_transformer(): y_pred_chain = reg_chain.fit(X, y).predict(X2) y_pred_compact = reg_compact.fit(X, y).predict(X2) - assert_array_almost_equal(y_pred_chain, y_pred_compact) + assert_allclose(y_pred_chain, y_pred_compact) @pytest.mark.parametrize( From 35ff90893cf86e8ccc69242c4bef6e804ed39552 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Tue, 29 Mar 2022 13:03:35 +0200 Subject: [PATCH 3/5] Apply review comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jérémie du Boisberranger --- sklearn/neighbors/tests/test_neighbors.py | 25 ++++++++--------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index 4b83815cb1b10..cfe67a4a799d0 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -274,8 +274,6 @@ def test_neigh_predictions_algorithm_agnosticity( f"The '{algorithm}' and '{next_algorithm}' " "algorithms return different predictions." ), - rtol=1e-7, - atol=1e-7, ) @@ -305,7 +303,7 @@ def test_unsupervised_inputs(global_dtype, KNeighborsMixinSubclass): dist2, ind2 = nbrs.kneighbors(X) assert_allclose(dist1, dist2) - assert_allclose(ind1, ind2) + assert_array_equal(ind1, ind2) def test_n_neighbors_datatype(): @@ -357,7 +355,7 @@ def check_precomputed(make_train_test, estimators): nbrs_D.fit(DXX) dist_D, ind_D = getattr(nbrs_D, method)(DYX) assert_allclose(dist_X, dist_D) - assert_allclose(ind_X, ind_D) + assert_array_equal(ind_X, ind_D) # Check auto works too nbrs_D = neighbors.NearestNeighbors( @@ -366,13 +364,13 @@ def check_precomputed(make_train_test, estimators): nbrs_D.fit(DXX) dist_D, ind_D = getattr(nbrs_D, method)(DYX) assert_allclose(dist_X, dist_D) - assert_allclose(ind_X, ind_D) + assert_array_equal(ind_X, ind_D) # Check X=None in prediction dist_X, ind_X = getattr(nbrs_X, method)(None) dist_D, ind_D = getattr(nbrs_D, method)(None) assert_allclose(dist_X, dist_D) - assert_allclose(ind_X, ind_D) + assert_array_equal(ind_X, ind_D) # Must raise a ValueError if the matrix is not of correct shape with pytest.raises(ValueError): @@ -750,7 +748,7 @@ def test_radius_neighbors_classifier_outlier_labeling(global_dtype, algorithm, w clf.fit(X, y) assert_array_equal(correct_labels1, clf.predict(z1)) assert_array_equal(correct_labels2, clf.predict(z2)) - assert_array_equal(outlier_proba, clf.predict_proba(z2)[0]) + assert_allclose(outlier_proba, clf.predict_proba(z2)[0]) # test outlier_labeling of using predict_proba() RNC = neighbors.RadiusNeighborsClassifier @@ -1034,7 +1032,7 @@ def test_RadiusNeighborsClassifier_multioutput(): y_pred_mo = rnn_mo.predict(X_test) assert y_pred_mo.shape == y_test.shape - assert_allclose(y_pred_mo, y_pred_so) + assert_array_equal(y_pred_mo, y_pred_so) def test_kneighbors_classifier_sparse( @@ -1091,14 +1089,14 @@ def test_KNeighborsClassifier_multioutput(): y_pred_mo = knn_mo.predict(X_test) assert y_pred_mo.shape == y_test.shape - assert_allclose(y_pred_mo, y_pred_so) + assert_array_equal(y_pred_mo, y_pred_so) # Check proba y_pred_proba_mo = knn_mo.predict_proba(X_test) assert len(y_pred_proba_mo) == n_output for proba_mo, proba_so in zip(y_pred_proba_mo, y_pred_proba_so): - assert_allclose(proba_mo, proba_so) + assert_array_equal(proba_mo, proba_so) def test_kneighbors_regressor( @@ -1531,12 +1529,7 @@ def test_neighbors_metrics( and algorithm == "brute" and sp_version >= parse_version("1.6.0") ): - if global_dtype == np.float64: - # Warning from sklearn.metrics._dist_metrics.WMinkowskiDistance - ExceptionToAssert = FutureWarning - if global_dtype == np.float32: - # Warning from Scipy - ExceptionToAssert = DeprecationWarning + ExceptionToAssert = FutureWarning with pytest.warns(ExceptionToAssert): results[algorithm] = neigh.kneighbors(X_test, return_distance=True) From 382a47294633bedd6114f8c80649914bb009ffb2 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Wed, 30 Mar 2022 10:15:54 +0200 Subject: [PATCH 4/5] Don't copy on same dtype --- sklearn/neighbors/tests/test_neighbors.py | 52 +++++++++++++---------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index cfe67a4a799d0..d662075c62ea0 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -157,12 +157,12 @@ def test_unsupervised_kneighbors( # Redefining the rng locally to use the same generated X local_rng = np.random.RandomState(0) - X = local_rng.rand(n_samples, n_features).astype(global_dtype) + X = local_rng.rand(n_samples, n_features).astype(global_dtype, copy=False) query = ( X if query_is_train - else local_rng.rand(n_query_pts, n_features).astype(global_dtype) + else local_rng.rand(n_query_pts, n_features).astype(global_dtype, copy=False) ) results_nodist = [] @@ -244,10 +244,10 @@ def test_neigh_predictions_algorithm_agnosticity( # Redefining the rng locally to use the same generated X local_rng = np.random.RandomState(0) - X = local_rng.rand(n_samples, n_features).astype(global_dtype) + X = local_rng.rand(n_samples, n_features).astype(global_dtype, copy=False) y = local_rng.randint(3, size=n_samples) - query = local_rng.rand(n_query_pts, n_features).astype(global_dtype) + query = local_rng.rand(n_query_pts, n_features).astype(global_dtype, copy=False) predict_results = [] @@ -288,7 +288,7 @@ def test_neigh_predictions_algorithm_agnosticity( def test_unsupervised_inputs(global_dtype, KNeighborsMixinSubclass): # Test unsupervised inputs for neighbors estimators - X = rng.random_sample((10, 3)).astype(global_dtype) + X = rng.random_sample((10, 3)).astype(global_dtype, copy=False) y = rng.randint(3, size=10) nbrs_fid = neighbors.NearestNeighbors(n_neighbors=1) nbrs_fid.fit(X) @@ -528,9 +528,9 @@ def test_unsupervised_radius_neighbors( # Test unsupervised radius-based query rng = np.random.RandomState(random_state) - X = rng.rand(n_samples, n_features).astype(global_dtype) + X = rng.rand(n_samples, n_features).astype(global_dtype, copy=False) - test = rng.rand(n_query_pts, n_features).astype(global_dtype) + test = rng.rand(n_query_pts, n_features).astype(global_dtype, copy=False) for p in P: results = [] @@ -578,7 +578,7 @@ def test_kneighbors_classifier( ): # Test k-neighbors classification rng = np.random.RandomState(random_state) - X = 2 * rng.rand(n_samples, n_features).astype(global_dtype) - 1 + X = 2 * rng.rand(n_samples, n_features).astype(global_dtype, copy=False) - 1 y = ((X**2).sum(axis=1) < 0.5).astype(int) y_str = y.astype(str) @@ -605,7 +605,7 @@ def test_kneighbors_classifier_float_labels( ): # Test k-neighbors classification rng = np.random.RandomState(random_state) - X = 2 * rng.rand(n_samples, n_features).astype(global_dtype) - 1 + X = 2 * rng.rand(n_samples, n_features).astype(global_dtype, copy=False) - 1 y = ((X**2).sum(axis=1) < 0.5).astype(int) knn = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors) @@ -619,7 +619,7 @@ def test_kneighbors_classifier_predict_proba(global_dtype): # Test KNeighborsClassifier.predict_proba() method X = np.array( [[0, 2, 0], [0, 2, 1], [2, 0, 0], [2, 2, 0], [0, 0, 2], [0, 0, 1]] - ).astype(global_dtype) + ).astype(global_dtype, copy=False) y = np.array([4, 4, 5, 5, 1, 1]) cls = neighbors.KNeighborsClassifier(n_neighbors=3, p=1) # cityblock dist cls.fit(X, y) @@ -661,7 +661,7 @@ def test_radius_neighbors_classifier( ): # Test radius-based classification rng = np.random.RandomState(random_state) - X = 2 * rng.rand(n_samples, n_features).astype(global_dtype) - 1 + X = 2 * rng.rand(n_samples, n_features).astype(global_dtype, copy=False) - 1 y = ((X**2).sum(axis=1) < 0.5).astype(int) y_str = y.astype(str) @@ -701,12 +701,16 @@ def test_radius_neighbors_classifier_when_no_neighbors( # Test radius-based classifier when no neighbors found. # In this case it should rise an informative exception - X = np.array([[1.0, 1.0], [2.0, 2.0]]).astype(global_dtype) + X = np.array([[1.0, 1.0], [2.0, 2.0]]).astype(global_dtype, copy=False) y = np.array([1, 2]) radius = 0.1 - z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(global_dtype) # no outliers - z2 = np.array([[1.01, 1.01], [1.4, 1.4]]).astype(global_dtype) # one outlier + z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype( + global_dtype, copy=False + ) # no outliers + z2 = np.array([[1.01, 1.01], [1.4, 1.4]]).astype( + global_dtype, copy=False + ) # one outlier rnc = neighbors.RadiusNeighborsClassifier clf = rnc( @@ -730,13 +734,15 @@ def test_radius_neighbors_classifier_outlier_labeling(global_dtype, algorithm, w X = np.array( [[1.0, 1.0], [2.0, 2.0], [0.99, 0.99], [0.98, 0.98], [2.01, 2.01]] - ).astype(global_dtype) + ).astype(global_dtype, copy=False) y = np.array([1, 2, 1, 1, 2]) radius = 0.1 - z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype(global_dtype) # no outliers + z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype( + global_dtype, copy=False + ) # no outliers z2 = np.array([[1.4, 1.4], [1.01, 1.01], [2.01, 2.01]]).astype( - global_dtype + global_dtype, copy=False ) # one outlier correct_labels1 = np.array([1, 2]) correct_labels2 = np.array([-1, 1, 2]) @@ -753,7 +759,7 @@ def test_radius_neighbors_classifier_outlier_labeling(global_dtype, algorithm, w # test outlier_labeling of using predict_proba() RNC = neighbors.RadiusNeighborsClassifier X = np.array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]).astype( - global_dtype + global_dtype, copy=False ) y = np.array([0, 2, 2, 1, 1, 1, 3, 3, 3, 3]) @@ -1488,8 +1494,8 @@ def test_neighbors_metrics( ): # Test computing the neighbors for various metrics algorithms = ["brute", "ball_tree", "kd_tree"] - X_train = rng.rand(n_samples, n_features).astype(global_dtype) - X_test = rng.rand(n_query_pts, n_features).astype(global_dtype) + X_train = rng.rand(n_samples, n_features).astype(global_dtype, copy=False) + X_test = rng.rand(n_query_pts, n_features).astype(global_dtype, copy=False) metric_params_list = _generate_test_params_for(metric, n_features) @@ -1558,8 +1564,8 @@ def test_kneighbors_brute_backend( global_dtype, metric, n_samples=2000, n_features=30, n_query_pts=100, n_neighbors=5 ): # Both backend for the 'brute' algorithm of kneighbors must give identical results. - X_train = rng.rand(n_samples, n_features).astype(global_dtype) - X_test = rng.rand(n_query_pts, n_features).astype(global_dtype) + X_train = rng.rand(n_samples, n_features).astype(global_dtype, copy=False) + X_test = rng.rand(n_query_pts, n_features).astype(global_dtype, copy=False) # Haversine distance only accepts 2D data if metric == "haversine": @@ -1634,7 +1640,7 @@ def custom_metric(x1, x2): def test_valid_brute_metric_for_auto_algorithm( global_dtype, metric, n_samples=20, n_features=12 ): - X = rng.rand(n_samples, n_features).astype(global_dtype) + X = rng.rand(n_samples, n_features).astype(global_dtype, copy=False) Xcsr = csr_matrix(X) metric_params_list = _generate_test_params_for(metric, n_features) From 0a90e35dd786b43009a573c4d6f59686c57710f3 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 30 Mar 2022 11:04:47 +0200 Subject: [PATCH 5/5] Pass dtype=global_dtype directly to np.array([...]) + fix warnings in tests --- sklearn/neighbors/tests/test_neighbors.py | 42 +++++++++++------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index d662075c62ea0..2bbfc9dbfa07b 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -662,7 +662,7 @@ def test_radius_neighbors_classifier( # Test radius-based classification rng = np.random.RandomState(random_state) X = 2 * rng.rand(n_samples, n_features).astype(global_dtype, copy=False) - 1 - y = ((X**2).sum(axis=1) < 0.5).astype(int) + y = ((X**2).sum(axis=1) < radius).astype(int) y_str = y.astype(str) neigh = neighbors.RadiusNeighborsClassifier( @@ -701,16 +701,15 @@ def test_radius_neighbors_classifier_when_no_neighbors( # Test radius-based classifier when no neighbors found. # In this case it should rise an informative exception - X = np.array([[1.0, 1.0], [2.0, 2.0]]).astype(global_dtype, copy=False) + X = np.array([[1.0, 1.0], [2.0, 2.0]], dtype=global_dtype) y = np.array([1, 2]) radius = 0.1 - z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype( - global_dtype, copy=False - ) # no outliers - z2 = np.array([[1.01, 1.01], [1.4, 1.4]]).astype( - global_dtype, copy=False - ) # one outlier + # no outliers + z1 = np.array([[1.01, 1.01], [2.01, 2.01]], dtype=global_dtype) + + # one outlier + z2 = np.array([[1.01, 1.01], [1.4, 1.4]], dtype=global_dtype) rnc = neighbors.RadiusNeighborsClassifier clf = rnc( @@ -733,17 +732,18 @@ def test_radius_neighbors_classifier_outlier_labeling(global_dtype, algorithm, w # are labeled. X = np.array( - [[1.0, 1.0], [2.0, 2.0], [0.99, 0.99], [0.98, 0.98], [2.01, 2.01]] - ).astype(global_dtype, copy=False) + [[1.0, 1.0], [2.0, 2.0], [0.99, 0.99], [0.98, 0.98], [2.01, 2.01]], + dtype=global_dtype, + ) y = np.array([1, 2, 1, 1, 2]) radius = 0.1 - z1 = np.array([[1.01, 1.01], [2.01, 2.01]]).astype( - global_dtype, copy=False - ) # no outliers - z2 = np.array([[1.4, 1.4], [1.01, 1.01], [2.01, 2.01]]).astype( - global_dtype, copy=False - ) # one outlier + # no outliers + z1 = np.array([[1.01, 1.01], [2.01, 2.01]], dtype=global_dtype) + + # one outlier + z2 = np.array([[1.4, 1.4], [1.01, 1.01], [2.01, 2.01]], dtype=global_dtype) + correct_labels1 = np.array([1, 2]) correct_labels2 = np.array([-1, 1, 2]) outlier_proba = np.array([0, 0]) @@ -753,14 +753,14 @@ def test_radius_neighbors_classifier_outlier_labeling(global_dtype, algorithm, w ) clf.fit(X, y) assert_array_equal(correct_labels1, clf.predict(z1)) - assert_array_equal(correct_labels2, clf.predict(z2)) - assert_allclose(outlier_proba, clf.predict_proba(z2)[0]) + with pytest.warns(UserWarning, match="Outlier label -1 is not in training classes"): + assert_array_equal(correct_labels2, clf.predict(z2)) + with pytest.warns(UserWarning, match="Outlier label -1 is not in training classes"): + assert_allclose(outlier_proba, clf.predict_proba(z2)[0]) # test outlier_labeling of using predict_proba() RNC = neighbors.RadiusNeighborsClassifier - X = np.array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]).astype( - global_dtype, copy=False - ) + X = np.array([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]], dtype=global_dtype) y = np.array([0, 2, 2, 1, 1, 1, 3, 3, 3, 3]) # test outlier_label scalar verification