diff --git a/doc/whats_new/upcoming_changes/sklearn.metrics/30537.api.rst b/doc/whats_new/upcoming_changes/sklearn.metrics/30537.api.rst new file mode 100644 index 0000000000000..ae37a8d285a31 --- /dev/null +++ b/doc/whats_new/upcoming_changes/sklearn.metrics/30537.api.rst @@ -0,0 +1,5 @@ +- |API| :func:`~metrics.pairwise.paired_distances`, + :func:`~metrics.pairwise.paired_euclidean_distances`, + :func:`~metrics.pairwise.paired_manhattan_distances` and + :func:`~metrics.pairwise.paired_cosine_distances` are now deprecated and + will be removed in 1.9. By :user:`Success Moses `. diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index 2fa7253e665b8..770362b773b08 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -23,7 +23,7 @@ ) from ..metrics import DistanceMetric from ..metrics._dist_metrics import METRIC_MAPPING64 -from ..metrics.pairwise import _VALID_METRICS, paired_distances +from ..metrics.pairwise import _VALID_METRICS, _paired_distances from ..utils import check_array from ..utils._fast_dict import IntFloatDict from ..utils._param_validation import ( @@ -605,7 +605,7 @@ def linkage_tree( else: # FIXME We compute all the distances, while we could have only computed # the "interesting" distances - distances = paired_distances( + distances = _paired_distances( X[connectivity.row], X[connectivity.col], metric=affinity ) connectivity.data = distances diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 3e1fe1d68420f..5fa90a7920562 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -38,7 +38,7 @@ StrOptions, validate_params, ) -from ..utils.deprecation import _deprecate_force_all_finite +from ..utils.deprecation import _deprecate_force_all_finite, deprecated from ..utils.extmath import row_norms, safe_sparse_dot from ..utils.fixes import parse_version, sp_base_version from ..utils.parallel import Parallel, delayed @@ -1187,6 +1187,13 @@ def 
cosine_distances(X, Y=None): # Paired distances + + +# TODO(1.9): Remove in 1.9 +@deprecated( + "The public function `sklearn.pairwise.paired_euclidean_distances` has been " + "deprecated in 1.7 and will be removed in 1.9." +) @validate_params( {"X": ["array-like", "sparse matrix"], "Y": ["array-like", "sparse matrix"]}, prefer_skip_nested_validation=True, @@ -1212,16 +1219,53 @@ def paired_euclidean_distances(X, Y): Examples -------- - >>> from sklearn.metrics.pairwise import paired_euclidean_distances + >>> from sklearn.metrics.pairwise import _paired_euclidean_distances >>> X = [[0, 0, 0], [1, 1, 1]] >>> Y = [[1, 0, 0], [1, 1, 0]] - >>> paired_euclidean_distances(X, Y) + >>> _paired_euclidean_distances(X, Y) array([1., 1.]) """ + + return _paired_euclidean_distances(X, Y) + + +def _paired_euclidean_distances(X, Y): + """Compute the paired euclidean distances between X and Y. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Input array/matrix X. + + Y : {array-like, sparse matrix} of shape (n_samples, n_features) + Input array/matrix Y. + + Returns + ------- + distances : ndarray of shape (n_samples,) + Output array/matrix containing the calculated paired euclidean + distances. + + Examples + -------- + >>> from sklearn.metrics.pairwise import _paired_euclidean_distances + >>> X = [[0, 0, 0], [1, 1, 1]] + >>> Y = [[1, 0, 0], [1, 1, 0]] + >>> _paired_euclidean_distances(X, Y) + array([1., 1.]) + """ + X, Y = check_paired_arrays(X, Y) return row_norms(X - Y) +# TODO(1.9): Remove in 1.9 +@deprecated( + "The public function `sklearn.pairwise.paired_manhattan_distances` has been " + "deprecated in 1.7 and will be removed in 1.9." 
+) @validate_params( {"X": ["array-like", "sparse matrix"], "Y": ["array-like", "sparse matrix"]}, prefer_skip_nested_validation=True, @@ -1250,11 +1294,46 @@ def paired_manhattan_distances(X, Y): Examples -------- - >>> from sklearn.metrics.pairwise import paired_manhattan_distances + >>> from sklearn.metrics.pairwise import _paired_manhattan_distances + >>> import numpy as np + >>> X = np.array([[1, 1, 0], [0, 1, 0], [0, 0, 1]]) + >>> Y = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]]) + >>> _paired_manhattan_distances(X, Y) + array([1., 2., 1.]) + """ + + return _paired_manhattan_distances(X, Y) + + +def _paired_manhattan_distances(X, Y): + """Compute the paired L1 distances between X and Y. + + Distances are calculated between (X[0], Y[0]), (X[1], Y[1]), ..., + (X[n_samples], Y[n_samples]). + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + An array-like where each row is a sample and each column is a feature. + + Y : {array-like, sparse matrix} of shape (n_samples, n_features) + An array-like where each row is a sample and each column is a feature. + + Returns + ------- + distances : ndarray of shape (n_samples,) + L1 paired distances between the row vectors of `X` + and the row vectors of `Y`. + + Examples + -------- + >>> from sklearn.metrics.pairwise import _paired_manhattan_distances >>> import numpy as np >>> X = np.array([[1, 1, 0], [0, 1, 0], [0, 0, 1]]) >>> Y = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]]) - >>> paired_manhattan_distances(X, Y) + >>> _paired_manhattan_distances(X, Y) array([1., 2., 1.]) """ X, Y = check_paired_arrays(X, Y) @@ -1266,6 +1345,11 @@ def paired_manhattan_distances(X, Y): return np.abs(diff).sum(axis=-1) +# TODO(1.9): Remove in 1.9 +@deprecated( + "The public function `sklearn.pairwise.paired_cosine_distances` has been " + "deprecated in 1.7 and will be removed in 1.9." 
+) @validate_params( {"X": ["array-like", "sparse matrix"], "Y": ["array-like", "sparse matrix"]}, prefer_skip_nested_validation=True, @@ -1298,16 +1382,55 @@ def paired_cosine_distances(X, Y): Examples -------- - >>> from sklearn.metrics.pairwise import paired_cosine_distances + >>> from sklearn.metrics.pairwise import _paired_cosine_distances >>> X = [[0, 0, 0], [1, 1, 1]] >>> Y = [[1, 0, 0], [1, 1, 0]] - >>> paired_cosine_distances(X, Y) + >>> _paired_cosine_distances(X, Y) + array([0.5 , 0.18...]) + """ + return _paired_cosine_distances(X, Y) + + +def _paired_cosine_distances(X, Y): + """ + Compute the paired cosine distances between X and Y. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + An array where each row is a sample and each column is a feature. + + Y : {array-like, sparse matrix} of shape (n_samples, n_features) + An array where each row is a sample and each column is a feature. + + Returns + ------- + distances : ndarray of shape (n_samples,) + Returns the distances between the row vectors of `X` + and the row vectors of `Y`, where `distances[i]` is the + distance between `X[i]` and `Y[i]`. + + Notes + ----- + The cosine distance is equivalent to the half the squared + euclidean distance if each sample is normalized to unit norm. 
+ + Examples + -------- + >>> from sklearn.metrics.pairwise import _paired_cosine_distances + >>> X = [[0, 0, 0], [1, 1, 1]] + >>> Y = [[1, 0, 0], [1, 1, 0]] + >>> _paired_cosine_distances(X, Y) array([0.5 , 0.18...]) """ X, Y = check_paired_arrays(X, Y) return 0.5 * row_norms(normalize(X) - normalize(Y), squared=True) +# TODO(1.9): Remove the PAIRED_DISTANCES dictionary once the public +# paired_*_distances functions (deprecated in 1.7) are removed. PAIRED_DISTANCES = { "cosine": paired_cosine_distances, "euclidean": paired_euclidean_distances, @@ -1317,7 +1440,21 @@ def paired_cosine_distances(X, Y): "cityblock": paired_manhattan_distances, } +_PAIRED_DISTANCES = { + "cosine": _paired_cosine_distances, + "euclidean": _paired_euclidean_distances, + "l2": _paired_euclidean_distances, + "l1": _paired_manhattan_distances, + "manhattan": _paired_manhattan_distances, + "cityblock": _paired_manhattan_distances, +} + +# TODO(1.9): Remove in 1.9 +@deprecated( + "The public function `sklearn.pairwise.paired_distances` has been " + "deprecated in 1.7 and will be removed in 1.9." +) @validate_params( { "X": ["array-like"], @@ -1368,15 +1505,66 @@ def paired_distances(X, Y, *, metric="euclidean", **kwds): Examples -------- - >>> from sklearn.metrics.pairwise import paired_distances + >>> from sklearn.metrics.pairwise import _paired_distances >>> X = [[0, 1], [1, 1]] >>> Y = [[0, 1], [2, 1]] - >>> paired_distances(X, Y) + >>> _paired_distances(X, Y) array([0., 1.]) """ - if metric in PAIRED_DISTANCES: - func = PAIRED_DISTANCES[metric] + return _paired_distances(X, Y, metric=metric, **kwds) + + +def _paired_distances(X, Y, *, metric="euclidean", **kwds): + """ + Compute the paired distances between X and Y. + + Compute the distances between (X[0], Y[0]), (X[1], Y[1]), etc... + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + X : ndarray of shape (n_samples, n_features) + Array 1 for distance computation.
+ + Y : ndarray of shape (n_samples, n_features) + Array 2 for distance computation. + + metric : str or callable, default="euclidean" + The metric to use when calculating distance between instances in a + feature array. If metric is a string, it must be one of the options + specified in _PAIRED_DISTANCES, including "euclidean", + "manhattan", or "cosine". + Alternatively, if metric is a callable function, it is called on each + pair of instances (rows) and the resulting value recorded. The callable + should take two arrays from `X` as input and return a value indicating + the distance between them. + + **kwds : dict + Unused parameters. + + Returns + ------- + distances : ndarray of shape (n_samples,) + Returns the distances between the row vectors of `X` + and the row vectors of `Y`. + + See Also + -------- + sklearn.metrics.pairwise_distances : Computes the distance between every pair of + samples. + + Examples + -------- + >>> from sklearn.metrics.pairwise import _paired_distances + >>> X = [[0, 1], [1, 1]] + >>> Y = [[0, 1], [2, 1]] + >>> _paired_distances(X, Y) + array([0., 1.]) + """ + if metric in _PAIRED_DISTANCES: + func = _PAIRED_DISTANCES[metric] return func(X, Y) elif callable(metric): # Check the matrix first (it is usually done by the metric) diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index ef8e6ebb2ac2a..2e0623aa50793 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -57,14 +57,14 @@ ) from sklearn.metrics._base import _average_binary_score from sklearn.metrics.pairwise import ( + _paired_cosine_distances, + _paired_euclidean_distances, additive_chi2_kernel, chi2_kernel, cosine_distances, cosine_similarity, euclidean_distances, linear_kernel, - paired_cosine_distances, - paired_euclidean_distances, polynomial_kernel, rbf_kernel, sigmoid_kernel, @@ -2141,7 +2141,7 @@ def check_array_api_metric_pairwise(metric, array_namespace, device, dtype_name)
d2_tweedie_score: [ check_array_api_regression_metric, ], - paired_cosine_distances: [check_array_api_metric_pairwise], + _paired_cosine_distances: [check_array_api_metric_pairwise], mean_poisson_deviance: [check_array_api_regression_metric], additive_chi2_kernel: [check_array_api_metric_pairwise], mean_gamma_deviance: [check_array_api_regression_metric], @@ -2151,7 +2151,7 @@ def check_array_api_metric_pairwise(metric, array_namespace, device, dtype_name) check_array_api_regression_metric_multioutput, ], chi2_kernel: [check_array_api_metric_pairwise], - paired_euclidean_distances: [check_array_api_metric_pairwise], + _paired_euclidean_distances: [check_array_api_metric_pairwise], cosine_distances: [check_array_api_metric_pairwise], euclidean_distances: [check_array_api_metric_pairwise], linear_kernel: [check_array_api_metric_pairwise], diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py index 96f9ec256e800..7bc937c4c5559 100644 --- a/sklearn/metrics/tests/test_pairwise.py +++ b/sklearn/metrics/tests/test_pairwise.py @@ -16,12 +16,20 @@ from sklearn import config_context from sklearn.exceptions import DataConversionWarning + +# TODO(1.9): Remove import for paired_distances, paired_cosine_distances, +# paired_euclidean_distances, paired_manhattan_distances since +# paired_*_distances public functions will be removed in 1.9.
from sklearn.metrics.pairwise import ( - PAIRED_DISTANCES, + _PAIRED_DISTANCES, PAIRWISE_BOOLEAN_FUNCTIONS, PAIRWISE_DISTANCE_FUNCTIONS, PAIRWISE_KERNEL_FUNCTIONS, _euclidean_distances_upcast, + _paired_cosine_distances, + _paired_distances, + _paired_euclidean_distances, + _paired_manhattan_distances, additive_chi2_kernel, check_paired_arrays, check_pairwise_arrays, @@ -410,9 +418,9 @@ def test_pairwise_kernels_filter_param(): pairwise_kernels(X, Y, metric="rbf", **params) -@pytest.mark.parametrize("metric, func", PAIRED_DISTANCES.items()) +@pytest.mark.parametrize("metric, func", _PAIRED_DISTANCES.items()) @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) -def test_paired_distances(metric, func, csr_container): +def test__paired_distances(metric, func, csr_container): # Test the pairwise_distance helper function. rng = np.random.RandomState(0) # Euclidean distance should be equivalent to calling the function. @@ -420,7 +428,7 @@ def test_paired_distances(metric, func, csr_container): # Euclidean distance, with Y != X. Y = rng.random_sample((5, 4)) - S = paired_distances(X, Y, metric=metric) + S = _paired_distances(X, Y, metric=metric) S2 = func(X, Y) assert_allclose(S, S2) S3 = func(csr_container(X), csr_container(Y)) @@ -442,15 +450,15 @@ def test_paired_distances_callable(global_dtype): # Euclidean distance, with Y != X. 
Y = rng.random_sample((5, 4)).astype(global_dtype, copy=False) - S = paired_distances(X, Y, metric="manhattan") - S2 = paired_distances(X, Y, metric=lambda x, y: np.abs(x - y).sum(axis=0)) + S = _paired_distances(X, Y, metric="manhattan") + S2 = _paired_distances(X, Y, metric=lambda x, y: np.abs(x - y).sum(axis=0)) assert_allclose(S, S2) # Test that a value error is raised when the lengths of X and Y should not # differ Y = rng.random_sample((3, 4)) with pytest.raises(ValueError): - paired_distances(X, Y) + _paired_distances(X, Y) @pytest.mark.parametrize("dok_container", DOK_CONTAINERS) @@ -1186,27 +1194,27 @@ def slow_haversine_distances(x, y): # Paired distances -def test_paired_euclidean_distances(): +def test__paired_euclidean_distances(): # Check the paired Euclidean distances computation X = [[0], [0]] Y = [[1], [2]] - D = paired_euclidean_distances(X, Y) + D = _paired_euclidean_distances(X, Y) assert_allclose(D, [1.0, 2.0]) -def test_paired_manhattan_distances(): +def test__paired_manhattan_distances(): # Check the paired manhattan distances computation X = [[0], [0]] Y = [[1], [2]] - D = paired_manhattan_distances(X, Y) + D = _paired_manhattan_distances(X, Y) assert_allclose(D, [1.0, 2.0]) -def test_paired_cosine_distances(): +def test__paired_cosine_distances(): # Check the paired manhattan distances computation X = [[0], [0]] Y = [[1], [2]] - D = paired_cosine_distances(X, Y) + D = _paired_cosine_distances(X, Y) assert_allclose(D, [0.5, 0.5]) @@ -1669,6 +1677,66 @@ def test_sparse_manhattan_readonly_dataset(csr_container): ) +# TODO(1.9): Remove in 1.9 +def test_paired_distances_deprecation(): + """Check that we issue the FutureWarning regarding the deprecation of + paired_distances for gh-26982""" + + rng = np.random.RandomState(0) + X = rng.random_sample((5, 4)) + Y = rng.random_sample((5, 4)) + + warn_msg = ("The public function `sklearn.pairwise.paired_distances`" + " has been deprecated in 1.7 and will be removed in 1.9.")
+ with pytest.warns(FutureWarning, match=warn_msg): + paired_distances(X, Y) + + +# TODO(1.9): Remove in 1.9 +def test_paired_cosine_distances_deprecation(): + """Check that we issue the FutureWarning regarding the deprecation of + paired_cosine_distances for gh-26982""" + + rng = np.random.RandomState(0) + X = rng.random_sample((5, 4)) + Y = rng.random_sample((5, 4)) + + warn_msg = ("The public function `sklearn.pairwise.paired_cosine_distances`" + " has been deprecated in 1.7 and will be removed in 1.9.") + with pytest.warns(FutureWarning, match=warn_msg): + paired_cosine_distances(X, Y) + + +# TODO(1.9): Remove in 1.9 +def test_paired_euclidean_distances_deprecation(): + """Check that we issue the FutureWarning regarding the deprecation of + paired_euclidean_distances for gh-26982""" + + rng = np.random.RandomState(0) + X = rng.random_sample((5, 4)) + Y = rng.random_sample((5, 4)) + + warn_msg = ("The public function `sklearn.pairwise.paired_euclidean_distances`" + " has been deprecated in 1.7 and will be removed in 1.9.") + with pytest.warns(FutureWarning, match=warn_msg): + paired_euclidean_distances(X, Y) + + +# TODO(1.9): Remove in 1.9 +def test_paired_manhattan_distances_deprecation(): + """Check that we issue the FutureWarning regarding the deprecation of + paired_manhattan_distances for gh-26982""" + + rng = np.random.RandomState(0) + X = rng.random_sample((5, 4)) + Y = rng.random_sample((5, 4)) + + warn_msg = ("The public function `sklearn.pairwise.paired_manhattan_distances`" + " has been deprecated in 1.7 and will be removed in 1.9.")
+ with pytest.warns(FutureWarning, match=warn_msg): + paired_manhattan_distances(X, Y) + + # TODO(1.8): remove def test_force_all_finite_rename_warning(): X = np.random.uniform(size=(10, 10)) diff --git a/sklearn/tests/test_public_functions.py b/sklearn/tests/test_public_functions.py index 707aa37737c1b..4e69a0754c002 100644 --- a/sklearn/tests/test_public_functions.py +++ b/sklearn/tests/test_public_functions.py @@ -278,10 +278,6 @@ def _check_function_param_validation( "sklearn.metrics.pairwise.linear_kernel", "sklearn.metrics.pairwise.manhattan_distances", "sklearn.metrics.pairwise.nan_euclidean_distances", - "sklearn.metrics.pairwise.paired_cosine_distances", - "sklearn.metrics.pairwise.paired_distances", - "sklearn.metrics.pairwise.paired_euclidean_distances", - "sklearn.metrics.pairwise.paired_manhattan_distances", "sklearn.metrics.pairwise.pairwise_distances_argmin_min", "sklearn.metrics.pairwise.pairwise_kernels", "sklearn.metrics.pairwise.polynomial_kernel", @@ -348,6 +344,31 @@ def test_function_param_validation(func_module): ) +# TODO(1.9): Remove when paired_distances and paired_*_distances are removed +DEPRECATED_PARAM_VALIDATION_FUNCTION_LIST = [ + "sklearn.metrics.pairwise.paired_cosine_distances", + "sklearn.metrics.pairwise.paired_distances", + "sklearn.metrics.pairwise.paired_euclidean_distances", + "sklearn.metrics.pairwise.paired_manhattan_distances", +] + + +# TODO(1.9): Remove when paired_distances and paired_*_distances are removed +@pytest.mark.parametrize("func_module", DEPRECATED_PARAM_VALIDATION_FUNCTION_LIST) +def test_deprecated_function_param_validation(func_module): + """Check param validation for deprecated public functions that are not wrappers + around estimators. 
+ """ + func, func_name, func_params, required_params = _get_func_info(func_module) + + parameter_constraints = getattr(func, "_skl_parameter_constraints") + + with pytest.warns(FutureWarning): + _check_function_param_validation( + func, func_name, func_params, required_params, parameter_constraints + ) + + PARAM_VALIDATION_CLASS_WRAPPER_LIST = [ ("sklearn.cluster.affinity_propagation", "sklearn.cluster.AffinityPropagation"), ("sklearn.cluster.dbscan", "sklearn.cluster.DBSCAN"),