From 90ae669409cdafd0a66c2709eb52064e8f05ce03 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 2 May 2025 10:28:59 +1000 Subject: [PATCH 01/12] rm entropy --- doc/modules/array_api.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst index e7261ea35cc7c..7bb7ff940b854 100644 --- a/doc/modules/array_api.rst +++ b/doc/modules/array_api.rst @@ -132,7 +132,6 @@ base estimator also does: Metrics ------- -- :func:`sklearn.metrics.cluster.entropy` - :func:`sklearn.metrics.accuracy_score` - :func:`sklearn.metrics.d2_tweedie_score` - :func:`sklearn.metrics.explained_variance_score` From 0fb9aebc5a773404aee0f8ddb86e0dfe9a78dc99 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 7 May 2025 13:14:23 +1000 Subject: [PATCH 02/12] make private --- sklearn/metrics/cluster/__init__.py | 4 ++-- sklearn/metrics/cluster/_supervised.py | 16 +++++----------- sklearn/metrics/cluster/tests/test_supervised.py | 16 ++++++++-------- 3 files changed, 15 insertions(+), 21 deletions(-) diff --git a/sklearn/metrics/cluster/__init__.py b/sklearn/metrics/cluster/__init__.py index 76020d80f8eb0..8f9b0ad14abaa 100644 --- a/sklearn/metrics/cluster/__init__.py +++ b/sklearn/metrics/cluster/__init__.py @@ -10,11 +10,11 @@ from ._bicluster import consensus_score from ._supervised import ( + _entropy, adjusted_mutual_info_score, adjusted_rand_score, completeness_score, contingency_matrix, - entropy, expected_mutual_information, fowlkes_mallows_score, homogeneity_completeness_v_measure, @@ -33,6 +33,7 @@ ) __all__ = [ + "_entropy", "adjusted_mutual_info_score", "adjusted_rand_score", "calinski_harabasz_score", @@ -40,7 +41,6 @@ "consensus_score", "contingency_matrix", "davies_bouldin_score", - "entropy", "expected_mutual_information", "fowlkes_mallows_score", "homogeneity_completeness_v_measure", diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py index b46c76f9feba6..ed1d6c4a9cba0 100644 --- 
a/sklearn/metrics/cluster/_supervised.py +++ b/sklearn/metrics/cluster/_supervised.py @@ -530,8 +530,8 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred, *, beta=1.0): if len(labels_true) == 0: return 1.0, 1.0, 1.0 - entropy_C = entropy(labels_true) - entropy_K = entropy(labels_pred) + entropy_C = _entropy(labels_true) + entropy_K = _entropy(labels_pred) contingency = contingency_matrix(labels_true, labels_pred, sparse=True) MI = mutual_info_score(None, None, contingency=contingency) @@ -1042,7 +1042,7 @@ def adjusted_mutual_info_score( # Calculate the expected value for the mutual information emi = expected_mutual_information(contingency, n_samples) # Calculate entropy for each labeling - h_true, h_pred = entropy(labels_true), entropy(labels_pred) + h_true, h_pred = _entropy(labels_true), _entropy(labels_pred) normalizer = _generalized_average(h_true, h_pred, average_method) denominator = normalizer - emi # Avoid 0.0 / 0.0 when expectation equals maximum, i.e. a perfect match. @@ -1168,7 +1168,7 @@ def normalized_mutual_info_score( return 0.0 # Calculate entropy for each labeling - h_true, h_pred = entropy(labels_true), entropy(labels_pred) + h_true, h_pred = _entropy(labels_true), _entropy(labels_pred) normalizer = _generalized_average(h_true, h_pred, average_method) return float(mi / normalizer) @@ -1272,13 +1272,7 @@ def fowlkes_mallows_score(labels_true, labels_pred, *, sparse="deprecated"): return float(np.sqrt(tk / pk) * np.sqrt(tk / qk)) if tk != 0.0 else 0.0 -@validate_params( - { - "labels": ["array-like"], - }, - prefer_skip_nested_validation=True, -) -def entropy(labels): +def _entropy(labels): """Calculate the entropy for a labeling. 
Parameters diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py index 7421b726ebe67..1a264c9287f89 100644 --- a/sklearn/metrics/cluster/tests/test_supervised.py +++ b/sklearn/metrics/cluster/tests/test_supervised.py @@ -6,11 +6,11 @@ from sklearn.base import config_context from sklearn.metrics.cluster import ( + _entropy, adjusted_mutual_info_score, adjusted_rand_score, completeness_score, contingency_matrix, - entropy, expected_mutual_information, fowlkes_mallows_score, homogeneity_completeness_v_measure, @@ -268,9 +268,9 @@ def test_int_overflow_mutual_info_fowlkes_mallows_score(): def test_entropy(): - assert_almost_equal(entropy([0, 0, 42.0]), 0.6365141, 5) - assert_almost_equal(entropy([]), 1) - assert entropy([1, 1, 1, 1]) == 0 + assert_almost_equal(_entropy([0, 0, 42.0]), 0.6365141, 5) + assert_almost_equal(_entropy([]), 1) + assert _entropy([1, 1, 1, 1]) == 0 @pytest.mark.parametrize( @@ -284,9 +284,9 @@ def test_entropy_array_api(array_namespace, device, dtype_name): empty_int32_labels = xp.asarray([], dtype=xp.int32, device=device) int_labels = xp.asarray([1, 1, 1, 1], device=device) with config_context(array_api_dispatch=True): - assert entropy(float_labels) == pytest.approx(0.6365141, abs=1e-5) - assert entropy(empty_int32_labels) == 1 - assert entropy(int_labels) == 0 + assert _entropy(float_labels) == pytest.approx(0.6365141, abs=1e-5) + assert _entropy(empty_int32_labels) == 1 + assert _entropy(int_labels) == 0 def test_contingency_matrix(): @@ -339,7 +339,7 @@ def test_v_measure_and_mutual_information(seed=36): v_measure_score(labels_a, labels_b), 2.0 * mutual_info_score(labels_a, labels_b) - / (entropy(labels_a) + entropy(labels_b)), + / (_entropy(labels_a) + _entropy(labels_b)), 0, ) avg = "arithmetic" From bc84d0abb1411cb8c4a070d931b1286ea91a6222 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 7 May 2025 14:51:20 +1000 Subject: [PATCH 03/12] fix --- 
sklearn/tests/test_public_functions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/tests/test_public_functions.py b/sklearn/tests/test_public_functions.py index 707aa37737c1b..34712d04e9c43 100644 --- a/sklearn/tests/test_public_functions.py +++ b/sklearn/tests/test_public_functions.py @@ -223,7 +223,6 @@ def _check_function_param_validation( "sklearn.metrics.classification_report", "sklearn.metrics.cluster.adjusted_mutual_info_score", "sklearn.metrics.cluster.contingency_matrix", - "sklearn.metrics.cluster.entropy", "sklearn.metrics.cluster.fowlkes_mallows_score", "sklearn.metrics.cluster.homogeneity_completeness_v_measure", "sklearn.metrics.cluster.normalized_mutual_info_score", From d949916c29e63c6c3731c90c3c6f99c44a701040 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 7 May 2025 21:31:32 +1000 Subject: [PATCH 04/12] rm from cluster --- sklearn/metrics/cluster/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sklearn/metrics/cluster/__init__.py b/sklearn/metrics/cluster/__init__.py index 8f9b0ad14abaa..fa067624dcc84 100644 --- a/sklearn/metrics/cluster/__init__.py +++ b/sklearn/metrics/cluster/__init__.py @@ -10,7 +10,6 @@ from ._bicluster import consensus_score from ._supervised import ( - _entropy, adjusted_mutual_info_score, adjusted_rand_score, completeness_score, @@ -33,7 +32,6 @@ ) __all__ = [ - "_entropy", "adjusted_mutual_info_score", "adjusted_rand_score", "calinski_harabasz_score", From 160ea9f444518786e3de15427afa88f87fa2c556 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Wed, 7 May 2025 21:35:36 +1000 Subject: [PATCH 05/12] fix import --- sklearn/metrics/cluster/tests/test_supervised.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py index 1a264c9287f89..9cdfe4e2bb8ba 100644 --- a/sklearn/metrics/cluster/tests/test_supervised.py +++ b/sklearn/metrics/cluster/tests/test_supervised.py @@ -6,7 +6,6 @@ 
from sklearn.base import config_context from sklearn.metrics.cluster import ( - _entropy, adjusted_mutual_info_score, adjusted_rand_score, completeness_score, @@ -21,7 +20,11 @@ rand_score, v_measure_score, ) -from sklearn.metrics.cluster._supervised import _generalized_average, check_clusterings +from sklearn.metrics.cluster._supervised import ( + _entropy, + _generalized_average, + check_clusterings, +) from sklearn.utils import assert_all_finite from sklearn.utils._array_api import ( _get_namespace_device_dtype_ids, From dedf049dd7bfa49e6941794f0e9ccc3bcc0ce90a Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 29 May 2025 13:24:03 +1000 Subject: [PATCH 06/12] deprecate --- sklearn/metrics/cluster/_supervised.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py index ed1d6c4a9cba0..422f9b6e02f3d 100644 --- a/sklearn/metrics/cluster/_supervised.py +++ b/sklearn/metrics/cluster/_supervised.py @@ -14,6 +14,7 @@ import numpy as np from scipy import sparse as sp +from ...utils import deprecated from ...utils._array_api import _max_precision_float_dtype, get_namespace_and_device from ...utils._param_validation import Hidden, Interval, StrOptions, validate_params from ...utils.multiclass import type_of_target @@ -1306,3 +1307,12 @@ def _entropy(labels): # Always convert the result as a Python scalar (on CPU) instead of a device # specific scalar array. return float(-xp.sum((pi / pi_sum) * (xp.log(pi) - log(pi_sum)))) + + +# TODO(1.10): Remove +@deprecated( + "`entropy` is deprecated in 1.8 and will become a private function only from 1.10." +) +def entropy(labels): + """Public version of `_entropy`. 
To be deprecated.""" + return _entropy(labels) From 267eb276b5acebb2c779f8f2a18f1156b91adb1b Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 29 May 2025 13:29:04 +1000 Subject: [PATCH 07/12] add test --- sklearn/metrics/cluster/tests/test_supervised.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py index 9cdfe4e2bb8ba..fe4bd8b6dd5df 100644 --- a/sklearn/metrics/cluster/tests/test_supervised.py +++ b/sklearn/metrics/cluster/tests/test_supervised.py @@ -24,6 +24,7 @@ _entropy, _generalized_average, check_clusterings, + entropy, ) from sklearn.utils import assert_all_finite from sklearn.utils._array_api import ( @@ -270,6 +271,12 @@ def test_int_overflow_mutual_info_fowlkes_mallows_score(): assert_all_finite(fowlkes_mallows_score(x, y)) +# TODO(1.10): Remove +def test_public_entropy_deprecation(): + with pytest.warns(FutureWarning, match="Function entropy is deprecated"): + entropy([0, 0, 42.0]) + + def test_entropy(): assert_almost_equal(_entropy([0, 0, 42.0]), 0.6365141, 5) assert_almost_equal(_entropy([]), 1) From 048804552785805066937c6c670ea5d6acb3645f Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Thu, 29 May 2025 13:31:16 +1000 Subject: [PATCH 08/12] amend message --- sklearn/metrics/cluster/_supervised.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py index 422f9b6e02f3d..c256f9f152ffb 100644 --- a/sklearn/metrics/cluster/_supervised.py +++ b/sklearn/metrics/cluster/_supervised.py @@ -1311,7 +1311,8 @@ def _entropy(labels): # TODO(1.10): Remove @deprecated( - "`entropy` is deprecated in 1.8 and will become a private function only from 1.10." + "`entropy` is deprecated in 1.8 and will become a private function only " + "(`_entropy`) from 1.10." ) def entropy(labels): """Public version of `_entropy`. 
To be deprecated.""" From a83a469fccc205db0f51c008b3dc959d327f6520 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 13 Jun 2025 11:32:11 +1000 Subject: [PATCH 09/12] review --- sklearn/metrics/cluster/__init__.py | 4 ++++ sklearn/metrics/cluster/_supervised.py | 5 +---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/cluster/__init__.py b/sklearn/metrics/cluster/__init__.py index fa067624dcc84..333702f733306 100644 --- a/sklearn/metrics/cluster/__init__.py +++ b/sklearn/metrics/cluster/__init__.py @@ -14,6 +14,8 @@ adjusted_rand_score, completeness_score, contingency_matrix, + # TODO(1.10): Remove + entropy, expected_mutual_information, fowlkes_mallows_score, homogeneity_completeness_v_measure, @@ -39,6 +41,8 @@ "consensus_score", "contingency_matrix", "davies_bouldin_score", + # TODO(1.10): Remove + "entropy", "expected_mutual_information", "fowlkes_mallows_score", "homogeneity_completeness_v_measure", diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py index 26d9b9fc4084d..82719fb89a5a9 100644 --- a/sklearn/metrics/cluster/_supervised.py +++ b/sklearn/metrics/cluster/_supervised.py @@ -1310,10 +1310,7 @@ def _entropy(labels): # TODO(1.10): Remove -@deprecated( - "`entropy` is deprecated in 1.8 and will become a private function only " - "(`_entropy`) from 1.10." -) +@deprecated("`entropy` is deprecated in 1.8 and will be removed in 1.10.") def entropy(labels): """Public version of `_entropy`. 
To be deprecated.""" return _entropy(labels) From 19caca63bbc211e23ba84da0f2d6a90e44a9a075 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?= Date: Fri, 13 Jun 2025 09:49:51 +0200 Subject: [PATCH 10/12] use same docstring for the public function --- sklearn/metrics/cluster/_supervised.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py index 82719fb89a5a9..ec3b7feaee3ae 100644 --- a/sklearn/metrics/cluster/_supervised.py +++ b/sklearn/metrics/cluster/_supervised.py @@ -1312,5 +1312,20 @@ def _entropy(labels): # TODO(1.10): Remove @deprecated("`entropy` is deprecated in 1.8 and will be removed in 1.10.") def entropy(labels): - """Public version of `_entropy`. To be deprecated.""" + """Calculate the entropy for a labeling. + + Parameters + ---------- + labels : array-like of shape (n_samples,), dtype=int + The labels. + + Returns + ------- + entropy : float + The entropy for a labeling. + + Notes + ----- + The logarithm used is the natural logarithm (base-e). + """ return _entropy(labels) From 810b75b16eb3da0531cf379548c7f9fb09259183 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 13 Jun 2025 23:12:41 +1000 Subject: [PATCH 11/12] add whatsnew --- doc/whats_new/upcoming_changes/sklearn.metrics/31294.api.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 doc/whats_new/upcoming_changes/sklearn.metrics/31294.api.rst diff --git a/doc/whats_new/upcoming_changes/sklearn.metrics/31294.api.rst b/doc/whats_new/upcoming_changes/sklearn.metrics/31294.api.rst new file mode 100644 index 0000000000000..9ad42d90a299e --- /dev/null +++ b/doc/whats_new/upcoming_changes/sklearn.metrics/31294.api.rst @@ -0,0 +1,2 @@ +- :func:`metrics.entropy` is deprecated and will be removed in v1.10. 
+ By :user:`Lucy Liu <lucyleeow>` From 7d07e09c131be55b8642954a00502338395c86dd Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 13 Jun 2025 23:17:50 +1000 Subject: [PATCH 12/12] fix whats new --- doc/whats_new/upcoming_changes/sklearn.metrics/31294.api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/upcoming_changes/sklearn.metrics/31294.api.rst b/doc/whats_new/upcoming_changes/sklearn.metrics/31294.api.rst index 9ad42d90a299e..d5afd1d46e6e0 100644 --- a/doc/whats_new/upcoming_changes/sklearn.metrics/31294.api.rst +++ b/doc/whats_new/upcoming_changes/sklearn.metrics/31294.api.rst @@ -1,2 +1,2 @@ -- :func:`metrics.entropy` is deprecated and will be removed in v1.10. +- :func:`metrics.cluster.entropy` is deprecated and will be removed in v1.10. By :user:`Lucy Liu <lucyleeow>`