From 94cfeabe2a61c80138c81b40f88713efba949fa3 Mon Sep 17 00:00:00 2001 From: Pooja Subramaniam Date: Thu, 23 Feb 2023 20:52:50 +0100 Subject: [PATCH 1/5] validate parameters for jaccard_score --- sklearn/metrics/_classification.py | 37 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 0602ec77aa500..b61746ecc897b 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -697,6 +697,23 @@ class labels [2]_. return 1 - k +@validate_params( + { + "y_true": ["array-like", "sparse matrix"], + "y_pred": ["array-like", "sparse matrix"], + "labels": ["array-like", None], + "pos_label": [Real, str, int, "boolean", None], + "average": [ + StrOptions({"micro", "macro", "samples", "weighted", "binary"}), + None, + ], + "sample_weight": ["array-like", None], + "zero_division": [ + Options(Integral, {0, 1}), + StrOptions({"warn"}), + ], + } +) def jaccard_score( y_true, y_pred, @@ -2622,9 +2639,6 @@ def log_loss( The default value changed from `1e-15` to `"auto"` that is equivalent to `np.finfo(y_pred.dtype).eps`. - .. deprecated:: 1.3 - `eps` is deprecated in 1.3 and will be removed in 1.5. - normalize : bool, default=True If true, return the mean loss per sample. Otherwise, return the sum of the per-sample losses. @@ -2663,16 +2677,7 @@ def log_loss( y_pred = check_array( y_pred, ensure_2d=False, dtype=[np.float64, np.float32, np.float16] ) - if eps == "auto": - eps = np.finfo(y_pred.dtype).eps - else: - # TODO: Remove user defined eps in 1.5 - warnings.warn( - "Setting the eps parameter is deprecated and will " - "be removed in 1.5. Instead eps will always have" - "a default value of `np.finfo(y_pred.dtype).eps`.", - FutureWarning, - ) + eps = np.finfo(y_pred.dtype).eps if eps == "auto" else eps check_consistent_length(y_pred, y_true, sample_weight) lb = LabelBinarizer() @@ -2735,12 +2740,6 @@ def log_loss( # Renormalize y_pred_sum = y_pred.sum(axis=1) - if not np.isclose(y_pred_sum, 1, rtol=1e-15, atol=5 * eps).all(): - warnings.warn( - "The y_pred values do not sum to one. Starting from 1.5 this" - "will result in an error.", - UserWarning, - ) y_pred = y_pred / y_pred_sum[:, np.newaxis] loss = -xlogy(transformed_labels, y_pred).sum(axis=1) From 8697ef9d627e879560edd55595c4bbc6e396b22d Mon Sep 17 00:00:00 2001 From: Pooja Subramaniam Date: Thu, 23 Feb 2023 21:23:16 +0100 Subject: [PATCH 2/5] Revert "validate parameters for jaccard_score" This reverts commit 94cfeabe2a61c80138c81b40f88713efba949fa3. --- sklearn/metrics/_classification.py | 37 +++++++++++++++--------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index b61746ecc897b..0602ec77aa500 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -697,23 +697,6 @@ class labels [2]_. return 1 - k -@validate_params( - { - "y_true": ["array-like", "sparse matrix"], - "y_pred": ["array-like", "sparse matrix"], - "labels": ["array-like", None], - "pos_label": [Real, str, int, "boolean", None], - "average": [ - StrOptions({"micro", "macro", "samples", "weighted", "binary"}), - None, - ], - "sample_weight": ["array-like", None], - "zero_division": [ - Options(Integral, {0, 1}), - StrOptions({"warn"}), - ], - } -) def jaccard_score( y_true, y_pred, @@ -2639,6 +2622,9 @@ def log_loss( The default value changed from `1e-15` to `"auto"` that is equivalent to `np.finfo(y_pred.dtype).eps`. + .. deprecated:: 1.3 + `eps` is deprecated in 1.3 and will be removed in 1.5. + normalize : bool, default=True If true, return the mean loss per sample. Otherwise, return the sum of the per-sample losses. @@ -2677,7 +2663,16 @@ def log_loss( y_pred = check_array( y_pred, ensure_2d=False, dtype=[np.float64, np.float32, np.float16] ) - eps = np.finfo(y_pred.dtype).eps if eps == "auto" else eps + if eps == "auto": + eps = np.finfo(y_pred.dtype).eps + else: + # TODO: Remove user defined eps in 1.5 + warnings.warn( + "Setting the eps parameter is deprecated and will " + "be removed in 1.5. Instead eps will always have" + "a default value of `np.finfo(y_pred.dtype).eps`.", + FutureWarning, + ) check_consistent_length(y_pred, y_true, sample_weight) lb = LabelBinarizer() @@ -2740,6 +2735,12 @@ def log_loss( # Renormalize y_pred_sum = y_pred.sum(axis=1) + if not np.isclose(y_pred_sum, 1, rtol=1e-15, atol=5 * eps).all(): + warnings.warn( + "The y_pred values do not sum to one. Starting from 1.5 this" + "will result in an error.", + UserWarning, + ) y_pred = y_pred / y_pred_sum[:, np.newaxis] loss = -xlogy(transformed_labels, y_pred).sum(axis=1) From 792d3af79c2638f2ac6dcd25186397cc77d53356 Mon Sep 17 00:00:00 2001 From: Pooja Subramaniam Date: Thu, 23 Feb 2023 21:24:43 +0100 Subject: [PATCH 3/5] validate parameters for jaccard_score --- sklearn/metrics/_classification.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 0602ec77aa500..6df8c0d582df4 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -697,6 +697,23 @@ class labels [2]_. return 1 - k +@validate_params( + { + "y_true": ["array-like", "sparse matrix"], + "y_pred": ["array-like", "sparse matrix"], + "labels": ["array-like", None], + "pos_label": [Real, str, int, "boolean", None], + "average": [ + StrOptions({"micro", "macro", "samples", "weighted", "binary"}), + None, + ], + "sample_weight": ["array-like", None], + "zero_division": [ + Options(Integral, {0, 1}), + StrOptions({"warn"}), + ], + } +) def jaccard_score( y_true, y_pred, From 8ad25303f3a06ffaed1f5f417f4ad6f7655663a2 Mon Sep 17 00:00:00 2001 From: Pooja Subramaniam Date: Thu, 23 Feb 2023 21:25:56 +0100 Subject: [PATCH 4/5] adding the jaccard_score function to the test public functions list --- sklearn/tests/test_public_functions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/tests/test_public_functions.py b/sklearn/tests/test_public_functions.py index 9b2b56cdb3eb8..91a87bad52e16 100644 --- a/sklearn/tests/test_public_functions.py +++ b/sklearn/tests/test_public_functions.py @@ -122,6 +122,7 @@ def _check_function_param_validation( "sklearn.metrics.det_curve", "sklearn.metrics.f1_score", "sklearn.metrics.hamming_loss", + "sklearn.metrics.jaccard_score", "sklearn.metrics.log_loss", "sklearn.metrics.mean_absolute_error", "sklearn.metrics.mean_squared_error", From 800103de785833c0d81ac1588941084a88060596 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Fri, 24 Feb 2023 10:47:36 +0100 Subject: [PATCH 5/5] cln constraint types --- sklearn/metrics/_classification.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 6df8c0d582df4..eae89087f8a0b 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -702,14 +702,14 @@ class labels [2]_. "y_true": ["array-like", "sparse matrix"], "y_pred": ["array-like", "sparse matrix"], "labels": ["array-like", None], - "pos_label": [Real, str, int, "boolean", None], + "pos_label": [Real, str, "boolean", None], "average": [ StrOptions({"micro", "macro", "samples", "weighted", "binary"}), None, ], "sample_weight": ["array-like", None], "zero_division": [ - Options(Integral, {0, 1}), + Options(Real, {0, 1}), StrOptions({"warn"}), ], }