diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst
index 3e36438dda095..06764d2be6003 100644
--- a/doc/whats_new/v1.0.rst
+++ b/doc/whats_new/v1.0.rst
@@ -165,6 +165,11 @@ Changelog
   class methods and will be removed in 1.2.
   :pr:`18543` by `Guillaume Lemaitre`_.
 
+- |Enhancement| :func:`metrics.hinge_loss` now raises an informative error
+  when ``pred_decision`` is 1d with a multiclass target, or when its shape is
+  not consistent with the ``labels`` parameter.
+  :pr:`19643` by :user:`Pierre Attard <PierreAttard>`.
+
 - |Feature| :func:`metrics.mean_pinball_loss` exposes the pinball loss for
   quantile regression. :pr:`19415` by :user:`Xavier Dupré <sdpython>`
   and :user:`Oliver Grisel <ogrisel>`.
diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 708bde662e765..97ee5a2e01340 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -2370,11 +2370,29 @@ def hinge_loss(y_true, pred_decision, *, labels=None, sample_weight=None):
     pred_decision = check_array(pred_decision, ensure_2d=False)
     y_true = column_or_1d(y_true)
     y_true_unique = np.unique(labels if labels is not None else y_true)
+
     if y_true_unique.size > 2:
-        if (labels is None and pred_decision.ndim > 1 and
-                (np.size(y_true_unique) != pred_decision.shape[1])):
-            raise ValueError("Please include all labels in y_true "
-                             "or pass labels as third argument")
+
+        if pred_decision.ndim <= 1:
+            raise ValueError("The shape of pred_decision cannot be 1d array "
+                             "with a multiclass target. pred_decision shape "
+                             "must be (n_samples, n_classes), that is "
+                             f"({y_true.shape[0]}, {y_true_unique.size})."
+                             f" Got: {pred_decision.shape}")
+
+        # pred_decision is 2d from here on: it needs one column per class.
+        if y_true_unique.size != pred_decision.shape[1]:
+            if labels is None:
+                raise ValueError("Please include all labels in y_true "
+                                 "or pass labels as third argument")
+            else:
+                raise ValueError("The shape of pred_decision is not "
+                                 "consistent with the number of classes. "
+                                 "With a multiclass target, pred_decision "
+                                 "shape must be "
+                                 "(n_samples, n_classes), that is "
+                                 f"({y_true.shape[0]}, {y_true_unique.size}). "
+                                 f"Got: {pred_decision.shape}")
         if labels is None:
             labels = y_true_unique
         le = LabelEncoder()
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index c32e9c89ada47..7b634e88f2275 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -4,6 +4,7 @@ from itertools import chain
 from itertools import permutations
 import warnings
+import re
 
 import numpy as np
 from scipy import linalg
 
@@ -2135,6 +2136,31 @@ def test_hinge_loss_multiclass_missing_labels_with_labels_none():
         hinge_loss(y_true, pred_decision)
 
 
+def test_hinge_loss_multiclass_no_consistent_pred_decision_shape():
+    # test for inconsistency between multiclass problem and pred_decision
+    # argument
+    y_true = np.array([2, 1, 0, 1, 0, 1, 1])
+    pred_decision = np.array([0, 1, 2, 1, 0, 2, 1])
+    error_message = ("The shape of pred_decision cannot be 1d array "
+                     "with a multiclass target. pred_decision shape "
+                     "must be (n_samples, n_classes), that is "
+                     "(7, 3). Got: (7,)")
+    with pytest.raises(ValueError, match=re.escape(error_message)):
+        hinge_loss(y_true=y_true, pred_decision=pred_decision)
+
+    # test for inconsistency between pred_decision shape and labels number
+    pred_decision = np.array([[0, 1], [0, 1], [0, 1], [0, 1],
+                              [2, 0], [0, 1], [1, 0]])
+    labels = [0, 1, 2]
+    error_message = ("The shape of pred_decision is not "
+                     "consistent with the number of classes. "
+                     "With a multiclass target, pred_decision "
+                     "shape must be (n_samples, n_classes), that is "
+                     "(7, 3). Got: (7, 2)")
+    with pytest.raises(ValueError, match=re.escape(error_message)):
+        hinge_loss(y_true=y_true, pred_decision=pred_decision, labels=labels)
+
+
 def test_hinge_loss_multiclass_with_missing_labels():
     pred_decision = np.array([
         [+0.36, -0.17, -0.58, -0.99],