From f4e771c88dfa7e0dcec035165041b4a4a4324076 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Fri, 29 Aug 2025 21:20:10 +1000 Subject: [PATCH 1/3] improve --- sklearn/metrics/_classification.py | 85 ++++++++++++++------ sklearn/metrics/tests/test_classification.py | 26 +++++- sklearn/utils/validation.py | 9 ++- 3 files changed, 91 insertions(+), 29 deletions(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 4a9c2fe0aef3d..c889d5ffacd17 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -126,9 +126,18 @@ def _check_targets(y_true, y_pred, sample_weight=None): raise ValueError("{0} is not supported".format(y_type)) if y_type in ["binary", "multiclass"]: + try: + y_true = column_or_1d(y_true, input_name="y_true") + y_pred = column_or_1d(y_pred, input_name="y_pred") + except TypeError as e: + if "Sparse data was passed" in str(e): + raise TypeError( + "Sparse input is only supported when targets are of multilabel type" + ) + else: + raise + xp, _ = get_namespace(y_true, y_pred) - y_true = column_or_1d(y_true) - y_pred = column_or_1d(y_pred) if y_type == "binary": try: unique_values = _union1d(y_true, y_pred, xp) @@ -317,10 +326,12 @@ def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None): Parameters ---------- y_true : 1d array-like, or label indicator array / sparse matrix - Ground truth (correct) labels. + Ground truth (correct) labels. Sparse matrix is only supported when + labels are of :term:`multilabel` type. y_pred : 1d array-like, or label indicator array / sparse matrix - Predicted labels, as returned by a classifier. + Predicted labels, as returned by a classifier. Sparse matrix is only + supported when labels are of :term:`multilabel` type. normalize : bool, default=True If ``False``, return the number of correctly classified samples. 
@@ -623,11 +634,13 @@ def multilabel_confusion_matrix( ---------- y_true : {array-like, sparse matrix} of shape (n_samples, n_outputs) or \ (n_samples,) - Ground truth (correct) target values. + Ground truth (correct) target values. Sparse matrix is only supported when + labels are of :term:`multilabel` type. y_pred : {array-like, sparse matrix} of shape (n_samples, n_outputs) or \ (n_samples,) - Estimated targets as returned by a classifier. + Estimated targets as returned by a classifier. Sparse matrix is only + supported when labels are of :term:`multilabel` type. sample_weight : array-like of shape (n_samples,), default=None Sample weights. @@ -991,10 +1004,12 @@ def jaccard_score( Parameters ---------- y_true : 1d array-like, or label indicator array / sparse matrix - Ground truth (correct) labels. + Ground truth (correct) labels. Sparse matrix is only supported when + labels are of :term:`multilabel` type. y_pred : 1d array-like, or label indicator array / sparse matrix - Predicted labels, as returned by a classifier. + Predicted labels, as returned by a classifier. Sparse matrix is only + supported when labels are of :term:`multilabel` type. labels : array-like of shape (n_classes,), default=None The set of labels to include when `average != 'binary'`, and their @@ -1262,10 +1277,12 @@ def zero_one_loss(y_true, y_pred, *, normalize=True, sample_weight=None): Parameters ---------- y_true : 1d array-like, or label indicator array / sparse matrix - Ground truth (correct) labels. + Ground truth (correct) labels. Sparse matrix is only supported when + labels are of :term:`multilabel` type. y_pred : 1d array-like, or label indicator array / sparse matrix - Predicted labels, as returned by a classifier. + Predicted labels, as returned by a classifier. Sparse matrix is only + supported when labels are of :term:`multilabel` type. normalize : bool, default=True If ``False``, return the number of misclassifications. 
@@ -1386,10 +1403,12 @@ def f1_score( Parameters ---------- y_true : 1d array-like, or label indicator array / sparse matrix - Ground truth (correct) target values. + Ground truth (correct) target values. Sparse matrix is only supported when + targets are of :term:`multilabel` type. y_pred : 1d array-like, or label indicator array / sparse matrix - Estimated targets as returned by a classifier. + Estimated targets as returned by a classifier. Sparse matrix is only + supported when targets are of :term:`multilabel` type. labels : array-like, default=None The set of labels to include when `average != 'binary'`, and their @@ -1586,10 +1605,12 @@ def fbeta_score( Parameters ---------- y_true : 1d array-like, or label indicator array / sparse matrix - Ground truth (correct) target values. + Ground truth (correct) target values. Sparse matrix is only supported when + targets are of :term:`multilabel` type. y_pred : 1d array-like, or label indicator array / sparse matrix - Estimated targets as returned by a classifier. + Estimated targets as returned by a classifier. Sparse matrix is only + supported when targets are of :term:`multilabel` type. beta : float Determines the weight of recall in the combined score. @@ -1902,10 +1923,12 @@ def precision_recall_fscore_support( Parameters ---------- y_true : 1d array-like, or label indicator array / sparse matrix - Ground truth (correct) target values. + Ground truth (correct) target values. Sparse matrix is only supported when + targets are of :term:`multilabel` type. y_pred : 1d array-like, or label indicator array / sparse matrix - Estimated targets as returned by a classifier. + Estimated targets as returned by a classifier. Sparse matrix is only + supported when targets are of :term:`multilabel` type. beta : float, default=1.0 The strength of recall versus precision in the F-score. @@ -2176,10 +2199,12 @@ class after being classified as negative. 
This is the case when the Parameters ---------- y_true : 1d array-like, or label indicator array / sparse matrix - Ground truth (correct) target values. + Ground truth (correct) target values. Sparse matrix is only supported when + targets are of :term:`multilabel` type. y_pred : 1d array-like, or label indicator array / sparse matrix - Estimated targets as returned by a classifier. + Estimated targets as returned by a classifier. Sparse matrix is only + supported when targets are of :term:`multilabel` type. labels : array-like, default=None List of labels to index the matrix. This may be used to select the @@ -2452,10 +2477,12 @@ def precision_score( Parameters ---------- y_true : 1d array-like, or label indicator array / sparse matrix - Ground truth (correct) target values. + Ground truth (correct) target values. Sparse matrix is only supported when + targets are of :term:`multilabel` type. y_pred : 1d array-like, or label indicator array / sparse matrix - Estimated targets as returned by a classifier. + Estimated targets as returned by a classifier. Sparse matrix is only + supported when targets are of :term:`multilabel` type. labels : array-like, default=None The set of labels to include when `average != 'binary'`, and their @@ -2631,10 +2658,12 @@ def recall_score( Parameters ---------- y_true : 1d array-like, or label indicator array / sparse matrix - Ground truth (correct) target values. + Ground truth (correct) target values. Sparse matrix is only supported when + targets are of :term:`multilabel` type. y_pred : 1d array-like, or label indicator array / sparse matrix - Estimated targets as returned by a classifier. + Estimated targets as returned by a classifier. Sparse matrix is only + supported when targets are of :term:`multilabel` type. 
labels : array-like, default=None The set of labels to include when `average != 'binary'`, and their @@ -2890,10 +2919,12 @@ def classification_report( Parameters ---------- y_true : 1d array-like, or label indicator array / sparse matrix - Ground truth (correct) target values. + Ground truth (correct) target values. Sparse matrix is only supported when + targets are of :term:`multilabel` type. y_pred : 1d array-like, or label indicator array / sparse matrix - Estimated targets as returned by a classifier. + Estimated targets as returned by a classifier. Sparse matrix is only + supported when targets are of :term:`multilabel` type. labels : array-like of shape (n_labels,), default=None Optional list of label indices to include in the report. @@ -3116,10 +3147,12 @@ def hamming_loss(y_true, y_pred, *, sample_weight=None): Parameters ---------- y_true : 1d array-like, or label indicator array / sparse matrix - Ground truth (correct) labels. + Ground truth (correct) labels. Sparse matrix is only supported when + labels are of :term:`multilabel` type. y_pred : 1d array-like, or label indicator array / sparse matrix - Predicted labels, as returned by a classifier. + Predicted labels, as returned by a classifier. Sparse matrix is only + supported when labels are of :term:`multilabel` type. sample_weight : array-like of shape (n_samples,), default=None Sample weights. 
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index cdb64d9c1530a..ccae5b561445c 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -5,7 +5,7 @@ import numpy as np import pytest -from scipy import linalg +from scipy import linalg, sparse from scipy.spatial.distance import hamming as sp_hamming from scipy.stats import bernoulli @@ -2589,6 +2589,30 @@ def test__check_targets_multiclass_with_both_y_true_and_y_pred_binary(): assert _check_targets(y_true, y_pred)[0] == "multiclass" +@pytest.mark.parametrize( + "y, target_type", + [ + (sparse.csr_matrix([[1], [0], [1], [0]]), "binary"), + (sparse.csr_matrix([[0], [1], [2], [1]]), "multiclass"), + (sparse.csr_matrix([[1, 0, 1], [0, 1, 0], [1, 1, 0]]), "multilabel"), + ], +) +def test__check_targets_sparse_inputs(y, target_type): + """Check correct behaviour when different target types are sparse.""" + if target_type in ("binary", "multiclass"): + with pytest.raises( + TypeError, match="Sparse input is only supported when targets" + ): + _check_targets(y, y) + else: + # This should not raise an error + y_type, y_true_out, y_pred_out, _ = _check_targets(y, y) + + assert y_type == "multilabel-indicator" + assert y_true_out.format == "csr" + assert y_pred_out.format == "csr" + + def test_hinge_loss_binary(): y_true = np.array([-1, 1, 1, -1]) pred_decision = np.array([-8.5, 0.5, 1.5, -0.3]) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index f1c3d11de13b2..03656582609f4 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -1427,7 +1427,7 @@ def _check_y(y, multi_output=False, y_numeric=False, estimator=None): return y -def column_or_1d(y, *, dtype=None, warn=False, device=None): +def column_or_1d(y, *, dtype=None, input_name="y", warn=False, device=None): """Ravel column or 1d numpy array, else raises an error. 
Parameters @@ -1440,6 +1440,11 @@ def column_or_1d(y, *, dtype=None, warn=False, device=None): .. versionadded:: 1.2 + input_name : str, default="y" + The data name used to construct the error message. + + .. versionadded:: 1.8 + warn : bool, default=False To control display of warnings. @@ -1470,7 +1475,7 @@ def column_or_1d(y, *, dtype=None, warn=False, device=None): y, ensure_2d=False, dtype=dtype, - input_name="y", + input_name=input_name, ensure_all_finite=False, ensure_min_samples=0, ) From feb31cad289d0966a791b46f353b9229d7237818 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Sat, 30 Aug 2025 09:40:29 +1000 Subject: [PATCH 2/3] add whats new --- .../sklearn.metrics/32047.enhancement.rst | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 doc/whats_new/upcoming_changes/sklearn.metrics/32047.enhancement.rst diff --git a/doc/whats_new/upcoming_changes/sklearn.metrics/32047.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.metrics/32047.enhancement.rst new file mode 100644 index 0000000000000..7fcad9a062ce7 --- /dev/null +++ b/doc/whats_new/upcoming_changes/sklearn.metrics/32047.enhancement.rst @@ -0,0 +1,9 @@ +- Improved the error message for sparse inputs for the following metrics: + :func:`metrics.accuracy_score`, + :func:`metrics.multilabel_confusion_matrix`, :func:`metrics.jaccard_score`, + :func:`metrics.zero_one_loss`, :func:`metrics.f1_score`, + :func:`metrics.fbeta_score`, :func:`metrics.precision_recall_fscore_support`, + :func:`metrics.class_likelihood_ratios`, :func:`metrics.precision_score`, + :func:`metrics.recall_score`, :func:`metrics.classification_report`, + :func:`metrics.hamming_loss`. + By :user:`Lucy Liu <lucyleeow>`. 
From 1247a266ff3b7d5a9a49b10f85b674d0bb8a2753 Mon Sep 17 00:00:00 2001 From: Lucy Liu Date: Sat, 30 Aug 2025 09:40:48 +1000 Subject: [PATCH 3/3] review --- sklearn/metrics/_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index c889d5ffacd17..fb5fad066f881 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -133,7 +133,7 @@ def _check_targets(y_true, y_pred, sample_weight=None): if "Sparse data was passed" in str(e): raise TypeError( "Sparse input is only supported when targets are of multilabel type" - ) + ) from e else: raise