Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
- Improved the error message for sparse inputs for the following metrics:
:func:`metrics.accuracy_score`,
:func:`metrics.multilabel_confusion_matrix`, :func:`metrics.jaccard_score`,
:func:`metrics.zero_one_loss`, :func:`metrics.f1_score`,
:func:`metrics.fbeta_score`, :func:`metrics.precision_recall_fscore_support`,
:func:`metrics.class_likelihood_ratios`, :func:`metrics.precision_score`,
:func:`metrics.recall_score`, :func:`metrics.classification_report`,
:func:`metrics.hamming_loss`.
By :user:`Lucy Liu <lucyleeow>`.
85 changes: 59 additions & 26 deletions sklearn/metrics/_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,18 @@ def _check_targets(y_true, y_pred, sample_weight=None):
raise ValueError("{0} is not supported".format(y_type))

if y_type in ["binary", "multiclass"]:
try:
y_true = column_or_1d(y_true, input_name="y_true")
y_pred = column_or_1d(y_pred, input_name="y_pred")
except TypeError as e:
if "Sparse data was passed" in str(e):
raise TypeError(
"Sparse input is only supported when targets are of multilabel type"
) from e
else:
raise

xp, _ = get_namespace(y_true, y_pred)
y_true = column_or_1d(y_true)
y_pred = column_or_1d(y_pred)
if y_type == "binary":
try:
unique_values = _union1d(y_true, y_pred, xp)
Expand Down Expand Up @@ -317,10 +326,12 @@ def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None):
Parameters
----------
y_true : 1d array-like, or label indicator array / sparse matrix
Ground truth (correct) labels.
Ground truth (correct) labels. Sparse matrix is only supported when
labels are of :term:`multilabel` type.

y_pred : 1d array-like, or label indicator array / sparse matrix
Predicted labels, as returned by a classifier.
Predicted labels, as returned by a classifier. Sparse matrix is only
supported when labels are of :term:`multilabel` type.

normalize : bool, default=True
If ``False``, return the number of correctly classified samples.
Expand Down Expand Up @@ -623,11 +634,13 @@ def multilabel_confusion_matrix(
----------
y_true : {array-like, sparse matrix} of shape (n_samples, n_outputs) or \
(n_samples,)
Ground truth (correct) target values.
Ground truth (correct) target values. Sparse matrix is only supported when
labels are of :term:`multilabel` type.

y_pred : {array-like, sparse matrix} of shape (n_samples, n_outputs) or \
(n_samples,)
Estimated targets as returned by a classifier.
Estimated targets as returned by a classifier. Sparse matrix is only
supported when labels are of :term:`multilabel` type.

sample_weight : array-like of shape (n_samples,), default=None
Sample weights.
Expand Down Expand Up @@ -991,10 +1004,12 @@ def jaccard_score(
Parameters
----------
y_true : 1d array-like, or label indicator array / sparse matrix
Ground truth (correct) labels.
Ground truth (correct) labels. Sparse matrix is only supported when
labels are of :term:`multilabel` type.

y_pred : 1d array-like, or label indicator array / sparse matrix
Predicted labels, as returned by a classifier.
Predicted labels, as returned by a classifier. Sparse matrix is only
supported when labels are of :term:`multilabel` type.

labels : array-like of shape (n_classes,), default=None
The set of labels to include when `average != 'binary'`, and their
Expand Down Expand Up @@ -1262,10 +1277,12 @@ def zero_one_loss(y_true, y_pred, *, normalize=True, sample_weight=None):
Parameters
----------
y_true : 1d array-like, or label indicator array / sparse matrix
Ground truth (correct) labels.
Ground truth (correct) labels. Sparse matrix is only supported when
labels are of :term:`multilabel` type.

y_pred : 1d array-like, or label indicator array / sparse matrix
Predicted labels, as returned by a classifier.
Predicted labels, as returned by a classifier. Sparse matrix is only
supported when labels are of :term:`multilabel` type.

normalize : bool, default=True
If ``False``, return the number of misclassifications.
Expand Down Expand Up @@ -1386,10 +1403,12 @@ def f1_score(
Parameters
----------
y_true : 1d array-like, or label indicator array / sparse matrix
Ground truth (correct) target values.
Ground truth (correct) target values. Sparse matrix is only supported when
targets are of :term:`multilabel` type.

y_pred : 1d array-like, or label indicator array / sparse matrix
Estimated targets as returned by a classifier.
Estimated targets as returned by a classifier. Sparse matrix is only
supported when targets are of :term:`multilabel` type.

labels : array-like, default=None
The set of labels to include when `average != 'binary'`, and their
Expand Down Expand Up @@ -1586,10 +1605,12 @@ def fbeta_score(
Parameters
----------
y_true : 1d array-like, or label indicator array / sparse matrix
Ground truth (correct) target values.
Ground truth (correct) target values. Sparse matrix is only supported when
targets are of :term:`multilabel` type.

y_pred : 1d array-like, or label indicator array / sparse matrix
Estimated targets as returned by a classifier.
Estimated targets as returned by a classifier. Sparse matrix is only
supported when targets are of :term:`multilabel` type.

beta : float
Determines the weight of recall in the combined score.
Expand Down Expand Up @@ -1902,10 +1923,12 @@ def precision_recall_fscore_support(
Parameters
----------
y_true : 1d array-like, or label indicator array / sparse matrix
Ground truth (correct) target values.
Ground truth (correct) target values. Sparse matrix is only supported when
targets are of :term:`multilabel` type.

y_pred : 1d array-like, or label indicator array / sparse matrix
Estimated targets as returned by a classifier.
Estimated targets as returned by a classifier. Sparse matrix is only
supported when targets are of :term:`multilabel` type.

beta : float, default=1.0
The strength of recall versus precision in the F-score.
Expand Down Expand Up @@ -2176,10 +2199,12 @@ class after being classified as negative. This is the case when the
Parameters
----------
y_true : 1d array-like, or label indicator array / sparse matrix
Ground truth (correct) target values.
Ground truth (correct) target values. Sparse matrix is only supported when
targets are of :term:`multilabel` type.

y_pred : 1d array-like, or label indicator array / sparse matrix
Estimated targets as returned by a classifier.
Estimated targets as returned by a classifier. Sparse matrix is only
supported when targets are of :term:`multilabel` type.

labels : array-like, default=None
List of labels to index the matrix. This may be used to select the
Expand Down Expand Up @@ -2452,10 +2477,12 @@ def precision_score(
Parameters
----------
y_true : 1d array-like, or label indicator array / sparse matrix
Ground truth (correct) target values.
Ground truth (correct) target values. Sparse matrix is only supported when
targets are of :term:`multilabel` type.

y_pred : 1d array-like, or label indicator array / sparse matrix
Estimated targets as returned by a classifier.
Estimated targets as returned by a classifier. Sparse matrix is only
supported when targets are of :term:`multilabel` type.

labels : array-like, default=None
The set of labels to include when `average != 'binary'`, and their
Expand Down Expand Up @@ -2631,10 +2658,12 @@ def recall_score(
Parameters
----------
y_true : 1d array-like, or label indicator array / sparse matrix
Ground truth (correct) target values.
Ground truth (correct) target values. Sparse matrix is only supported when
targets are of :term:`multilabel` type.

y_pred : 1d array-like, or label indicator array / sparse matrix
Estimated targets as returned by a classifier.
Estimated targets as returned by a classifier. Sparse matrix is only
supported when targets are of :term:`multilabel` type.

labels : array-like, default=None
The set of labels to include when `average != 'binary'`, and their
Expand Down Expand Up @@ -2890,10 +2919,12 @@ def classification_report(
Parameters
----------
y_true : 1d array-like, or label indicator array / sparse matrix
Ground truth (correct) target values.
Ground truth (correct) target values. Sparse matrix is only supported when
targets are of :term:`multilabel` type.

y_pred : 1d array-like, or label indicator array / sparse matrix
Estimated targets as returned by a classifier.
Estimated targets as returned by a classifier. Sparse matrix is only
supported when targets are of :term:`multilabel` type.

labels : array-like of shape (n_labels,), default=None
Optional list of label indices to include in the report.
Expand Down Expand Up @@ -3116,10 +3147,12 @@ def hamming_loss(y_true, y_pred, *, sample_weight=None):
Parameters
----------
y_true : 1d array-like, or label indicator array / sparse matrix
Ground truth (correct) labels.
Ground truth (correct) labels. Sparse matrix is only supported when
labels are of :term:`multilabel` type.

y_pred : 1d array-like, or label indicator array / sparse matrix
Predicted labels, as returned by a classifier.
Predicted labels, as returned by a classifier. Sparse matrix is only
supported when labels are of :term:`multilabel` type.

sample_weight : array-like of shape (n_samples,), default=None
Sample weights.
Expand Down
26 changes: 25 additions & 1 deletion sklearn/metrics/tests/test_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import numpy as np
import pytest
from scipy import linalg
from scipy import linalg, sparse
from scipy.spatial.distance import hamming as sp_hamming
from scipy.stats import bernoulli

Expand Down Expand Up @@ -2589,6 +2589,30 @@ def test__check_targets_multiclass_with_both_y_true_and_y_pred_binary():
assert _check_targets(y_true, y_pred)[0] == "multiclass"


@pytest.mark.parametrize(
    "y, target_type",
    [
        (sparse.csr_matrix([[1], [0], [1], [0]]), "binary"),
        (sparse.csr_matrix([[0], [1], [2], [1]]), "multiclass"),
        (sparse.csr_matrix([[1, 0, 1], [0, 1, 0], [1, 1, 0]]), "multilabel"),
    ],
)
def test__check_targets_sparse_inputs(y, target_type):
    """Sparse targets are rejected unless they are of multilabel type.

    Binary and multiclass sparse inputs must raise a ``TypeError`` with the
    dedicated message; a sparse multilabel indicator matrix must be accepted
    and come back still in CSR format.
    """
    expects_error = target_type in ("binary", "multiclass")

    if not expects_error:
        # Multilabel case: validation must succeed without raising.
        y_type, y_true_out, y_pred_out, _ = _check_targets(y, y)

        assert y_type == "multilabel-indicator"
        assert y_true_out.format == "csr"
        assert y_pred_out.format == "csr"
        return

    # Binary / multiclass case: the sparse-specific error must surface.
    expected_msg = "Sparse input is only supported when targets"
    with pytest.raises(TypeError, match=expected_msg):
        _check_targets(y, y)


def test_hinge_loss_binary():
y_true = np.array([-1, 1, 1, -1])
pred_decision = np.array([-8.5, 0.5, 1.5, -0.3])
Expand Down
9 changes: 7 additions & 2 deletions sklearn/utils/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -1427,7 +1427,7 @@ def _check_y(y, multi_output=False, y_numeric=False, estimator=None):
return y


def column_or_1d(y, *, dtype=None, warn=False, device=None):
def column_or_1d(y, *, dtype=None, input_name="y", warn=False, device=None):
"""Ravel column or 1d numpy array, else raises an error.

Parameters
Expand All @@ -1440,6 +1440,11 @@ def column_or_1d(y, *, dtype=None, warn=False, device=None):

.. versionadded:: 1.2

input_name : str, default="y"
The data name used to construct the error message.

.. versionadded:: 1.8

warn : bool, default=False
To control display of warnings.

Expand Down Expand Up @@ -1470,7 +1475,7 @@ def column_or_1d(y, *, dtype=None, warn=False, device=None):
y,
ensure_2d=False,
dtype=dtype,
input_name="y",
input_name=input_name,
ensure_all_finite=False,
ensure_min_samples=0,
)
Expand Down
Loading