From d6eff7974ade038fc5e918f66645570232e338be Mon Sep 17 00:00:00 2001 From: Sean Atukorala Date: Sun, 22 May 2022 12:08:45 -0400 Subject: [PATCH 01/11] reverted sklearn/metrics/_ranking.py to original content --- sklearn/metrics/_ranking.py | 5 +++-- sklearn/metrics/tests/test_ranking.py | 7 +++++++ sklearn/utils/validation.py | 4 ++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 4e88bd5edc888..0d201bf99bc10 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -55,14 +55,15 @@ def auc(x, y): Parameters ---------- x : ndarray of shape (n,) - x coordinates. These must be either monotonic increasing or monotonic + X coordinates. These must be either monotonic increasing or monotonic decreasing. y : ndarray of shape, (n,) - y coordinates. + Y coordinates. Returns ------- auc : float + Area Under the Curve. See Also -------- diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index 7d2338337b83d..c2c8adbe39015 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -2015,3 +2015,10 @@ def test_top_k_accuracy_score_warning(y_true, k): def test_top_k_accuracy_score_error(y_true, y_score, labels, msg): with pytest.raises(ValueError, match=msg): top_k_accuracy_score(y_true, y_score, k=2, labels=labels) + + +def test_label_ranking_avg_precision_score_should_allow_csr_matrix_for_y_true_input(): + y_true = csr_matrix(np.array([[1, 0, 0], [0, 0, 1]])) + y_score = np.array([[0.5, 0.9, 0.6], [0, 0, 1]]) + result = label_ranking_average_precision_score(y_true, y_score) + assert result == pytest.approx(0.6666666666666666) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 841cc0583cb7c..432210e262c2f 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -811,6 +811,10 @@ def check_array( if sp.issparse(array): _ensure_no_complex_data(array) + + if accept_sparse is False: + accept_sparse = ["csr", "csc"] + array = _ensure_sparse_format( array, accept_sparse=accept_sparse, From fb5cb583d6279c3a91514b5243f39cfdb3a1e8f7 Mon Sep 17 00:00:00 2001 From: Sean Atukorala Date: Mon, 23 May 2022 11:20:08 -0400 Subject: [PATCH 02/11] made changes to label_ranking_average_precision_score metric by adding 'if not issparse(y_true)' line --- sklearn/metrics/_ranking.py | 16 +++++++++------- sklearn/utils/validation.py | 4 ++-- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 0d201bf99bc10..4f09624923eb2 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -23,7 +23,7 @@ from functools import partial import numpy as np -from scipy.sparse import csr_matrix +from scipy.sparse import csr_matrix, issparse from scipy.stats import rankdata from ..utils import assert_all_finite @@ -37,6 +37,7 @@ from ..preprocessing import label_binarize from ..utils._encode import _encode, _unique + from ._base import ( _average_binary_score, _average_multiclass_ovo_score, @@ -1070,18 +1071,19 @@ def label_ranking_average_precision_score(y_true, y_score, *, sample_weight=None """ check_consistent_length(y_true, y_score, sample_weight) - y_true = check_array(y_true, ensure_2d=False) + y_true = check_array(y_true, ensure_2d=False, accept_sparse="csr") y_score = check_array(y_score, ensure_2d=False) if y_true.shape != y_score.shape: raise ValueError("y_true and y_score have different shape") # Handle badly 
formatted array and the degenerate case with one label - y_type = type_of_target(y_true, input_name="y_true") - if y_type != "multilabel-indicator" and not ( - y_type == "binary" and y_true.ndim == 2 - ): - raise ValueError("{0} format is not supported".format(y_type)) + if not issparse(y_true): + y_type = type_of_target(y_true, input_name="y_true") + if y_type != "multilabel-indicator" and not ( + y_type == "binary" and y_true.ndim == 2 + ): + raise ValueError("{0} format is not supported".format(y_type)) y_true = csr_matrix(y_true) y_score = -y_score diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 432210e262c2f..5cf30083e62f4 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -812,8 +812,8 @@ def check_array( if sp.issparse(array): _ensure_no_complex_data(array) - if accept_sparse is False: - accept_sparse = ["csr", "csc"] + # if accept_sparse is False: + # accept_sparse = ["csr", "csc"] array = _ensure_sparse_format( array, From aa67c760a71a27393d0273b09414658330e3a6ef Mon Sep 17 00:00:00 2001 From: Sean Atukorala Date: Mon, 23 May 2022 11:31:42 -0400 Subject: [PATCH 03/11] removed redundant comment from scikit-learn/sklearn/utils/validation.py ln ~815 --- sklearn/utils/validation.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 5cf30083e62f4..b7372f2399a70 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -812,9 +812,6 @@ def check_array( if sp.issparse(array): _ensure_no_complex_data(array) - # if accept_sparse is False: - # accept_sparse = ["csr", "csc"] - array = _ensure_sparse_format( array, accept_sparse=accept_sparse, From 6de41d4f88264896fe5b923bbf1a8f6d4eb33ec4 Mon Sep 17 00:00:00 2001 From: Sean Atukorala Date: Tue, 24 May 2022 11:53:45 -0400 Subject: [PATCH 04/11] added docstring to test in sklearn/metrics/tests/test_ranking.py file; reverted unwanted diffs --- sklearn/metrics/tests/test_ranking.py | 6 +++++- sklearn/utils/validation.py | 1 - 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index c2c8adbe39015..02e50f8dff4e6 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -2018,7 +2018,11 @@ def test_top_k_accuracy_score_error(y_true, y_score, labels, msg): def test_label_ranking_avg_precision_score_should_allow_csr_matrix_for_y_true_input(): + # """ + # Test that label_ranking_avg_precision_score accept sparse y_true. 
+ # Non-regression test for #22575 + # """ y_true = csr_matrix(np.array([[1, 0, 0], [0, 0, 1]])) y_score = np.array([[0.5, 0.9, 0.6], [0, 0, 1]]) result = label_ranking_average_precision_score(y_true, y_score) - assert result == pytest.approx(0.6666666666666666) + assert result == pytest.approx(2 / 3) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index b7372f2399a70..841cc0583cb7c 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -811,7 +811,6 @@ def check_array( if sp.issparse(array): _ensure_no_complex_data(array) - array = _ensure_sparse_format( array, accept_sparse=accept_sparse, From dd977f23f753ab6f2c5f26cbabc91d472ec7f002 Mon Sep 17 00:00:00 2001 From: Sean Atukorala Date: Tue, 24 May 2022 11:58:50 -0400 Subject: [PATCH 05/11] added docstring to test in sklearn/metrics/tests/test_ranking.py file; reverted unwanted diffs --- sklearn/metrics/tests/test_ranking.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index 02e50f8dff4e6..f6b141f6ecaf5 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -2018,10 +2018,8 @@ def test_top_k_accuracy_score_error(y_true, y_score, labels, msg): def test_label_ranking_avg_precision_score_should_allow_csr_matrix_for_y_true_input(): - # """ # Test that label_ranking_avg_precision_score accept sparse y_true. # Non-regression test for #22575 - # """ y_true = csr_matrix(np.array([[1, 0, 0], [0, 0, 1]])) y_score = np.array([[0.5, 0.9, 0.6], [0, 0, 1]]) result = label_ranking_average_precision_score(y_true, y_score) From 4313e683b27803a60351b2737a48caa753ed212b Mon Sep 17 00:00:00 2001 From: Sean Atukorala Date: Tue, 24 May 2022 15:01:36 -0400 Subject: [PATCH 06/11] added entry to doc/whats_new/v1.2.rst, removed redundant diffs --- doc/whats_new/v1.2.rst | 3 +++ sklearn/metrics/_ranking.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 8e5851cca632f..7111d0377cfa9 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -71,6 +71,9 @@ Changelog negative likelihood ratios derived from the confusion matrix of a binary classification problem. :pr:`22518` by :user:`Arturo Amor `. +- |Fix| Allows `csr_matrix` as input for parameter: `y_true` of the `label_ranking_average_precision_score` metric. + Also added default parameter of `accept_sparse="csr"` to `check_array()`, which is used to calcuate the `y_true` value. + :pr:`23442` by :user:`Sean Atukorala ` :mod:`sklearn.neighbors` ........................ 
diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index 4f09624923eb2..6af5b45f77e89 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -37,7 +37,6 @@ from ..preprocessing import label_binarize from ..utils._encode import _encode, _unique - from ._base import ( _average_binary_score, _average_multiclass_ovo_score, @@ -1085,7 +1084,8 @@ def label_ranking_average_precision_score(y_true, y_score, *, sample_weight=None ): raise ValueError("{0} format is not supported".format(y_type)) - y_true = csr_matrix(y_true) + y_true = csr_matrix(y_true) + y_score = -y_score n_samples, n_labels = y_true.shape From 799aa1cf3e98c021e4344c2f044341f05583ce31 Mon Sep 17 00:00:00 2001 From: Sean Atukorala Date: Tue, 24 May 2022 15:06:49 -0400 Subject: [PATCH 07/11] editted entry to doc/whats_new/v1.2.rst --- doc/whats_new/v1.2.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 7111d0377cfa9..85b93f1e7d711 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -71,8 +71,8 @@ Changelog negative likelihood ratios derived from the confusion matrix of a binary classification problem. :pr:`22518` by :user:`Arturo Amor `. -- |Fix| Allows `csr_matrix` as input for parameter: `y_true` of the `label_ranking_average_precision_score` metric. - Also added default parameter of `accept_sparse="csr"` to `check_array()`, which is used to calcuate the `y_true` value. +- |Fix| Allows `csr_matrix` as input for parameter: `y_true` of the :func:`label_ranking_average_precision_score` metric. + Also added default parameter of `accept_sparse="csr"` to :func:`check_array()`, which is used to calcuate the `y_true` value. :pr:`23442` by :user:`Sean Atukorala ` :mod:`sklearn.neighbors` From 9578f3889fcd24b2c3a7c0aa4b6d6106fbf8e02d Mon Sep 17 00:00:00 2001 From: Sean Atukorala Date: Tue, 24 May 2022 16:03:34 -0400 Subject: [PATCH 08/11] Update sklearn/metrics/tests/test_ranking.py Co-authored-by: Thomas J. Fan --- sklearn/metrics/tests/test_ranking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index f6b141f6ecaf5..f73648e8c8707 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -2020,7 +2020,7 @@ def test_top_k_accuracy_score_error(y_true, y_score, labels, msg): def test_label_ranking_avg_precision_score_should_allow_csr_matrix_for_y_true_input(): # Test that label_ranking_avg_precision_score accept sparse y_true. # Non-regression test for #22575 - y_true = csr_matrix(np.array([[1, 0, 0], [0, 0, 1]])) + y_true = csr_matrix([[1, 0, 0], [0, 0, 1]]) y_score = np.array([[0.5, 0.9, 0.6], [0, 0, 1]]) result = label_ranking_average_precision_score(y_true, y_score) assert result == pytest.approx(2 / 3) From a29b92da1b1501b27ad147997b24dda28ea5b792 Mon Sep 17 00:00:00 2001 From: Sean Atukorala Date: Tue, 24 May 2022 16:03:46 -0400 Subject: [PATCH 09/11] Update doc/whats_new/v1.2.rst Co-authored-by: Thomas J. Fan --- doc/whats_new/v1.2.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 85b93f1e7d711..0a14f00184985 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -71,8 +71,10 @@ Changelog negative likelihood ratios derived from the confusion matrix of a binary classification problem. :pr:`22518` by :user:`Arturo Amor `. 
-- |Fix| Allows `csr_matrix` as input for parameter: `y_true` of the :func:`label_ranking_average_precision_score` metric. - Also added default parameter of `accept_sparse="csr"` to :func:`check_array()`, which is used to calcuate the `y_true` value. + +- |Fix| Allows `csr_matrix` as input for parameter: `y_true` of + the :func:`metrics.label_ranking_average_precision_score` metric. + :pr:`23442` by :user:`Sean Atukorala ` :pr:`23442` by :user:`Sean Atukorala ` :mod:`sklearn.neighbors` From 2a45e24cad6496ab9faf20b7c7c97d3465a00914 Mon Sep 17 00:00:00 2001 From: Sean Atukorala Date: Fri, 19 Aug 2022 17:08:23 -0400 Subject: [PATCH 10/11] Update doc/whats_new/v1.2.rst Co-authored-by: Chiara Marmo --- doc/whats_new/v1.2.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index b557bc0f4abfe..8ec08af05dcb1 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -235,7 +235,6 @@ Changelog - |Fix| Allows `csr_matrix` as input for parameter: `y_true` of the :func:`metrics.label_ranking_average_precision_score` metric. :pr:`23442` by :user:`Sean Atukorala ` - :pr:`23442` by :user:`Sean Atukorala ` - |Fix| :func:`metrics.ndcg_score` will now trigger a warning when the `y_true` value contains a negative value. Users may still use negative values, but the From 2b56a6d237fbdd8a0bbc0d5c735f006557042270 Mon Sep 17 00:00:00 2001 From: Sean Atukorala Date: Sat, 20 Aug 2022 16:24:19 -0400 Subject: [PATCH 11/11] Update sklearn/metrics/_ranking.py Co-authored-by: Christian Lorentzen --- sklearn/metrics/_ranking.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index b7ef0b8475170..4976e220f7be4 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -1078,13 +1078,13 @@ def label_ranking_average_precision_score(y_true, y_score, *, sample_weight=None raise ValueError("y_true and y_score have different shape") # Handle badly formatted array and the degenerate case with one label - if not issparse(y_true): - y_type = type_of_target(y_true, input_name="y_true") - if y_type != "multilabel-indicator" and not ( - y_type == "binary" and y_true.ndim == 2 - ): - raise ValueError("{0} format is not supported".format(y_type)) + y_type = type_of_target(y_true, input_name="y_true") + if y_type != "multilabel-indicator" and not ( + y_type == "binary" and y_true.ndim == 2 + ): + raise ValueError("{0} format is not supported".format(y_type)) + if not issparse(y_true): y_true = csr_matrix(y_true) y_score = -y_score
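
A minimal usage sketch of the change this patch series makes: label_ranking_average_precision_score accepting a sparse CSR y_true, mirroring the non-regression test added in PATCH 01/11. It assumes a scikit-learn build that already contains these patches; on releases without the fix, the sparse y_true is rejected by check_array instead.

    import numpy as np
    from scipy.sparse import csr_matrix

    from sklearn.metrics import label_ranking_average_precision_score

    # Same fixture as the non-regression test: two samples, three labels,
    # with y_true stored as a sparse multilabel indicator matrix.
    y_true = csr_matrix([[1, 0, 0], [0, 0, 1]])
    y_score = np.array([[0.5, 0.9, 0.6], [0.0, 0.0, 1.0]])

    score = label_ranking_average_precision_score(y_true, y_score)
    print(score)  # approximately 2 / 3, as asserted by the test

Note that the final version of the series keeps check_array strict by default: the early change to sklearn/utils/validation.py is reverted, and CSR input is opted into only inside the metric via accept_sparse="csr", so other callers of check_array are unaffected.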