scikit-learn · trinhcon · Feb 27, 2022 · Mar 2, 2022 · Mar 4, 2022 · Mar 4, 2022
diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst
@@ -610,6 +610,11 @@ Changelog
 - |Fix| Fixed a bug in :func:`metrics.normalized_mutual_info_score` which could return
   unbounded values. :pr:`22635` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
 
+- |Fix| :func:`metrics.ndcg_score` will now trigger a warning when the y_true
+  value contains a negative value. It will allow the user to still use negative
+  values, but the result may not be between 0 and 1.
+  :pr:`22710` by :user:`Conroy Trinh <trinhcon>`
- |Fix| :func:`metrics.ndcg_score` will now trigger a warning when the y_true
-  value contains a negative value. It will allow the user to still use negative
-  values, but the result may not be between 0 and 1.
-  :pr:`22710` by :user:`Conroy Trinh <trinhcon>`
+- |Fix| :func:`metrics.ndcg_score` will now trigger a warning when `y_true`
+  contains a negative value. The user may still use negative
+  values, but the result may not be between 0 and 1. This begins the
+  deprecation of negative values in `y_true`.
+  :pr:`22710` by :user:`Conroy Trinh <trinhcon>`.
- |Fix| :func:`metrics.ndcg_score` will now trigger a warning when the y_true
-  value contains a negative value. It will allow the user to still use negative
-  values, but the result may not be between 0 and 1.
-  :pr:`22710` by :user:`Conroy Trinh <trinhcon>`
+- |Fix| :func:`metrics.ndcg_score` will now trigger a warning when `y_true`
+  contains a negative value. The user may still use negative
+  values, but the result may not be between 0 and 1. This begins the
+  deprecation of negative values in `y_true`.
+  :pr:`22710` by :user:`Conroy Trinh <trinhcon>`.
+
 :mod:`sklearn.model_selection`
 ..............................
 

diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
@@ -1538,7 +1538,9 @@ def ndcg_score(y_true, y_score, *, k=None, sample_weight=None, ignore_ties=False
     ----------
     y_true : ndarray of shape (n_samples, n_labels)
         True targets of multilabel classification, or true scores of entities
-        to be ranked.
+        to be ranked. Negative values in y_true may result in an output
-        to be ranked. Negative values in y_true may result in an output
+        to be ranked. Negative values in `y_true` may result in an output
-        to be ranked. Negative values in y_true may result in an output
+        to be ranked. Negative values in `y_true` may result in an output
+        that is not between 0 and 1. These negative values are deprecated, and
+        may cause an error in the future.
 
     y_score : ndarray of shape (n_samples, n_labels)
         Target scores, can either be probability estimates, confidence values,
@@ -1616,11 +1618,26 @@ def ndcg_score(y_true, y_score, *, k=None, sample_weight=None, ignore_ties=False
     ...           scores, k=1, ignore_ties=True)
     0.5
     """
+
     y_true = check_array(y_true, ensure_2d=False)
     y_score = check_array(y_score, ensure_2d=False)
     check_consistent_length(y_true, y_score, sample_weight)
     _check_dcg_target_type(y_true)
     gain = _ndcg_sample_scores(y_true, y_score, k=k, ignore_ties=ignore_ties)
+
+    if (isinstance(y_true, np.ndarray)):
+        if (y_true.min() < 0):
+            warnings.warn(
+                "ndcg_score should not use negative y_true values",
+                DeprecationWarning,
+            )
+    else:
+        for value in y_true:
+            if (value < 0):
+                warnings.warn(
+                    "ndcg_score should not use negative y_true values",
+                    DeprecationWarning,
+                )
     return np.average(gain, weights=sample_weight)
 
 

diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
@@ -1650,6 +1650,23 @@ def test_ndcg_ignore_ties_with_k():
         ndcg_score(a, a, k=3, ignore_ties=True)
     )
 
+def test_ndcg_negative_ndarray_warn():
+    y_true  = np.array([-0.89, -0.53, -0.47, 0.39, 0.56]).reshape(1,-1)
+    y_score = np.array([0.07,0.31,0.75,0.33,0.27]).reshape(1,-1)
-    y_true  = np.array([-0.89, -0.53, -0.47, 0.39, 0.56]).reshape(1,-1)
-    y_score = np.array([0.07,0.31,0.75,0.33,0.27]).reshape(1,-1)
+    y_true  = np.array([[-0.89, -0.53, -0.47, 0.39, 0.56]])
+    y_score = np.array([[0.07,0.31,0.75,0.33,0.27]])
-    y_true  = np.array([-0.89, -0.53, -0.47, 0.39, 0.56]).reshape(1,-1)
-    y_score = np.array([0.07,0.31,0.75,0.33,0.27]).reshape(1,-1)
+    y_true  = np.array([[-0.89, -0.53, -0.47, 0.39, 0.56]])
+    y_score = np.array([[0.07,0.31,0.75,0.33,0.27]])
+    expected_message = "ndcg_score should not use negative y_true values"
+    with pytest.warns(DeprecationWarning, match=expected_message):
+        ndcg_score(y_true, y_score)
+
+def test_ndcg_negative_output():
+    y_true  = np.array([-0.89, -0.53, -0.47, 0.39, 0.56]).reshape(1,-1)
+    y_score = np.array([0.07,0.31,0.75,0.33,0.27]).reshape(1,-1)
-    y_true  = np.array([-0.89, -0.53, -0.47, 0.39, 0.56]).reshape(1,-1)
-    y_score = np.array([0.07,0.31,0.75,0.33,0.27]).reshape(1,-1)
+    y_true  = np.array([[-0.89, -0.53, -0.47, 0.39, 0.56]])
+    y_score = np.array([[0.07,0.31,0.75,0.33,0.27]])
-    y_true  = np.array([-0.89, -0.53, -0.47, 0.39, 0.56]).reshape(1,-1)
-    y_score = np.array([0.07,0.31,0.75,0.33,0.27]).reshape(1,-1)
+    y_true  = np.array([[-0.89, -0.53, -0.47, 0.39, 0.56]])
+    y_score = np.array([[0.07,0.31,0.75,0.33,0.27]])
+    assert ndcg_score(y_true, y_score) == pytest.approx(396.0329)
+
+def test_ndcg_positive_ndarray():
+    y_true  = np.array([0.11, 0.47, 0.53, 1.39, 1.56]).reshape(1,-1)
+    y_score = np.array([1.07, 1.31, 1.75, 1.33, 1.27]).reshape(1,-1)
-    y_true  = np.array([0.11, 0.47, 0.53, 1.39, 1.56]).reshape(1,-1)
-    y_score = np.array([1.07, 1.31, 1.75, 1.33, 1.27]).reshape(1,-1)
+    y_true  = np.array([[0.11, 0.47, 0.53, 1.39, 1.56]])
+    y_score = np.array([[1.07, 1.31, 1.75, 1.33, 1.27]])
-    y_true  = np.array([0.11, 0.47, 0.53, 1.39, 1.56]).reshape(1,-1)
-    y_score = np.array([1.07, 1.31, 1.75, 1.33, 1.27]).reshape(1,-1)
+    y_true  = np.array([[0.11, 0.47, 0.53, 1.39, 1.56]])
+    y_score = np.array([[1.07, 1.31, 1.75, 1.33, 1.27]])
+    with pytest.warns(None):
+        ndcg_score(y_true, y_score)
 
 def test_ndcg_invariant():
     y_true = np.arange(70).reshape(7, 10)