From 89139ea2f65bfbd42b5fd42a0667f99d883ea8e9 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft>
Date: Fri, 3 May 2024 10:52:26 +0500
Subject: [PATCH 1/6] DOC move d2_log_loss_score in the classification metrics
 section

---
 doc/modules/model_evaluation.rst | 92 ++++++++++++++++----------------
 1 file changed, 47 insertions(+), 45 deletions(-)

diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index 7caacd697ea1c..72340d35b1a71 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -77,6 +77,7 @@ Scoring                                Function
 'roc_auc_ovo'                          :func:`metrics.roc_auc_score`
 'roc_auc_ovr_weighted'                 :func:`metrics.roc_auc_score`
 'roc_auc_ovo_weighted'                 :func:`metrics.roc_auc_score`
+'d2_log_loss_score'                    :func:`d2_log_loss_score`
 
 **Clustering**
 'adjusted_mutual_info_score'           :func:`metrics.adjusted_mutual_info_score`
@@ -377,6 +378,7 @@ Some also work in the multilabel case:
    recall_score
    roc_auc_score
    zero_one_loss
+   d2_log_loss_score
 
 And some work with binary and multilabel (but not multiclass) problems:
 
@@ -1986,6 +1988,51 @@ see the example below.
 
 |details-end|
 
+.. _d2_log_loss_score:
+
+D2 log loss score
+-----------------------
+
+The :func:`d2_log_loss_score` function implements the special case
+of D² with the log loss, see :ref:`log_loss`, i.e.:
+
+.. math::
+
+  \text{dev}(y, \hat{y}) = \text{log_loss}(y, \hat{y}).
+
+The :math:`y_{\text{null}}` for the :func:`log_loss` is the per-class
+proportion.
+
+Here are some usage examples of the :func:`d2_log_loss_score` function::
+
+  >>> from sklearn.metrics import d2_log_loss_score
+  >>> y_true = [1, 1, 2, 3]
+  >>> y_pred = [
+  ...    [0.5, 0.25, 0.25],
+  ...    [0.5, 0.25, 0.25],
+  ...    [0.5, 0.25, 0.25],
+  ...    [0.5, 0.25, 0.25],
+  ... ]
+  >>> d2_log_loss_score(y_true, y_pred)
+  0.0
+  >>> y_true = [1, 2, 3]
+  >>> y_pred = [
+  ...     [0.98, 0.01, 0.01],
+  ...     [0.01, 0.98, 0.01],
+  ...     [0.01, 0.01, 0.98],
+  ... ]
+  >>> d2_log_loss_score(y_true, y_pred)
+  0.981...
+  >>> y_true = [1, 2, 3]
+  >>> y_pred = [
+  ...     [0.1, 0.6, 0.3],
+  ...     [0.1, 0.6, 0.3],
+  ...     [0.4, 0.5, 0.1],
+  ... ]
+  >>> d2_log_loss_score(y_true, y_pred)
+  -0.552...
+
+
 .. _multilabel_ranking_metrics:
 
 Multilabel ranking metrics
@@ -2826,51 +2873,6 @@ Here are some usage examples of the :func:`d2_absolute_error_score` function::
 
 |details-end|
 
-|details-start|
-**D² log loss score**
-|details-split|
-
-The :func:`d2_log_loss_score` function implements the special case
-of D² with the log loss, see :ref:`log_loss`, i.e.:
-
-.. math::
-
-  \text{dev}(y, \hat{y}) = \text{log_loss}(y, \hat{y}).
-
-The :math:`y_{\text{null}}` for the :func:`log_loss` is the per-class
-proportion.
-
-Here are some usage examples of the :func:`d2_log_loss_score` function::
-
-  >>> from sklearn.metrics import d2_log_loss_score
-  >>> y_true = [1, 1, 2, 3]
-  >>> y_pred = [
-  ...    [0.5, 0.25, 0.25],
-  ...    [0.5, 0.25, 0.25],
-  ...    [0.5, 0.25, 0.25],
-  ...    [0.5, 0.25, 0.25],
-  ... ]
-  >>> d2_log_loss_score(y_true, y_pred)
-  0.0
-  >>> y_true = [1, 2, 3]
-  >>> y_pred = [
-  ...     [0.98, 0.01, 0.01],
-  ...     [0.01, 0.98, 0.01],
-  ...     [0.01, 0.01, 0.98],
-  ... ]
-  >>> d2_log_loss_score(y_true, y_pred)
-  0.981...
-  >>> y_true = [1, 2, 3]
-  >>> y_pred = [
-  ...     [0.1, 0.6, 0.3],
-  ...     [0.1, 0.6, 0.3],
-  ...     [0.4, 0.5, 0.1],
-  ... ]
-  >>> d2_log_loss_score(y_true, y_pred)
-  -0.552...
-
-|details-end|
-
 .. _visualization_regression_evaluation:
 
 Visual evaluation of regression models

From 171e73331b64f5e07d19b5bf496ff6df90a10bdc Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft>
Date: Fri, 3 May 2024 10:57:19 +0500
Subject: [PATCH 2/6] DOC shorten heading underline to match the title length

---
 doc/modules/model_evaluation.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index 72340d35b1a71..555fe34b36571 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -1991,7 +1991,7 @@ see the example below.
 .. _d2_log_loss_score:
 
 D2 log loss score
------------------------
+-----------------
 
 The :func:`d2_log_loss_score` function implements the special case
 of D² with the log loss, see :ref:`log_loss`, i.e.:

From 03a5cb91df6a0543495aeb7d5ff561ba8fe3dcb7 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft>
Date: Fri, 3 May 2024 11:26:47 +0500
Subject: [PATCH 3/6] DOC fix d2_log_loss_score reference in the scoring table

---
 doc/modules/model_evaluation.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index 555fe34b36571..f6362ee71c504 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -77,7 +77,7 @@ Scoring                                Function
 'roc_auc_ovo'                          :func:`metrics.roc_auc_score`
 'roc_auc_ovr_weighted'                 :func:`metrics.roc_auc_score`
 'roc_auc_ovo_weighted'                 :func:`metrics.roc_auc_score`
-'d2_log_loss_score'                    :func:`d2_log_loss_score`
+'d2_log_loss_score'                    :func:`metrics.d2_log_loss_score`
 
 **Clustering**
 'adjusted_mutual_info_score'           :func:`metrics.adjusted_mutual_info_score`

From 6910f935453e25e58a7cef5b9feaad384ff8b05a Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft>
Date: Fri, 3 May 2024 11:53:33 +0500
Subject: [PATCH 4/6] Add d2_log_loss_score in classes.rst

---
 doc/modules/classes.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst
index 55336389f93d5..ff395dda54038 100644
--- a/doc/modules/classes.rst
+++ b/doc/modules/classes.rst
@@ -982,6 +982,7 @@ details.
    metrics.classification_report
    metrics.cohen_kappa_score
    metrics.confusion_matrix
+   metrics.d2_log_loss_score
    metrics.dcg_score
    metrics.det_curve
    metrics.f1_score

From 8e018d7d6f2bd992f73e96c7081c7ffc6da643af Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Fri, 3 May 2024 15:19:51 +0500
Subject: [PATCH 5/6] Add D2 section for classification and add d2 log loss
 within that

---
 doc/modules/model_evaluation.rst | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst
index f6362ee71c504..d2e0203424c64 100644
--- a/doc/modules/model_evaluation.rst
+++ b/doc/modules/model_evaluation.rst
@@ -1988,10 +1988,32 @@ see the example below.
 
 |details-end|
 
-.. _d2_log_loss_score:
+.. _d2_score_classification:
 
-D2 log loss score
------------------
+D² score for classification
+---------------------------
+
+The D² score computes the fraction of deviance explained.
+It is a generalization of R², where the squared error is generalized and replaced
+by a classification deviance of choice :math:`\text{dev}(y, \hat{y})`
+(e.g., Log loss). D² is a form of a *skill score*.
+It is calculated as
+
+.. math::
+
+  D^2(y, \hat{y}) = 1 - \frac{\text{dev}(y, \hat{y})}{\text{dev}(y, y_{\text{null}})} \,.
+
+Here, :math:`y_{\text{null}}` is the optimal prediction of an intercept-only model
+(e.g., the per-class proportion of `y_true` in the case of the Log loss).
+
+Like R², the best possible score is 1.0 and it can be negative (because the
+model can be arbitrarily worse). A constant model that always predicts
+:math:`y_{\text{null}}`, disregarding the input features, would get a D² score
+of 0.0.
+
+|details-start|
+**D² log loss score**
+|details-split|
 
 The :func:`d2_log_loss_score` function implements the special case
 of D² with the log loss, see :ref:`log_loss`, i.e.:
@@ -2000,9 +2022,6 @@ of D² with the log loss, see :ref:`log_loss`, i.e.:
 
   \text{dev}(y, \hat{y}) = \text{log_loss}(y, \hat{y}).
 
-The :math:`y_{\text{null}}` for the :func:`log_loss` is the per-class
-proportion.
-
 Here are some usage examples of the :func:`d2_log_loss_score` function::
 
   >>> from sklearn.metrics import d2_log_loss_score
@@ -2032,6 +2051,7 @@ Here are some usage examples of the :func:`d2_log_loss_score` function::
   >>> d2_log_loss_score(y_true, y_pred)
   -0.552...
 
+|details-end|
 
 .. _multilabel_ranking_metrics:
 

From 41e4a8769e28918573e05013e71017c4c42ef4c6 Mon Sep 17 00:00:00 2001
From: Omar Salman <omar.salman@arbisoft.com>
Date: Fri, 3 May 2024 18:43:16 +0500
Subject: [PATCH 6/6] Update: Add d2_log_loss_score in test_public_functions

---
 sklearn/tests/test_public_functions.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/tests/test_public_functions.py b/sklearn/tests/test_public_functions.py
index 41629aa189941..707aa37737c1b 100644
--- a/sklearn/tests/test_public_functions.py
+++ b/sklearn/tests/test_public_functions.py
@@ -234,6 +234,7 @@ def _check_function_param_validation(
     "sklearn.metrics.consensus_score",
     "sklearn.metrics.coverage_error",
     "sklearn.metrics.d2_absolute_error_score",
+    "sklearn.metrics.d2_log_loss_score",
     "sklearn.metrics.d2_pinball_score",
     "sklearn.metrics.d2_tweedie_score",
     "sklearn.metrics.davies_bouldin_score",