diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 55336389f93d5..ff395dda54038 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -982,6 +982,7 @@ details. metrics.classification_report metrics.cohen_kappa_score metrics.confusion_matrix + metrics.d2_log_loss_score metrics.dcg_score metrics.det_curve metrics.f1_score diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 7caacd697ea1c..d2e0203424c64 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -77,6 +77,7 @@ Scoring Function 'roc_auc_ovo' :func:`metrics.roc_auc_score` 'roc_auc_ovr_weighted' :func:`metrics.roc_auc_score` 'roc_auc_ovo_weighted' :func:`metrics.roc_auc_score` +'d2_log_loss_score' :func:`metrics.d2_log_loss_score` **Clustering** 'adjusted_mutual_info_score' :func:`metrics.adjusted_mutual_info_score` @@ -377,6 +378,7 @@ Some also work in the multilabel case: recall_score roc_auc_score zero_one_loss + d2_log_loss_score And some work with binary and multilabel (but not multiclass) problems: @@ -1986,6 +1988,71 @@ see the example below. |details-end| +.. _d2_score_classification: + +D² score for classification +--------------------------- + +The D² score computes the fraction of deviance explained. +It is a generalization of R², where the squared error is generalized and replaced +by a classification deviance of choice :math:`\text{dev}(y, \hat{y})` +(e.g., Log loss). D² is a form of a *skill score*. +It is calculated as + +.. math:: + + D^2(y, \hat{y}) = 1 - \frac{\text{dev}(y, \hat{y})}{\text{dev}(y, y_{\text{null}})} \,. + +Where :math:`y_{\text{null}}` is the optimal prediction of an intercept-only model +(e.g., the per-class proportion of `y_true` in the case of the Log loss). + +Like R², the best possible score is 1.0 and it can be negative (because the +model can be arbitrarily worse). 
A constant model that always predicts +:math:`y_{\text{null}}`, disregarding the input features, would get a D² score +of 0.0. + +|details-start| +**D² log loss score** +|details-split| + +The :func:`d2_log_loss_score` function implements the special case +of D² with the log loss, see :ref:`log_loss`, i.e.: + +.. math:: + + \text{dev}(y, \hat{y}) = \text{log_loss}(y, \hat{y}). + +Here are some usage examples of the :func:`d2_log_loss_score` function:: + + >>> from sklearn.metrics import d2_log_loss_score + >>> y_true = [1, 1, 2, 3] + >>> y_pred = [ + ... [0.5, 0.25, 0.25], + ... [0.5, 0.25, 0.25], + ... [0.5, 0.25, 0.25], + ... [0.5, 0.25, 0.25], + ... ] + >>> d2_log_loss_score(y_true, y_pred) + 0.0 + >>> y_true = [1, 2, 3] + >>> y_pred = [ + ... [0.98, 0.01, 0.01], + ... [0.01, 0.98, 0.01], + ... [0.01, 0.01, 0.98], + ... ] + >>> d2_log_loss_score(y_true, y_pred) + 0.981... + >>> y_true = [1, 2, 3] + >>> y_pred = [ + ... [0.1, 0.6, 0.3], + ... [0.1, 0.6, 0.3], + ... [0.4, 0.5, 0.1], + ... ] + >>> d2_log_loss_score(y_true, y_pred) + -0.552... + +|details-end| + .. _multilabel_ranking_metrics: Multilabel ranking metrics @@ -2826,51 +2893,6 @@ Here are some usage examples of the :func:`d2_absolute_error_score` function:: |details-end| -|details-start| -**D² log loss score** -|details-split| - -The :func:`d2_log_loss_score` function implements the special case -of D² with the log loss, see :ref:`log_loss`, i.e.: - -.. math:: - - \text{dev}(y, \hat{y}) = \text{log_loss}(y, \hat{y}). - -The :math:`y_{\text{null}}` for the :func:`log_loss` is the per-class -proportion. - -Here are some usage examples of the :func:`d2_log_loss_score` function:: - - >>> from sklearn.metrics import d2_log_loss_score - >>> y_true = [1, 1, 2, 3] - >>> y_pred = [ - ... [0.5, 0.25, 0.25], - ... [0.5, 0.25, 0.25], - ... [0.5, 0.25, 0.25], - ... [0.5, 0.25, 0.25], - ... ] - >>> d2_log_loss_score(y_true, y_pred) - 0.0 - >>> y_true = [1, 2, 3] - >>> y_pred = [ - ... [0.98, 0.01, 0.01], - ...
[0.01, 0.98, 0.01], - ... [0.01, 0.01, 0.98], - ... ] - >>> d2_log_loss_score(y_true, y_pred) - 0.981... - >>> y_true = [1, 2, 3] - >>> y_pred = [ - ... [0.1, 0.6, 0.3], - ... [0.1, 0.6, 0.3], - ... [0.4, 0.5, 0.1], - ... ] - >>> d2_log_loss_score(y_true, y_pred) - -0.552... - -|details-end| - .. _visualization_regression_evaluation: Visual evaluation of regression models diff --git a/sklearn/tests/test_public_functions.py b/sklearn/tests/test_public_functions.py index 41629aa189941..707aa37737c1b 100644 --- a/sklearn/tests/test_public_functions.py +++ b/sklearn/tests/test_public_functions.py @@ -234,6 +234,7 @@ def _check_function_param_validation( "sklearn.metrics.consensus_score", "sklearn.metrics.coverage_error", "sklearn.metrics.d2_absolute_error_score", + "sklearn.metrics.d2_log_loss_score", "sklearn.metrics.d2_pinball_score", "sklearn.metrics.d2_tweedie_score", "sklearn.metrics.davies_bouldin_score",