diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index 04894a4d7a7e7..b68f1593e317e 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -3277,10 +3277,10 @@ def d2_log_loss_score(y_true, y_pred, *, sample_weight=None, labels=None):
     :math:`D^2` score function, fraction of log loss explained.
 
     Best possible score is 1.0 and it can be negative (because the model can be
-    arbitrarily worse). A model that always uses the empirical mean of `y_true` as
-    constant prediction, disregarding the input features, gets a D^2 score of 0.0.
+    arbitrarily worse). A model that always predicts the per-class proportions
+    of `y_true`, disregarding the input features, gets a D^2 score of 0.0.
 
-    Read more in the :ref:`User Guide <d2_score>`.
+    Read more in the :ref:`User Guide <d2_score_classification>`.
 
     .. versionadded:: 1.5
 
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index 40b762bfa7308..b87e76ba2fb42 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -3048,7 +3048,8 @@ def test_d2_log_loss_score():
 
 
 def test_d2_log_loss_score_raises():
-    """Test that d2_log_loss raises error on invalid input."""
+    """Test that d2_log_loss_score raises the appropriate errors on
+    invalid inputs."""
     y_true = [0, 1, 2]
     y_pred = [[0.2, 0.8], [0.5, 0.5], [0.4, 0.6]]
    err = "contain different number of classes"
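
Not part of the patch itself, but a quick sanity check of the reworded docstring claim: a baseline that always predicts the per-class proportions of `y_true` scores exactly 0.0, a better model scores above that, and a worse model goes negative. The snippet below is an illustrative sketch using only the public `d2_log_loss_score` API; the toy `y_true` and `y_pred_*` values are made up for this example.

import numpy as np
from sklearn.metrics import d2_log_loss_score

# Imbalanced toy labels: class 0 appears three times, class 1 once.
y_true = [0, 0, 0, 1]

# "Null" model: ignore the features and always predict the observed
# per-class proportions of y_true, i.e. P(0) = 0.75 and P(1) = 0.25.
proportions = np.bincount(y_true) / len(y_true)        # array([0.75, 0.25])
y_pred_null = np.tile(proportions, (len(y_true), 1))   # one identical row per sample

print(d2_log_loss_score(y_true, y_pred_null))    # 0.0 -- the baseline explains nothing

# A model that concentrates probability on the true class scores above 0 ...
y_pred_better = [[0.9, 0.1], [0.9, 0.1], [0.8, 0.2], [0.3, 0.7]]
print(d2_log_loss_score(y_true, y_pred_better))  # ~0.65, i.e. > 0.0

# ... while a model worse than that baseline can be arbitrarily negative.
y_pred_worse = [[0.1, 0.9], [0.1, 0.9], [0.1, 0.9], [0.9, 0.1]]
print(d2_log_loss_score(y_true, y_pred_worse))   # ~-3.1, i.e. < 0.0

This mirrors how the metric is defined: D^2 = 1 - log_loss(y_true, y_pred) / log_loss(y_true, y_null), where y_null is the per-class proportion prediction, so scoring y_null itself makes the ratio exactly 1 and the score exactly 0.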