
calibration_loss calculator added #10971


Status: Closed (wants to merge 33 commits).
Commits
d71c057
calibration_loss calculator added
aishgrt1 Apr 13, 2018
ce6ef9e
Update classification.py
aishgrt1 Apr 13, 2018
63c6f95
Update classification.py
aishgrt1 Apr 13, 2018
fcc7cf4
Update classification.py
aishgrt1 Apr 14, 2018
c9c4b64
Update classification.py
aishgrt1 Apr 14, 2018
1617026
Update classification.py
aishgrt1 Apr 14, 2018
df89090
added calibration loss
aishgrt1 Apr 14, 2018
d545bba
added calibration_loss
aishgrt1 Apr 14, 2018
54c4941
Update __init__.py
aishgrt1 Apr 14, 2018
3ddb56c
Update classification.py
aishgrt1 Apr 14, 2018
fea917e
Update classification.py
aishgrt1 Apr 14, 2018
138801f
Update classification.py
aishgrt1 Apr 14, 2018
170375c
Update classification.py
aishgrt1 Apr 14, 2018
fc2da9e
Update classification.py
aishgrt1 Apr 14, 2018
09e8613
Update classification.py
aishgrt1 Apr 14, 2018
afda36e
Update classification.py
aishgrt1 Apr 14, 2018
908779d
Update classification.py
aishgrt1 Apr 14, 2018
9005a00
Update classification.py
aishgrt1 Apr 14, 2018
500ca14
Update classification.py
aishgrt1 Apr 14, 2018
30779c7
Update classification.py
aishgrt1 Apr 14, 2018
0ab5bc0
calibration_loss test added
aishgrt1 May 7, 2018
aa9bd3b
Update test_classification.py
aishgrt1 May 7, 2018
f863d3a
Update test_classification.py
aishgrt1 May 7, 2018
519c546
Update test_classification.py
aishgrt1 May 7, 2018
dc267e2
Update test_classification.py
aishgrt1 May 8, 2018
9de655f
Update classification.py
aishgrt1 May 8, 2018
04090b6
Update classification.py
aishgrt1 May 8, 2018
1484806
Update classification.py
aishgrt1 May 25, 2018
624a197
Update classification.py
aishgrt1 Sep 27, 2018
c02e66b
Update classification.py
aishgrt1 Sep 27, 2018
fe9e2d5
Update classification.py
aishgrt1 Oct 26, 2018
f3196d9
Update test_classification.py
aishgrt1 Oct 26, 2018
fb2bf49
Update test_classification.py
aishgrt1 Oct 29, 2018
2 changes: 2 additions & 0 deletions sklearn/metrics/__init__.py
@@ -30,6 +30,7 @@
from .classification import recall_score
from .classification import zero_one_loss
from .classification import brier_score_loss
from .classification import calibration_loss

from . import cluster
from .cluster import adjusted_mutual_info_score
@@ -120,4 +121,5 @@
    'v_measure_score',
    'zero_one_loss',
    'brier_score_loss',
    'calibration_loss',
]
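
With this export in place, the metric becomes importable from the public sklearn.metrics namespace. A quick smoke test (not part of the PR, and assuming a build of this branch is installed) could look like:

import numpy as np
from sklearn.metrics import calibration_loss  # only on this PR's branch

y_true = np.array([0, 1, 1, 0])
y_prob = np.array([0.1, 0.9, 0.8, 0.3])
print(calibration_loss(y_true, y_prob, bin_size=2))  # 0.5333333333333333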
62 changes: 62 additions & 0 deletions sklearn/metrics/classification.py
@@ -39,6 +39,7 @@
from ..utils.validation import _num_samples
from ..utils.sparsefuncs import count_nonzero
from ..exceptions import UndefinedMetricWarning
# NOTE: a `from __future__` import must be the first statement in a module;
# placed here, after other imports, it raises a SyntaxError and belongs at
# the top of the file instead.
from __future__ import division


def _check_targets(y_true, y_pred):
@@ -1993,3 +1994,64 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None):
    y_true = np.array(y_true == pos_label, int)
    y_true = _check_binary_probabilistic_predictions(y_true, y_prob)
    return np.average((y_true - y_prob) ** 2, weights=sample_weight)


def calibration_loss(y_true, y_prob, bin_size=2):
    """Compute calibration loss over overlapping bins.

    The calibration loss is a measure for assessing the quality of
    learning methods and learned models. The measure implemented here is
    CAL, a calibration measure based on overlapping binning
    (Caruana and Niculescu-Mizil, 2004).

[Review comment from a scikit-learn member: add "read more in the User Guide"]

    Parameters
    ----------
    y_true : array, shape (n_samples,)
        True targets.

    y_prob : array, shape (n_samples,)
        Probabilities of the positive class.

    bin_size : int
        Number of samples in each overlapping bin.

    Returns
    -------
    score : float
        Calibration loss.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.metrics import calibration_loss
    >>> y_true = np.array([0, 1, 1, 0])
    >>> y_prob = np.array([0.1, 0.9, 0.8, 0.3])
    >>> calibration_loss(y_true, y_prob, bin_size=1)
    0.175
    >>> calibration_loss(y_true, y_prob, bin_size=2)
    0.5333333333333333
    """
    pos_loss = 0.0
    neg_loss = 0.0
    n_bins = len(y_true) - bin_size + 1

    # Slide a window of `bin_size` samples across the data; each window
    # is one overlapping bin.
    for bin_start in range(0, n_bins):
        bin_end = bin_start + bin_size

        # Observed fraction of positives in this bin, compared with each
        # predicted probability in the bin.
        actual_per_pos_class = y_true[bin_start:bin_end].sum() / bin_size
        bin_error_pos = abs(y_prob[bin_start:bin_end]
                            - actual_per_pos_class).sum()
        pos_loss += bin_error_pos

        # The same error from the negative class's point of view.
        actual_per_neg_class = ((bin_size - y_true[bin_start:bin_end].sum())
                                / bin_size)
        bin_error_neg = abs((1 - y_prob[bin_start:bin_end])
                            - actual_per_neg_class).sum()
        neg_loss += bin_error_neg

    # Average per-bin errors over the number of bins, then average the
    # positive- and negative-class losses.
    pos_loss /= n_bins
    neg_loss /= n_bins
    return 0.5 * (pos_loss + neg_loss)
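
As a cross-check on the loop above (not part of the PR), the same overlapping-bin CAL computation can be written in vectorized form. The sketch below assumes NumPy >= 1.20 for sliding_window_view, and the function name is illustrative only. It also makes explicit that for binary targets |(1 - p) - (1 - r)| = |p - r|, so the negative-class term always equals the positive-class term.

import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

def calibration_loss_vectorized(y_true, y_prob, bin_size=2):
    # All overlapping windows of length `bin_size`: shape (n_bins, bin_size).
    true_bins = sliding_window_view(np.asarray(y_true, dtype=float), bin_size)
    prob_bins = sliding_window_view(np.asarray(y_prob, dtype=float), bin_size)
    # Observed positive rate per bin, broadcast against each probability.
    pos_rate = true_bins.mean(axis=1, keepdims=True)
    pos_loss = np.abs(prob_bins - pos_rate).sum(axis=1).mean()
    # For binary targets this equals pos_loss algebraically, so the final
    # 0.5 * (pos + neg) average collapses to the positive-class term.
    neg_loss = np.abs((1 - prob_bins) - (1 - pos_rate)).sum(axis=1).mean()
    return 0.5 * (pos_loss + neg_loss)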

34 changes: 34 additions & 0 deletions sklearn/metrics/tests/test_classification.py
@@ -28,6 +28,7 @@
from sklearn.utils.mocking import MockDataFrame

from sklearn.metrics import accuracy_score
from sklearn.metrics import calibration_loss
from sklearn.metrics import average_precision_score
from sklearn.metrics import classification_report
from sklearn.metrics import cohen_kappa_score
@@ -1635,3 +1636,36 @@ def test_brier_score_loss():
    # calculate even if only single class in y_true (#6980)
    assert_almost_equal(brier_score_loss([0], [0.5]), 0.25)
    assert_almost_equal(brier_score_loss([1], [0.5]), 0.25)


def test_calibration_loss():
    # Check calibration_loss function
    y_true = np.array([0, 1, 1, 0, 1, 1])
    y_pred = np.array([0.1, 0.8, 0.9, 0.3, 1.0, 0.95])
    calibration_loss_val = calibration_loss(y_true, y_pred, bin_size=2)
    assert_almost_equal(calibration_loss_val, 0.46999, decimal=4)
[Review comment from a scikit-learn member: "Where is this example from? Either cite a reference, or show how you calculated it"]
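
One way to address this (not part of the PR): the expected value can be re-derived by hand. With bin_size=2 the six samples yield five overlapping bins, and a few lines of NumPy reproduce the number:

import numpy as np

y_true = np.array([0, 1, 1, 0, 1, 1])
y_prob = np.array([0.1, 0.8, 0.9, 0.3, 1.0, 0.95])
bin_size = 2

bin_errors = []
for start in range(len(y_true) - bin_size + 1):
    rate = y_true[start:start + bin_size].mean()  # observed positive rate
    bin_errors.append(np.abs(y_prob[start:start + bin_size] - rate).sum())

# Per-bin sums are [0.7, 0.3, 0.6, 0.7, 0.05]; their mean is 2.35 / 5 = 0.47.
# The negative-class term equals the positive-class term, so the final loss
# is also 0.47, matching the asserted 0.46999 at 4 decimal places.
print(np.mean(bin_errors))  # 0.47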


def test_balanced_accuracy_score_unseen():
    assert_warns_message(UserWarning, 'y_pred contains classes not in y_true',
                         balanced_accuracy_score, [0, 0, 0], [0, 0, 1])


@pytest.mark.parametrize('y_true,y_pred',
                         [
                             (['a', 'b', 'a', 'b'], ['a', 'a', 'a', 'b']),
                             (['a', 'b', 'c', 'b'], ['a', 'a', 'a', 'b']),
                             (['a', 'a', 'a', 'b'], ['a', 'b', 'c', 'b']),
                         ])
def test_balanced_accuracy_score(y_true, y_pred):
    macro_recall = recall_score(y_true, y_pred, average='macro',
                                labels=np.unique(y_true))
    with ignore_warnings():
        # Warnings are tested in test_balanced_accuracy_score_unseen
        balanced = balanced_accuracy_score(y_true, y_pred)
    assert balanced == pytest.approx(macro_recall)
    adjusted = balanced_accuracy_score(y_true, y_pred, adjusted=True)
    chance = balanced_accuracy_score(y_true, np.full_like(y_true, y_true[0]))
    assert adjusted == (balanced - chance) / (1 - chance)