From d71c057f957be8fe36776194581e1dd694f4ea17 Mon Sep 17 00:00:00 2001 From: Aishwarya Date: Fri, 13 Apr 2018 03:15:26 -0400 Subject: [PATCH 01/33] caliberation_los calculator added --- sklearn/metrics/classification.py | 72 +++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index e0055e3476f04..e4996735e39e3 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1993,3 +1993,75 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None): y_true = np.array(y_true == pos_label, int) y_true = _check_binary_probabilistic_predictions(y_true, y_prob) return np.average((y_true - y_prob) ** 2, weights=sample_weight) + + +# Caliberation by bin- two class + +def calibration_loss(y_true,y_prob, bin_size=2): + """Compute Calibration score by bins. + The calibration loss is defined as the measure to access the quality of learning methods and learned models. + A calibration measure based on overlaping binning is CAL (Caruana and Niculescu-Mizil, 2004). + + Parameters + ---------- + y_true : array, shape (n_samples,) + True targets. + + y_prob : array, shape (n_samples,) + Probabilities of the positive class. + + bin_size : int + Size of the bin (samples) analysed in one iteration + + + Returns + ------- + score : float + Calibration loss + + Examples + -------- + >>> import numpy as np + >>> from sklearn.metrics import calibration_loss + >>> y_true = np.array([0, 1, 1, 0]) + >>> y_true_categorical = np.array(["spam", "ham", "ham", "spam"]) + >>> y_prob = np.array([0.1, 0.9, 0.8, 0.3]) + >>> calibration_loss(y_true, y_prob, bin_size=1) + 0.174.. + >>> calibration_loss(y_true, y_prob, bin_size=2) + 0.53... + >>> calibration_loss(y_true, y_prob, bin_size=3) + 0.833... + + """ + + loss = 0.0 + pos_loss=0.0 + neg_loss=0.0 + + for bin_start in range(0,len(y_true)-bin_size + 1): + + + bin_end= bin_start + bin_size + + actual_per_pos_class= (y_true[bin_start:bin_end].sum())/float(bin_size) + print(actual_per_pos_class) + bin_error_pos = abs(y_prob[bin_start:bin_end]-actual_per_pos_class).sum() + pos_loss += bin_error_pos + + + actual_per_neg_class= (bin_size - y_true[bin_start:bin_end].sum())/float(bin_size) + print(actual_per_neg_class) + bin_error_neg = abs((1-y_prob)[bin_start:bin_end]-actual_per_neg_class).sum() + neg_loss += bin_error_neg + + + pos_loss /= (len(y_true)-bin_size+1) + neg_loss /= (len(y_true)-bin_size+1) + loss = (0.5)*(pos_loss+neg_loss) + + + return loss + + + From ce6ef9e720797354f903360081d9399a13099cef Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Fri, 13 Apr 2018 03:28:11 -0400 Subject: [PATCH 02/33] Update classification.py --- sklearn/metrics/classification.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index e4996735e39e3..af152a3b12b3c 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1995,7 +1995,6 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None): return np.average((y_true - y_prob) ** 2, weights=sample_weight) -# Caliberation by bin- two class def calibration_loss(y_true,y_prob, bin_size=2): """Compute Calibration score by bins. From 63c6f95f4ea7009387454d22954bbc0da426b7f9 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Fri, 13 Apr 2018 12:07:17 -0400 Subject: [PATCH 03/33] Update classification.py --- sklearn/metrics/classification.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index af152a3b12b3c..67e3500b0e258 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -2034,7 +2034,6 @@ def calibration_loss(y_true,y_prob, bin_size=2): """ - loss = 0.0 pos_loss=0.0 neg_loss=0.0 From fcc7cf4150ae19bc62c3af7784adfb5b33d52a53 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Fri, 13 Apr 2018 22:45:38 -0400 Subject: [PATCH 04/33] Update classification.py --- sklearn/metrics/classification.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 67e3500b0e258..61da9866a5de4 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1998,8 +1998,10 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None): def calibration_loss(y_true,y_prob, bin_size=2): """Compute Calibration score by bins. - The calibration loss is defined as the measure to access the quality of learning methods and learned models. - A calibration measure based on overlaping binning is CAL (Caruana and Niculescu-Mizil, 2004). + The calibration loss is defined as the measure to access + the quality of learning methods and learned models. + A calibration measure based on overlaping binning + is CAL (Caruana and Niculescu-Mizil, 2004). Parameters ---------- @@ -2025,6 +2027,7 @@ def calibration_loss(y_true,y_prob, bin_size=2): >>> y_true = np.array([0, 1, 1, 0]) >>> y_true_categorical = np.array(["spam", "ham", "ham", "spam"]) >>> y_prob = np.array([0.1, 0.9, 0.8, 0.3]) + >>> calibration_loss(y_true, y_prob, bin_size=1) 0.174.. >>> calibration_loss(y_true, y_prob, bin_size=2) From c9c4b644bc8ac329287761a61d3fcd7e9083458f Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Fri, 13 Apr 2018 23:01:47 -0400 Subject: [PATCH 05/33] Update classification.py --- sklearn/metrics/classification.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 61da9866a5de4..e46a5ffcc29c4 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1996,12 +1996,11 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None): -def calibration_loss(y_true,y_prob, bin_size=2): +def calibration_loss(y_true,y_prob, bin_size=2.0): """Compute Calibration score by bins. - The calibration loss is defined as the measure to access - the quality of learning methods and learned models. - A calibration measure based on overlaping binning - is CAL (Caruana and Niculescu-Mizil, 2004). + The calibration loss is defined as the measure to access the + quality of learning methods and learned models. A calibration + measure based on overlaping binning is CAL (Caruana and Niculescu-Mizil, 2004). Parameters ---------- @@ -2053,7 +2052,7 @@ def calibration_loss(y_true,y_prob, bin_size=2): actual_per_neg_class= (bin_size - y_true[bin_start:bin_end].sum())/float(bin_size) print(actual_per_neg_class) - bin_error_neg = abs((1-y_prob)[bin_start:bin_end]-actual_per_neg_class).sum() + bin_error_neg = abs((1-y_prob[bin_start:bin_end])-actual_per_neg_class).sum() neg_loss += bin_error_neg @@ -2061,8 +2060,4 @@ def calibration_loss(y_true,y_prob, bin_size=2): neg_loss /= (len(y_true)-bin_size+1) loss = (0.5)*(pos_loss+neg_loss) - - return loss - - - + return loss From 161702673b0f28e1f35204a085fc81b4d3e82371 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Sat, 14 Apr 2018 03:42:05 -0400 Subject: [PATCH 06/33] Update classification.py --- sklearn/metrics/classification.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index e46a5ffcc29c4..2fb1c9aa9802a 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1996,7 +1996,8 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None): -def calibration_loss(y_true,y_prob, bin_size=2.0): +def calibration_loss(y_true, y_prob, bin_size=2.0): + """Compute Calibration score by bins. The calibration loss is defined as the measure to access the quality of learning methods and learned models. A calibration From df890904390078e03c0787cc8692c1ccf7a94a36 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Sat, 14 Apr 2018 12:28:46 -0400 Subject: [PATCH 07/33] added calibration loss --- sklearn/metrics/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index c98b0e14493c6..e4c060f4bcae5 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -30,6 +30,7 @@ from .classification import recall_score from .classification import zero_one_loss from .classification import brier_score_loss +from .classification import calibration_loss from . import cluster from .cluster import adjusted_mutual_info_score From d545bbae7e8ccd890da379de9fcbbc5c2628ed00 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Sat, 14 Apr 2018 12:31:07 -0400 Subject: [PATCH 08/33] added calibration_loss --- sklearn/metrics/classification.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 2fb1c9aa9802a..06777e850a0f2 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1995,7 +1995,6 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None): return np.average((y_true - y_prob) ** 2, weights=sample_weight) - def calibration_loss(y_true, y_prob, bin_size=2.0): """Compute Calibration score by bins. @@ -2040,8 +2039,7 @@ def calibration_loss(y_true, y_prob, bin_size=2.0): pos_loss=0.0 neg_loss=0.0 - for bin_start in range(0,len(y_true)-bin_size + 1): - + for bin_start in range(0,len(y_true)-bin_size + 1): bin_end= bin_start + bin_size @@ -2049,14 +2047,12 @@ def calibration_loss(y_true, y_prob, bin_size=2.0): print(actual_per_pos_class) bin_error_pos = abs(y_prob[bin_start:bin_end]-actual_per_pos_class).sum() pos_loss += bin_error_pos - - + actual_per_neg_class= (bin_size - y_true[bin_start:bin_end].sum())/float(bin_size) print(actual_per_neg_class) bin_error_neg = abs((1-y_prob[bin_start:bin_end])-actual_per_neg_class).sum() neg_loss += bin_error_neg - - + pos_loss /= (len(y_true)-bin_size+1) neg_loss /= (len(y_true)-bin_size+1) loss = (0.5)*(pos_loss+neg_loss) From 54c49416a919e0e58f477ca0fca794020464ff07 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Sat, 14 Apr 2018 12:48:05 -0400 Subject: [PATCH 09/33] Update __init__.py --- sklearn/metrics/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index e4c060f4bcae5..142f7e50f1903 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -121,4 +121,5 @@ 'v_measure_score', 'zero_one_loss', 'brier_score_loss', + 'calibration_loss', ] From 3ddb56c14be6db766f8e524c91807007cd72d928 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Sat, 14 Apr 2018 12:59:05 -0400 Subject: [PATCH 10/33] Update classification.py --- sklearn/metrics/classification.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 06777e850a0f2..4e32ebcb09506 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1994,13 +1994,12 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None): y_true = _check_binary_probabilistic_predictions(y_true, y_prob) return np.average((y_true - y_prob) ** 2, weights=sample_weight) - def calibration_loss(y_true, y_prob, bin_size=2.0): - """Compute Calibration score by bins. - The calibration loss is defined as the measure to access the - quality of learning methods and learned models. A calibration - measure based on overlaping binning is CAL (Caruana and Niculescu-Mizil, 2004). + """Compute Calibration score by bins. + The calibration loss is defined as the measure to access the quality of + learning methods and learned models. A calibration measure based on + overlaping binning is CAL (Caruana and Niculescu-Mizil, 2004). Parameters ---------- From fea917e03c6a5691c7b35a3befc9878f78193945 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Sat, 14 Apr 2018 13:51:40 -0400 Subject: [PATCH 11/33] Update classification.py --- sklearn/metrics/classification.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 4e32ebcb09506..6cd42892c98f5 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -2027,7 +2027,7 @@ def calibration_loss(y_true, y_prob, bin_size=2.0): >>> y_prob = np.array([0.1, 0.9, 0.8, 0.3]) >>> calibration_loss(y_true, y_prob, bin_size=1) - 0.174.. + 0.175.. >>> calibration_loss(y_true, y_prob, bin_size=2) 0.53... >>> calibration_loss(y_true, y_prob, bin_size=3) @@ -2043,12 +2043,10 @@ def calibration_loss(y_true, y_prob, bin_size=2.0): bin_end= bin_start + bin_size actual_per_pos_class= (y_true[bin_start:bin_end].sum())/float(bin_size) - print(actual_per_pos_class) bin_error_pos = abs(y_prob[bin_start:bin_end]-actual_per_pos_class).sum() pos_loss += bin_error_pos actual_per_neg_class= (bin_size - y_true[bin_start:bin_end].sum())/float(bin_size) - print(actual_per_neg_class) bin_error_neg = abs((1-y_prob[bin_start:bin_end])-actual_per_neg_class).sum() neg_loss += bin_error_neg From 138801f40c25dcc995bb9cec1ed9d3a20d407b26 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Sat, 14 Apr 2018 14:15:20 -0400 Subject: [PATCH 12/33] Update classification.py --- sklearn/metrics/classification.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 6cd42892c98f5..ed1f63673088d 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1994,6 +1994,7 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None): y_true = _check_binary_probabilistic_predictions(y_true, y_prob) return np.average((y_true - y_prob) ** 2, weights=sample_weight) + def calibration_loss(y_true, y_prob, bin_size=2.0): """Compute Calibration score by bins. From 170375c97396daa6dd8fe6dbd58f4babada2ae22 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Sat, 14 Apr 2018 14:55:05 -0400 Subject: [PATCH 13/33] Update classification.py --- sklearn/metrics/classification.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index ed1f63673088d..27e95527c31a5 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -2028,11 +2028,11 @@ def calibration_loss(y_true, y_prob, bin_size=2.0): >>> y_prob = np.array([0.1, 0.9, 0.8, 0.3]) >>> calibration_loss(y_true, y_prob, bin_size=1) - 0.175.. + 0.175 >>> calibration_loss(y_true, y_prob, bin_size=2) - 0.53... + 0.53 >>> calibration_loss(y_true, y_prob, bin_size=3) - 0.833... + 0.833 """ From fc2da9e664af12bef4a4c122669275702c327479 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Sat, 14 Apr 2018 14:59:53 -0400 Subject: [PATCH 14/33] Update classification.py --- sklearn/metrics/classification.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 27e95527c31a5..2120c3cf90ce4 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1996,11 +1996,11 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None): def calibration_loss(y_true, y_prob, bin_size=2.0): - + """Compute Calibration score by bins. - The calibration loss is defined as the measure to access the quality of - learning methods and learned models. A calibration measure based on - overlaping binning is CAL (Caruana and Niculescu-Mizil, 2004). + The calibration loss is defined as the measure to access the quality of + learning methods and learned models. A calibration measure based on + overlaping binning is CAL (Caruana and Niculescu-Mizil, 2004). Parameters ---------- From 09e86134a3c63e37733718cd87f0b69c4401dddf Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Sat, 14 Apr 2018 15:08:22 -0400 Subject: [PATCH 15/33] Update classification.py --- sklearn/metrics/classification.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 2120c3cf90ce4..4ff567f4c4d74 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -2001,7 +2001,7 @@ def calibration_loss(y_true, y_prob, bin_size=2.0): The calibration loss is defined as the measure to access the quality of learning methods and learned models. A calibration measure based on overlaping binning is CAL (Caruana and Niculescu-Mizil, 2004). - + Parameters ---------- y_true : array, shape (n_samples,) @@ -2012,8 +2012,7 @@ def calibration_loss(y_true, y_prob, bin_size=2.0): bin_size : int Size of the bin (samples) analysed in one iteration - - + Returns ------- score : float From afda36eea8125448c84f575e321fdacfa0e2e9e5 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Sat, 14 Apr 2018 15:23:35 -0400 Subject: [PATCH 16/33] Update classification.py --- sklearn/metrics/classification.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 4ff567f4c4d74..45033f33e7346 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -2017,8 +2017,8 @@ def calibration_loss(y_true, y_prob, bin_size=2.0): ------- score : float Calibration loss - - Examples + + Examples -------- >>> import numpy as np >>> from sklearn.metrics import calibration_loss @@ -2032,9 +2032,9 @@ def calibration_loss(y_true, y_prob, bin_size=2.0): 0.53 >>> calibration_loss(y_true, y_prob, bin_size=3) 0.833 - + """ - + pos_loss=0.0 neg_loss=0.0 From 908779deb2b75118e42c00eb8e03adc0f322d46f Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Sat, 14 Apr 2018 15:45:55 -0400 Subject: [PATCH 17/33] Update classification.py --- sklearn/metrics/classification.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 45033f33e7346..fe8fc129fdc5c 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -2025,7 +2025,6 @@ def calibration_loss(y_true, y_prob, bin_size=2.0): >>> y_true = np.array([0, 1, 1, 0]) >>> y_true_categorical = np.array(["spam", "ham", "ham", "spam"]) >>> y_prob = np.array([0.1, 0.9, 0.8, 0.3]) - >>> calibration_loss(y_true, y_prob, bin_size=1) 0.175 >>> calibration_loss(y_true, y_prob, bin_size=2) @@ -2033,23 +2032,22 @@ def calibration_loss(y_true, y_prob, bin_size=2.0): >>> calibration_loss(y_true, y_prob, bin_size=3) 0.833 - """ + """ pos_loss=0.0 neg_loss=0.0 - + for bin_start in range(0,len(y_true)-bin_size + 1): - + bin_end= bin_start + bin_size - actual_per_pos_class= (y_true[bin_start:bin_end].sum())/float(bin_size) bin_error_pos = abs(y_prob[bin_start:bin_end]-actual_per_pos_class).sum() pos_loss += bin_error_pos - + actual_per_neg_class= (bin_size - y_true[bin_start:bin_end].sum())/float(bin_size) bin_error_neg = abs((1-y_prob[bin_start:bin_end])-actual_per_neg_class).sum() neg_loss += bin_error_neg - + pos_loss /= (len(y_true)-bin_size+1) neg_loss /= (len(y_true)-bin_size+1) loss = (0.5)*(pos_loss+neg_loss) From 9005a00708df5338c844131503239f4a26c9c23f Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Sat, 14 Apr 2018 17:17:04 -0400 Subject: [PATCH 18/33] Update classification.py --- sklearn/metrics/classification.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index fe8fc129fdc5c..071be74c45ecd 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -2028,10 +2028,7 @@ def calibration_loss(y_true, y_prob, bin_size=2.0): >>> calibration_loss(y_true, y_prob, bin_size=1) 0.175 >>> calibration_loss(y_true, y_prob, bin_size=2) - 0.53 - >>> calibration_loss(y_true, y_prob, bin_size=3) - 0.833 - + 0.5333333333333333 """ pos_loss=0.0 From 500ca141bf2b50a497be990eb9cf831a21cbdba2 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Sat, 14 Apr 2018 17:25:59 -0400 Subject: [PATCH 19/33] Update classification.py --- sklearn/metrics/classification.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 071be74c45ecd..65a101a213cd7 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -2027,26 +2027,30 @@ def calibration_loss(y_true, y_prob, bin_size=2.0): >>> y_prob = np.array([0.1, 0.9, 0.8, 0.3]) >>> calibration_loss(y_true, y_prob, bin_size=1) 0.175 - >>> calibration_loss(y_true, y_prob, bin_size=2) + >>> calibration_loss(y_true, y_prob, bin_size=2) 0.5333333333333333 """ - pos_loss=0.0 - neg_loss=0.0 + pos_loss = 0.0 + neg_loss = 0.0 - for bin_start in range(0,len(y_true)-bin_size + 1): + for bin_start in range(0, len(y_true)-bin_size + 1): - bin_end= bin_start + bin_size - actual_per_pos_class= (y_true[bin_start:bin_end].sum())/float(bin_size) - bin_error_pos = abs(y_prob[bin_start:bin_end]-actual_per_pos_class).sum() + bin_end = bin_start + bin_size + actual_per_pos_class = (y_true[bin_start:bin_end] + .sum())/float(bin_size) + bin_error_pos = abs(y_prob[bin_start:bin_end] + -actual_per_pos_class).sum() pos_loss += bin_error_pos - actual_per_neg_class= (bin_size - y_true[bin_start:bin_end].sum())/float(bin_size) - bin_error_neg = abs((1-y_prob[bin_start:bin_end])-actual_per_neg_class).sum() + actual_per_neg_class = (bin_size - y_true[bin_start:bin_end] + .sum())/float(bin_size) + bin_error_neg = abs((1-y_prob[bin_start:bin_end]) + -actual_per_neg_class).sum() neg_loss += bin_error_neg pos_loss /= (len(y_true)-bin_size+1) neg_loss /= (len(y_true)-bin_size+1) loss = (0.5)*(pos_loss+neg_loss) - + return loss From 30779c7000a644e0204b3a7829716b08f7ba3bfc Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Sat, 14 Apr 2018 17:39:13 -0400 Subject: [PATCH 20/33] Update classification.py --- sklearn/metrics/classification.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 65a101a213cd7..0008ed2c56389 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -2038,19 +2038,19 @@ def calibration_loss(y_true, y_prob, bin_size=2.0): bin_end = bin_start + bin_size actual_per_pos_class = (y_true[bin_start:bin_end] - .sum())/float(bin_size) + .sum()) / float(bin_size) bin_error_pos = abs(y_prob[bin_start:bin_end] - -actual_per_pos_class).sum() + - actual_per_pos_class).sum() pos_loss += bin_error_pos actual_per_neg_class = (bin_size - y_true[bin_start:bin_end] - .sum())/float(bin_size) + .sum()) / float(bin_size) bin_error_neg = abs((1-y_prob[bin_start:bin_end]) - -actual_per_neg_class).sum() + - actual_per_neg_class).sum() neg_loss += bin_error_neg - pos_loss /= (len(y_true)-bin_size+1) - neg_loss /= (len(y_true)-bin_size+1) - loss = (0.5)*(pos_loss+neg_loss) + pos_loss /= (len(y_true) - bin_size + 1) + neg_loss /= (len(y_true) - bin_size + 1) + loss = (0.5) * (pos_loss + neg_loss) return loss From 0ab5bc04eed2925e51862f9216afd3736d1b8556 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Mon, 7 May 2018 02:57:21 -0400 Subject: [PATCH 21/33] calibration_loss test added --- sklearn/metrics/tests/test_classification.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index cae78e721bc8e..5174a38760241 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -28,6 +28,7 @@ from sklearn.utils.mocking import MockDataFrame from sklearn.metrics import accuracy_score +from sklearn.metrics import calibration_loss from sklearn.metrics import average_precision_score from sklearn.metrics import classification_report from sklearn.metrics import cohen_kappa_score @@ -1635,3 +1636,10 @@ def test_brier_score_loss(): # calculate even if only single class in y_true (#6980) assert_almost_equal(brier_score_loss([0], [0.5]), 0.25) assert_almost_equal(brier_score_loss([1], [0.5]), 0.25) + +def test_calibration_loss(): + # Check calibration_loss function + y_true = np.array([0, 1, 1, 0, 1, 1]) + y_pred = np.array([0.1, 0.8, 0.9, 0.3, 1.0, 0.95]) + calibration_loss_val = calibration_loss(y_true, y_pred, bin_size=2) + assert_almost_equal(calibration_loss_val, 0.469) From aa9bd3b9412f5a28d949ba6d06b9cbc8ca2d8270 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Mon, 7 May 2018 11:52:12 -0400 Subject: [PATCH 22/33] Update test_classification.py --- sklearn/metrics/tests/test_classification.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 5174a38760241..9b33c4394753e 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1637,6 +1637,7 @@ def test_brier_score_loss(): assert_almost_equal(brier_score_loss([0], [0.5]), 0.25) assert_almost_equal(brier_score_loss([1], [0.5]), 0.25) + def test_calibration_loss(): # Check calibration_loss function y_true = np.array([0, 1, 1, 0, 1, 1]) From f863d3a74006d3e917dcf7525cd9fdc7eb64d3ed Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Mon, 7 May 2018 12:17:37 -0400 Subject: [PATCH 23/33] Update test_classification.py --- sklearn/metrics/tests/test_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 9b33c4394753e..e30d064b41b80 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1643,4 +1643,4 @@ def test_calibration_loss(): y_true = np.array([0, 1, 1, 0, 1, 1]) y_pred = np.array([0.1, 0.8, 0.9, 0.3, 1.0, 0.95]) calibration_loss_val = calibration_loss(y_true, y_pred, bin_size=2) - assert_almost_equal(calibration_loss_val, 0.469) + assert_almost_equal(calibration_loss_val, 0.4699999999999) From 519c54662bd8843a2ea159b38008911affafed48 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Mon, 7 May 2018 19:17:01 -0400 Subject: [PATCH 24/33] Update test_classification.py --- sklearn/metrics/tests/test_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index e30d064b41b80..02f87b082745f 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1643,4 +1643,4 @@ def test_calibration_loss(): y_true = np.array([0, 1, 1, 0, 1, 1]) y_pred = np.array([0.1, 0.8, 0.9, 0.3, 1.0, 0.95]) calibration_loss_val = calibration_loss(y_true, y_pred, bin_size=2) - assert_almost_equal(calibration_loss_val, 0.4699999999999) + assert_almost_equal(calibration_loss_val, 0.46999, decimal = 4) From dc267e24ce7334436b4574660464eee0d09f5161 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Tue, 8 May 2018 16:27:00 -0400 Subject: [PATCH 25/33] Update test_classification.py --- sklearn/metrics/tests/test_classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 02f87b082745f..f00a931346e4e 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1643,4 +1643,4 @@ def test_calibration_loss(): y_true = np.array([0, 1, 1, 0, 1, 1]) y_pred = np.array([0.1, 0.8, 0.9, 0.3, 1.0, 0.95]) calibration_loss_val = calibration_loss(y_true, y_pred, bin_size=2) - assert_almost_equal(calibration_loss_val, 0.46999, decimal = 4) + assert_almost_equal(calibration_loss_val, 0.46999, decimal=4) From 9de655fb34efeba3a69bb2a24808de60a030e0cd Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Tue, 8 May 2018 16:30:54 -0400 Subject: [PATCH 26/33] Update classification.py --- sklearn/metrics/classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 0008ed2c56389..f4ade6f4671d6 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1995,7 +1995,7 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None): return np.average((y_true - y_prob) ** 2, weights=sample_weight) -def calibration_loss(y_true, y_prob, bin_size=2.0): +def calibration_loss(y_true, y_prob, bin_size=2): """Compute Calibration score by bins. The calibration loss is defined as the measure to access the quality of From 04090b653d8c8cfae1a55c156fad70ae95553ccd Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Tue, 8 May 2018 17:59:24 -0400 Subject: [PATCH 27/33] Update classification.py --- sklearn/metrics/classification.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index f4ade6f4671d6..9fde7150423a9 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -1996,7 +1996,6 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None): def calibration_loss(y_true, y_prob, bin_size=2): - """Compute Calibration score by bins. The calibration loss is defined as the measure to access the quality of learning methods and learned models. A calibration measure based on From 14848067db809c9f6463b51bce64c88b0efff7d2 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Fri, 25 May 2018 10:34:05 +0530 Subject: [PATCH 28/33] Update classification.py --- sklearn/metrics/classification.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 9fde7150423a9..9133c1f06f78e 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -2029,7 +2029,6 @@ def calibration_loss(y_true, y_prob, bin_size=2): >>> calibration_loss(y_true, y_prob, bin_size=2) 0.5333333333333333 """ - pos_loss = 0.0 neg_loss = 0.0 From 624a197457252828f1277d8c0aa679c22f91437b Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Thu, 27 Sep 2018 15:51:45 -0400 Subject: [PATCH 29/33] Update classification.py --- sklearn/metrics/classification.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 9133c1f06f78e..40ab61f662a94 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -39,6 +39,7 @@ from ..utils.validation import _num_samples from ..utils.sparsefuncs import count_nonzero from ..exceptions import UndefinedMetricWarning +from __future__ import division def _check_targets(y_true, y_pred): @@ -1996,6 +1997,7 @@ def brier_score_loss(y_true, y_prob, sample_weight=None, pos_label=None): def calibration_loss(y_true, y_prob, bin_size=2): + """Compute Calibration score by bins. The calibration loss is defined as the measure to access the quality of learning methods and learned models. A calibration measure based on @@ -2032,11 +2034,11 @@ def calibration_loss(y_true, y_prob, bin_size=2): pos_loss = 0.0 neg_loss = 0.0 - for bin_start in range(0, len(y_true)-bin_size + 1): + for bin_start in range(0, len(y_true) - bin_size + 1): bin_end = bin_start + bin_size actual_per_pos_class = (y_true[bin_start:bin_end] - .sum()) / float(bin_size) + .sum()) / bin_size bin_error_pos = abs(y_prob[bin_start:bin_end] - actual_per_pos_class).sum() pos_loss += bin_error_pos From c02e66b8c3549b888f2455d0706ed15945725fc8 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Thu, 27 Sep 2018 15:56:04 -0400 Subject: [PATCH 30/33] Update classification.py --- sklearn/metrics/classification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 40ab61f662a94..321a085068076 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -2044,7 +2044,7 @@ def calibration_loss(y_true, y_prob, bin_size=2): pos_loss += bin_error_pos actual_per_neg_class = (bin_size - y_true[bin_start:bin_end] - .sum()) / float(bin_size) + .sum()) / bin_size bin_error_neg = abs((1-y_prob[bin_start:bin_end]) - actual_per_neg_class).sum() neg_loss += bin_error_neg From fe9e2d5038342999423006597be8455be449f69f Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Fri, 26 Oct 2018 10:43:06 -0400 Subject: [PATCH 31/33] Update classification.py --- sklearn/metrics/classification.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/metrics/classification.py b/sklearn/metrics/classification.py index 321a085068076..1b6b4455b93cb 100644 --- a/sklearn/metrics/classification.py +++ b/sklearn/metrics/classification.py @@ -2054,3 +2054,4 @@ def calibration_loss(y_true, y_prob, bin_size=2): loss = (0.5) * (pos_loss + neg_loss) return loss + From f3196d99b50ed7f55896be977899cff93978c1d1 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Fri, 26 Oct 2018 10:44:54 -0400 Subject: [PATCH 32/33] Update test_classification.py --- sklearn/metrics/tests/test_classification.py | 22 ++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index f00a931346e4e..84bd8e9feec61 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1644,3 +1644,25 @@ def test_calibration_loss(): y_pred = np.array([0.1, 0.8, 0.9, 0.3, 1.0, 0.95]) calibration_loss_val = calibration_loss(y_true, y_pred, bin_size=2) assert_almost_equal(calibration_loss_val, 0.46999, decimal=4) + +def test_balanced_accuracy_score_unseen(): + assert_warns_message(UserWarning, 'y_pred contains classes not in y_true', + balanced_accuracy_score, [0, 0, 0], [0, 0, 1]) + + +@pytest.mark.parametrize('y_true,y_pred', + [ + (['a', 'b', 'a', 'b'], ['a', 'a', 'a', 'b']), + (['a', 'b', 'c', 'b'], ['a', 'a', 'a', 'b']), + (['a', 'a', 'a', 'b'], ['a', 'b', 'c', 'b']), + ]) +def test_balanced_accuracy_score(y_true, y_pred): + macro_recall = recall_score(y_true, y_pred, average='macro', + labels=np.unique(y_true)) + with ignore_warnings(): + # Warnings are tested in test_balanced_accuracy_score_unseen + balanced = balanced_accuracy_score(y_true, y_pred) + assert balanced == pytest.approx(macro_recall) + adjusted = balanced_accuracy_score(y_true, y_pred, adjusted=True) + chance = balanced_accuracy_score(y_true, np.full_like(y_true, y_true[0])) + assert adjusted == (balanced - chance) / (1 - chance) From fb2bf49fa0f7750300fa472d2fe924edc306a3f8 Mon Sep 17 00:00:00 2001 From: aishgrt1 <31906139+aishgrt1@users.noreply.github.com> Date: Sun, 28 Oct 2018 22:13:46 -0400 Subject: [PATCH 33/33] Update test_classification.py --- sklearn/metrics/tests/test_classification.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 84bd8e9feec61..aca45c889815b 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1666,3 +1666,6 @@ def test_balanced_accuracy_score(y_true, y_pred): adjusted = balanced_accuracy_score(y_true, y_pred, adjusted=True) chance = balanced_accuracy_score(y_true, np.full_like(y_true, y_true[0])) assert adjusted == (balanced - chance) / (1 - chance) +def test_balanced_accuracy_score_unseen(): + assert_warns_message(UserWarning, 'y_pred contains classes not in y_true', + balanced_accuracy_score, [0, 0, 0], [0, 0, 1])