From 0832bdac941cfa49bf2319c861775435cdc7294d Mon Sep 17 00:00:00 2001
From: srivatsan-ramesh
Date: Mon, 31 Oct 2016 18:14:18 +0530
Subject: [PATCH 01/13] label binarizer not used consistently in CalibratedClassifierCV

---
 sklearn/calibration.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index ed3e85b643815..8e475982b6b82 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -178,9 +178,11 @@ def fit(self, X, y, sample_weight=None):
                     this_estimator, method=self.method)
                 if sample_weight is not None:
                     calibrated_classifier.fit(X[test], y[test],
+                                              np.unique(y[train]),
                                               sample_weight[test])
                 else:
-                    calibrated_classifier.fit(X[test], y[test])
+                    calibrated_classifier.fit(X[test], y[test],
+                                              np.unique(y[train]))
                 self.calibrated_classifiers_.append(calibrated_classifier)
 
         return self
@@ -289,7 +291,7 @@ def _preproc(self, X):
 
         return df, idx_pos_class
 
-    def fit(self, X, y, sample_weight=None):
+    def fit(self, X, y, classes=None, sample_weight=None):
         """Calibrate the fitted model
 
         Parameters
@@ -300,6 +302,10 @@ def fit(self, X, y, sample_weight=None):
         y : array-like, shape (n_samples,)
             Target values.
 
+        classes : array-like, shape (n_classes,)
+            Contains unique classes used to fit the base estimator.
+            if None, then classes is extracted from the given target values.
+
         sample_weight : array-like, shape = [n_samples] or None
             Sample weights. If None, then samples are equally weighted.
 
@@ -309,7 +315,11 @@ def fit(self, X, y, sample_weight=None):
             Returns an instance of self.
         """
         lb = LabelBinarizer()
-        Y = lb.fit_transform(y)
+        if classes is None:
+            lb.fit(y)
+        else:
+            lb.fit(classes)
+        Y = lb.transform(y)
         self.classes_ = lb.classes_
 
         df, idx_pos_class = self._preproc(X)

From c57c4f141aa6838211f621c5d38559a835e2fe51 Mon Sep 17 00:00:00 2001
From: srivatsan-ramesh
Date: Mon, 31 Oct 2016 18:37:08 +0530
Subject: [PATCH 02/13] changed position of classes argument to make old tests run

---
 sklearn/calibration.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 8e475982b6b82..2a83ce8e50535 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -178,11 +178,11 @@ def fit(self, X, y, sample_weight=None):
                     this_estimator, method=self.method)
                 if sample_weight is not None:
                     calibrated_classifier.fit(X[test], y[test],
-                                              np.unique(y[train]),
-                                              sample_weight[test])
+                                              sample_weight[test],
+                                              np.unique(y[train]))
                 else:
                     calibrated_classifier.fit(X[test], y[test],
-                                              np.unique(y[train]))
+                                              classes=np.unique(y[train]))
                 self.calibrated_classifiers_.append(calibrated_classifier)
 
         return self
@@ -291,7 +291,7 @@ def _preproc(self, X):
 
         return df, idx_pos_class
 
-    def fit(self, X, y, classes=None, sample_weight=None):
+    def fit(self, X, y, sample_weight=None, classes=None):
         """Calibrate the fitted model
 
         Parameters
@@ -302,13 +302,13 @@ def fit(self, X, y, classes=None, sample_weight=None):
         y : array-like, shape (n_samples,)
             Target values.
 
+        sample_weight : array-like, shape = [n_samples] or None
+            Sample weights. If None, then samples are equally weighted.
+
         classes : array-like, shape (n_classes,)
            Contains unique classes used to fit the base estimator.
            if None, then classes is extracted from the given target values.
 
-        sample_weight : array-like, shape = [n_samples] or None
-            Sample weights. If None, then samples are equally weighted.
-
         Returns
         -------
         self : object

From 2b26c8232042b6b148b7e241734a340b3fd16930 Mon Sep 17 00:00:00 2001
From: srivatsan-ramesh
Date: Mon, 31 Oct 2016 19:30:58 +0530
Subject: [PATCH 03/13] moved parameter to constructor and added test

---
 sklearn/calibration.py            | 29 ++++++++++++++++-------------
 sklearn/tests/test_calibration.py | 16 ++++++++++++++++
 2 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 2a83ce8e50535..1700d4cf5de2f 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -175,14 +175,13 @@ def fit(self, X, y, sample_weight=None):
                 this_estimator.fit(X[train], y[train])
 
                 calibrated_classifier = _CalibratedClassifier(
-                    this_estimator, method=self.method)
+                    this_estimator, method=self.method,
+                    classes=np.unique(y[train]))
                 if sample_weight is not None:
                     calibrated_classifier.fit(X[test], y[test],
-                                              sample_weight[test],
-                                              np.unique(y[train]))
+                                              sample_weight[test])
                 else:
-                    calibrated_classifier.fit(X[test], y[test],
-                                              classes=np.unique(y[train]))
+                    calibrated_classifier.fit(X[test], y[test])
                 self.calibrated_classifiers_.append(calibrated_classifier)
 
         return self
@@ -255,6 +254,11 @@ class _CalibratedClassifier(object):
         corresponds to Platt's method or 'isotonic' which is a
         non-parametric approach based on isotonic regression.
 
+    classes : array-like, shape (n_classes,)
+        Contains unique classes used to fit the base estimator.
+        if None, then classes is extracted from the given target values
+        in fit().
+
     References
     ----------
     .. [1] Obtaining calibrated probability estimates from decision trees
@@ -269,9 +273,10 @@ class _CalibratedClassifier(object):
     .. [4] Predicting Good Probabilities with Supervised Learning,
            A. Niculescu-Mizil & R. Caruana, ICML 2005
     """
-    def __init__(self, base_estimator, method='sigmoid'):
+    def __init__(self, base_estimator, method='sigmoid', classes=None):
         self.base_estimator = base_estimator
         self.method = method
+        self.classes = classes
 
     def _preproc(self, X):
         n_classes = len(self.classes_)
@@ -291,7 +296,7 @@ def _preproc(self, X):
 
         return df, idx_pos_class
 
-    def fit(self, X, y, sample_weight=None, classes=None):
+    def fit(self, X, y, sample_weight=None):
         """Calibrate the fitted model
 
         Parameters
@@ -305,20 +310,18 @@ def fit(self, X, y, sample_weight=None, classes=None):
         sample_weight : array-like, shape = [n_samples] or None
             Sample weights. If None, then samples are equally weighted.
 
-        classes : array-like, shape (n_classes,)
-            Contains unique classes used to fit the base estimator.
-            if None, then classes is extracted from the given target values.
-
         Returns
         -------
         self : object
            Returns an instance of self.
""" + lb = LabelBinarizer() - if classes is None: + if self.classes is None: lb.fit(y) else: - lb.fit(classes) + lb.fit(self.classes) + Y = lb.transform(y) self.classes_ = lb.classes_ diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 68a6efb395971..6981fc23190d3 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -3,6 +3,7 @@ import numpy as np from scipy import sparse +from sklearn.model_selection import LeaveOneOut from sklearn.utils.testing import (assert_array_almost_equal, assert_equal, assert_greater, assert_almost_equal, @@ -159,6 +160,7 @@ def test_calibration_multiclass(): def softmax(y_pred): e = np.exp(-y_pred) return e / e.sum(axis=1).reshape(-1, 1) + uncalibrated_log_loss = \ log_loss(y_test, softmax(clf.decision_function(X_test))) calibrated_log_loss = log_loss(y_test, probas) @@ -275,3 +277,17 @@ def test_calibration_nan_imputer(): clf_c = CalibratedClassifierCV(clf, cv=2, method='isotonic') clf_c.fit(X, y) clf_c.predict(X) + + +def test_calibration_prob_sum(): + """Test that sum of probabilities is 1""" + num_classes = 2 + X, y = make_classification(n_samples=100, n_features=20, + n_informative=18, n_redundant=2, + n_classes=num_classes) + clf = LinearSVC(C=1.0) + clf_prob = CalibratedClassifierCV(clf, method="sigmoid", cv=LeaveOneOut()) + clf_prob.fit(X, y) + + probs = clf_prob.predict_proba(X) + assert_array_equal(probs.sum(axis=1), np.ones(probs.shape[0])) From 693f3a89efe2fd7f3c10822519c996a9e8ee8f58 Mon Sep 17 00:00:00 2001 From: srivatsan-ramesh Date: Tue, 1 Nov 2016 17:21:23 +0530 Subject: [PATCH 04/13] added test where train set doesnt have all classes --- sklearn/calibration.py | 2 +- sklearn/tests/test_calibration.py | 20 ++++++++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 1700d4cf5de2f..0148f61a065ed 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -176,7 +176,7 @@ def fit(self, X, y, sample_weight=None): calibrated_classifier = _CalibratedClassifier( this_estimator, method=self.method, - classes=np.unique(y[train])) + classes=self.classes_) if sample_weight is not None: calibrated_classifier.fit(X[test], y[test], sample_weight[test]) diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 6981fc23190d3..7aa8a3a46fb81 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -280,14 +280,26 @@ def test_calibration_nan_imputer(): def test_calibration_prob_sum(): - """Test that sum of probabilities is 1""" + # Test that sum of probabilities is 1. 
A non-regression test for + # issue #7796 num_classes = 2 - X, y = make_classification(n_samples=100, n_features=20, - n_informative=18, n_redundant=2, + X, y = make_classification(n_samples=10, n_features=5, n_classes=num_classes) clf = LinearSVC(C=1.0) clf_prob = CalibratedClassifierCV(clf, method="sigmoid", cv=LeaveOneOut()) clf_prob.fit(X, y) probs = clf_prob.predict_proba(X) - assert_array_equal(probs.sum(axis=1), np.ones(probs.shape[0])) + assert_array_almost_equal(probs.sum(axis=1), np.ones(probs.shape[0])) + + # Test to check calibration works fine when train set in a test-train + # split does not contain all classes + # Since this test uses LOO, at each iteration train set will not contain a + # class label + X = np.random.randn(10, 5) + y = np.arange(10) + clf = LinearSVC(C=1.0) + clf_prob = CalibratedClassifierCV(clf, method="sigmoid", cv=LeaveOneOut()) + clf_prob.fit(X, y) + probs = clf_prob.predict_proba(X) + assert_array_almost_equal(probs.sum(axis=1), np.ones(probs.shape[0])) From b62cb4ccf1e2ad29d33dd1d0a089a9b2c1aa0bd3 Mon Sep 17 00:00:00 2001 From: srivatsan-ramesh Date: Thu, 3 Nov 2016 01:54:38 +0530 Subject: [PATCH 05/13] CalibratedClassifierCV can now handle cases where train set doesnt contain all labels --- sklearn/calibration.py | 37 ++++++++++++++++++++----------- sklearn/tests/test_calibration.py | 4 +++- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 0148f61a065ed..33ab6eaaaae6a 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -14,9 +14,10 @@ import numpy as np from scipy.optimize import fmin_bfgs +from sklearn.preprocessing import LabelEncoder from .base import BaseEstimator, ClassifierMixin, RegressorMixin, clone -from .preprocessing import LabelBinarizer +from .preprocessing import label_binarize, LabelBinarizer from .utils import check_X_y, check_array, indexable, column_or_1d from .utils.validation import check_is_fitted from .utils.fixes import signature @@ -50,7 +51,8 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin): The method to use for calibration. Can be 'sigmoid' which corresponds to Platt's method or 'isotonic' which is a non-parametric approach. It is not advised to use isotonic calibration - with too few calibration samples ``(<<1000)`` since it tends to overfit. + with too few calibration samples ``(<<1000)`` since it tends to + overfit. Use sigmoids (Platt's calibration) in this case. cv : integer, cross-validation generator, iterable or "prefit", optional @@ -64,7 +66,8 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin): For integer/None inputs, if ``y`` is binary or multiclass, :class:`sklearn.model_selection.StratifiedKFold` is used. If ``y`` - is neither binary nor multiclass, :class:`sklearn.model_selection.KFold` + is neither binary nor multiclass, + :class:`sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various @@ -97,6 +100,7 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin): .. [4] Predicting Good Probabilities with Supervised Learning, A. Niculescu-Mizil & R. 
Caruana, ICML 2005 """ + def __init__(self, base_estimator=None, method='sigmoid', cv=3): self.base_estimator = base_estimator self.method = method @@ -124,15 +128,16 @@ def fit(self, X, y, sample_weight=None): X, y = check_X_y(X, y, accept_sparse=['csc', 'csr', 'coo'], force_all_finite=False) X, y = indexable(X, y) - lb = LabelBinarizer().fit(y) - self.classes_ = lb.classes_ + le = LabelBinarizer().fit(y) + self.classes_ = le.classes_ # Check that each cross-validation fold can have at least one # example per class n_folds = self.cv if isinstance(self.cv, int) \ else self.cv.n_folds if hasattr(self.cv, "n_folds") else None if n_folds and \ - np.any([np.sum(y == class_) < n_folds for class_ in self.classes_]): + np.any([np.sum(y == class_) < n_folds for class_ in + self.classes_]): raise ValueError("Requesting %d-fold cross-validation but provided" " less than %d examples for at least one class." % (n_folds, n_folds)) @@ -158,7 +163,7 @@ def fit(self, X, y, sample_weight=None): fit_parameters = signature(base_estimator.fit).parameters estimator_name = type(base_estimator).__name__ if (sample_weight is not None - and "sample_weight" not in fit_parameters): + and "sample_weight" not in fit_parameters): warnings.warn("%s does not support sample_weight. Samples" " weights are only used for the calibration" " itself." % estimator_name) @@ -273,6 +278,7 @@ class _CalibratedClassifier(object): .. [4] Predicting Good Probabilities with Supervised Learning, A. Niculescu-Mizil & R. Caruana, ICML 2005 """ + def __init__(self, base_estimator, method='sigmoid', classes=None): self.base_estimator = base_estimator self.method = method @@ -292,7 +298,11 @@ def _preproc(self, X): raise RuntimeError('classifier has no decision_function or ' 'predict_proba method.') - idx_pos_class = np.arange(df.shape[1]) + if hasattr(self.base_estimator, "classes_"): + idx_pos_class = self.label_encoder_. \ + transform(self.base_estimator.classes_) + else: + idx_pos_class = np.arange(df.shape[1]) return df, idx_pos_class @@ -316,14 +326,14 @@ def fit(self, X, y, sample_weight=None): Returns an instance of self. """ - lb = LabelBinarizer() + self.label_encoder_ = LabelEncoder() if self.classes is None: - lb.fit(y) + self.label_encoder_.fit(y) else: - lb.fit(self.classes) + self.label_encoder_.fit(self.classes) - Y = lb.transform(y) - self.classes_ = lb.classes_ + self.classes_ = self.label_encoder_.classes_ + Y = label_binarize(y, self.classes_) df, idx_pos_class = self._preproc(X) self.calibrators_ = [] @@ -460,6 +470,7 @@ class _SigmoidCalibration(BaseEstimator, RegressorMixin): b_ : float The intercept. """ + def fit(self, X, y, sample_weight=None): """Fit the model using X, y as training data. 
diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index 7aa8a3a46fb81..ff64eef8a4fd5 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -302,4 +302,6 @@ def test_calibration_prob_sum():
     clf_prob = CalibratedClassifierCV(clf, method="sigmoid", cv=LeaveOneOut())
     clf_prob.fit(X, y)
     probs = clf_prob.predict_proba(X)
-    assert_array_almost_equal(probs.sum(axis=1), np.ones(probs.shape[0]))
+    n_classes = len(y)
+    assert_array_almost_equal(probs, np.full((X.shape[0], n_classes),
+                                             1/n_classes))

From 4910004e1b0131a409f21834724e95ad17bad648 Mon Sep 17 00:00:00 2001
From: srivatsan-ramesh
Date: Thu, 3 Nov 2016 02:01:18 +0530
Subject: [PATCH 06/13] fixing flake error

---
 sklearn/calibration.py | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 33ab6eaaaae6a..b491afcf7a652 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -51,8 +51,7 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
         The method to use for calibration. Can be 'sigmoid' which
         corresponds to Platt's method or 'isotonic' which is a
         non-parametric approach. It is not advised to use isotonic calibration
-        with too few calibration samples ``(<<1000)`` since it tends to
-        overfit.
+        with too few calibration samples ``(<<1000)`` since it tends to overfit.
         Use sigmoids (Platt's calibration) in this case.
 
     cv : integer, cross-validation generator, iterable or "prefit", optional
@@ -65,8 +64,7 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
 
         For integer/None inputs, if ``y`` is binary or multiclass,
         :class:`sklearn.model_selection.StratifiedKFold` is used. If ``y``
-        is neither binary nor multiclass,
-        :class:`sklearn.model_selection.KFold`
+        is neither binary nor multiclass, :class:`sklearn.model_selection.KFold`
         is used.
 
         Refer :ref:`User Guide <cross_validation>` for the various
@@ -98,7 +96,6 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
     .. [4] Predicting Good Probabilities with Supervised Learning,
            A. Niculescu-Mizil & R. Caruana, ICML 2005
     """
-
     def __init__(self, base_estimator=None, method='sigmoid', cv=3):
         self.base_estimator = base_estimator
         self.method = method
@@ -160,7 +157,7 @@ def fit(self, X, y, sample_weight=None):
             fit_parameters = signature(base_estimator.fit).parameters
             estimator_name = type(base_estimator).__name__
             if (sample_weight is not None
-                and "sample_weight" not in fit_parameters):
+                    and "sample_weight" not in fit_parameters):
                 warnings.warn("%s does not support sample_weight. Samples"
                               " weights are only used for the calibration"
                               " itself." % estimator_name)
@@ -278,7 +275,6 @@ class _CalibratedClassifier(object):
     .. [4] Predicting Good Probabilities with Supervised Learning,
            A. Niculescu-Mizil & R. Caruana, ICML 2005
     """
-
     def __init__(self, base_estimator, method='sigmoid', classes=None):
         self.base_estimator = base_estimator
         self.method = method
@@ -299,7 +295,7 @@ def _preproc(self, X):
                                'predict_proba method.')
 
         if hasattr(self.base_estimator, "classes_"):
-            idx_pos_class = self.label_encoder_. \
+            idx_pos_class = self.label_encoder_.\
                 transform(self.base_estimator.classes_)
         else:
             idx_pos_class = np.arange(df.shape[1])
@@ -470,7 +466,6 @@ class _SigmoidCalibration(BaseEstimator, RegressorMixin):
     b_ : float
         The intercept.
     """
-
     def fit(self, X, y, sample_weight=None):
         """Fit the model using X, y as training data.
 
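An aside on the mechanism the series has converged on by this point: a minimal sketch, not part of any patch. The array values and variable names below are illustrative only; LabelEncoder and label_binarize are the real scikit-learn utilities the patches import.

    import numpy as np
    from sklearn.preprocessing import LabelEncoder, label_binarize

    all_classes = np.array([0, 1, 2, 3])  # classes passed to the constructor
    fold_classes = np.array([0, 2])       # classes the base estimator saw in one fold

    # The encoder is fitted on the full class set, so the columns of the
    # calibrated output cover every class, even ones missing from a fold ...
    label_encoder = LabelEncoder().fit(all_classes)

    # ... and the base estimator's classes_ map to their column positions,
    # here array([0, 2]); columns for unseen classes keep zero probability.
    idx_pos_class = label_encoder.transform(fold_classes)

    # Targets are binarized against the full class set as well, so the
    # one-vs-rest columns line up with idx_pos_class.
    Y = label_binarize([0, 2, 2, 0], classes=all_classes)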
From ee98a8dd96790ce5cdb54017bc723bb3a7194912 Mon Sep 17 00:00:00 2001
From: srivatsan-ramesh
Date: Thu, 3 Nov 2016 02:22:45 +0530
Subject: [PATCH 07/13] fixing line lengths

---
 sklearn/calibration.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index b491afcf7a652..ab63ca2cc5ebe 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -51,7 +51,8 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
         The method to use for calibration. Can be 'sigmoid' which
         corresponds to Platt's method or 'isotonic' which is a
         non-parametric approach. It is not advised to use isotonic calibration
-        with too few calibration samples ``(<<1000)`` since it tends to overfit.
+        with too few calibration samples ``(<<1000)`` since it tends to
+        overfit.
         Use sigmoids (Platt's calibration) in this case.
 
     cv : integer, cross-validation generator, iterable or "prefit", optional
@@ -64,8 +65,8 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
         - An iterable yielding train/test splits.
 
         For integer/None inputs, if ``y`` is binary or multiclass,
-        :class:`sklearn.model_selection.StratifiedKFold` is used. If ``y``
-        is neither binary nor multiclass, :class:`sklearn.model_selection.KFold`
+        :class:`sklearn.model_selection.StratifiedKFold` is used. If ``y`` is
+        neither binary nor multiclass, :class:`sklearn.model_selection.KFold`
         is used.

From 95ba6ea40b77bfde73c8b3c2095271c5ff7391cb Mon Sep 17 00:00:00 2001
From: srivatsan-ramesh
Date: Thu, 3 Nov 2016 02:38:55 +0530
Subject: [PATCH 08/13] removing np.full()

---
 sklearn/tests/test_calibration.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index ff64eef8a4fd5..4dab698a412ca 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -303,5 +303,4 @@ def test_calibration_prob_sum():
     clf_prob.fit(X, y)
     probs = clf_prob.predict_proba(X)
     n_classes = len(y)
-    assert_array_almost_equal(probs, np.full((X.shape[0], n_classes),
-                                             1/n_classes))
+    assert_array_almost_equal(probs, 1/n_classes)

From 5ae793cc505df711ea3e4c437ce2236f74a426ba Mon Sep 17 00:00:00 2001
From: srivatsan-ramesh
Date: Thu, 3 Nov 2016 03:25:12 +0530
Subject: [PATCH 09/13] from __future__ import division for py2.7

---
 sklearn/tests/test_calibration.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index 4dab698a412ca..bc92c776592c2 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -1,6 +1,7 @@
 # Authors: Alexandre Gramfort
 # License: BSD 3 clause
 
+from __future__ import division
 import numpy as np
 from scipy import sparse
 from sklearn.model_selection import LeaveOneOut
@@ -304,3 +305,4 @@ def test_calibration_prob_sum():
     probs = clf_prob.predict_proba(X)
     n_classes = len(y)
     assert_array_almost_equal(probs, 1/n_classes)
+test_calibration_prob_sum()
\ No newline at end of file

From 1e50a6c55441ae07bcff91d3a13a5febc7be98d6 Mon Sep 17 00:00:00 2001
From: srivatsan-ramesh
Date: Thu, 3 Nov 2016 03:26:42 +0530
Subject: [PATCH 10/13] change in test file

---
 sklearn/tests/test_calibration.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index bc92c776592c2..e90b0ca81c24d 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -305,4 +305,3 @@ def test_calibration_prob_sum():
     probs = clf_prob.predict_proba(X)
     n_classes = len(y)
     assert_array_almost_equal(probs, 1/n_classes)
-test_calibration_prob_sum()
\ No newline at end of file

From 466e6a06bbdaf2ea9193ba8476cb48403b4300a7 Mon Sep 17 00:00:00 2001
From: srivatsan-ramesh
Date: Sun, 6 Nov 2016 14:16:31 +0530
Subject: [PATCH 11/13] added an extra test and removed a test with Ridge

---
 sklearn/calibration.py            |  7 ++-----
 sklearn/tests/test_calibration.py | 21 +++++++++------------
 2 files changed, 11 insertions(+), 17 deletions(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index ab63ca2cc5ebe..1bbec9bac912f 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -295,11 +295,8 @@ def _preproc(self, X):
             raise RuntimeError('classifier has no decision_function or '
                                'predict_proba method.')
 
-        if hasattr(self.base_estimator, "classes_"):
-            idx_pos_class = self.label_encoder_.\
-                transform(self.base_estimator.classes_)
-        else:
-            idx_pos_class = np.arange(df.shape[1])
+        idx_pos_class = self.label_encoder_.\
+            transform(self.base_estimator.classes_)
 
         return df, idx_pos_class
 
diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index e90b0ca81c24d..13b131a62b4aa 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -16,7 +16,6 @@
 from sklearn.naive_bayes import MultinomialNB
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 from sklearn.svm import LinearSVC
-from sklearn.linear_model import Ridge
 from sklearn.pipeline import Pipeline
 from sklearn.preprocessing import Imputer
 from sklearn.metrics import brier_score_loss, log_loss
@@ -89,12 +88,6 @@ def test_calibration():
                    brier_score_loss((y_test + 1) % 2,
                                     prob_pos_pc_clf_relabeled))
 
-    # check that calibration can also deal with regressors that have
-    # a decision_function
-    clf_base_regressor = CalibratedClassifierCV(Ridge())
-    clf_base_regressor.fit(X_train, y_train)
-    clf_base_regressor.predict(X_test)
-
     # Check failure cases:
     # only "isotonic" and "sigmoid" should be accepted as methods
     clf_invalid_method = CalibratedClassifierCV(clf, method="foo")
@@ -286,15 +279,19 @@ def test_calibration_prob_sum():
     probs = clf_prob.predict_proba(X)
     assert_array_almost_equal(probs.sum(axis=1), np.ones(probs.shape[0]))
 
+
+def test_calibration_less_classes():
     # Test to check calibration works fine when train set in a test-train
     # split does not contain all classes
     # Since this test uses LOO, at each iteration train set will not contain a
     # class label
     X = np.random.randn(10, 5)
     y = np.arange(10)
     clf = LinearSVC(C=1.0)
-    clf_prob = CalibratedClassifierCV(clf, method="sigmoid", cv=LeaveOneOut())
-    clf_prob.fit(X, y)
-    probs = clf_prob.predict_proba(X)
-    n_classes = len(y)
-    assert_array_almost_equal(probs, 1/n_classes)
+    cal_clf = CalibratedClassifierCV(clf, method="sigmoid", cv=LeaveOneOut())
+    cal_clf.fit(X, y)
+
+    for i, calibrated_classifier in \
+            enumerate(cal_clf.calibrated_classifiers_):
+        assert_array_equal(calibrated_classifier.predict_proba(X)[:, i],
+                           np.zeros(len(y)))

From 12be4ff14e82f97d38791eb9cfb84c1986169a79 Mon Sep 17 00:00:00 2001
From: srivatsan-ramesh
Date: Sun, 6 Nov 2016 23:03:03 +0530
Subject: [PATCH 12/13] stronger test

---
 sklearn/calibration.py            | 2 +-
 sklearn/tests/test_calibration.py | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 1bbec9bac912f..b96799f73d13d 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -257,7 +257,7 @@ class _CalibratedClassifier(object):
         corresponds to Platt's method or 'isotonic' which is a
         non-parametric approach based on isotonic regression.
 
-    classes : array-like, shape (n_classes,)
+    classes : array-like, shape (n_classes,), optional
         Contains unique classes used to fit the base estimator.
         if None, then classes is extracted from the given target values
         in fit().
diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py
index 13b131a62b4aa..e4499e35d5a67 100644
--- a/sklearn/tests/test_calibration.py
+++ b/sklearn/tests/test_calibration.py
@@ -300,5 +300,7 @@ def test_calibration_less_classes():
 
     for i, calibrated_classifier in \
             enumerate(cal_clf.calibrated_classifiers_):
-        assert_array_equal(calibrated_classifier.predict_proba(X)[:, i],
-                           np.zeros(len(y)))
+        proba = calibrated_classifier.predict_proba(X)
+        assert_array_equal(proba[:, i], np.zeros(len(y)))
+        assert_equal(np.all(np.hstack([proba[:, :i],
+                                       proba[:, i + 1:]])), True)

From 6d9b675825fa733c177dfed52226b1c7f3a22180 Mon Sep 17 00:00:00 2001
From: srivatsan-ramesh
Date: Sun, 6 Nov 2016 23:27:23 +0530
Subject: [PATCH 13/13] whats new entry

---
 doc/whats_new.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 1911cc5cbde57..d676312e240de 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -138,6 +138,12 @@ Bug fixes
      ``partial_fit`` was less than the total number of classes in the
      data. :issue:`7786` by `Srivatsan Ramesh`_
 
+   - Fixes issue in :class:`calibration.CalibratedClassifierCV` where
+     the sum of probabilities of each class for a data was not 1, and
+     ``CalibratedClassifierCV`` now handles the case where the training set
+     has less number of classes than the total data. :issue:`7799` by
+     `Srivatsan Ramesh`_
+
 API changes summary
 -------------------
 
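To close, a short usage sketch of the behaviour the series fixes, adapted from the tests added above. It assumes a scikit-learn version that includes this fix; the variable names are illustrative.

    import numpy as np
    from sklearn.calibration import CalibratedClassifierCV
    from sklearn.model_selection import LeaveOneOut
    from sklearn.svm import LinearSVC

    # Ten samples, ten distinct classes: every leave-one-out training fold
    # is missing exactly one class, the case that used to break the
    # per-fold calibrators.
    X = np.random.randn(10, 5)
    y = np.arange(10)

    cal_clf = CalibratedClassifierCV(LinearSVC(C=1.0), method="sigmoid",
                                     cv=LeaveOneOut())
    cal_clf.fit(X, y)

    probs = cal_clf.predict_proba(X)
    # Each row is now a proper probability distribution over all ten classes.
    assert np.allclose(probs.sum(axis=1), 1.0)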