Commit c1fa16f

ogrisel authored and amueller committed
ENH no need for tie breaking jitter in calibration
The isotonic regression routine now implements deterministic tie-breaking by default.
1 parent ab556be · commit c1fa16f
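For context, a minimal sketch (not part of this commit; the toy data and variable names are illustrative) of the behaviour the change relies on: IsotonicRegression now breaks ties among duplicate inputs deterministically, so repeated fits agree exactly and no jitter or random_state is needed.

import numpy as np
from sklearn.isotonic import IsotonicRegression

# Tied inputs with conflicting targets: the situation the removed
# jitter used to work around during calibration.
df = np.array([0.1, 0.1, 0.4, 0.4, 0.9])
y = np.array([0.0, 1.0, 0.0, 1.0, 1.0])

ir = IsotonicRegression(out_of_bounds='clip')
first = ir.fit_transform(df, y)

# Deterministic tie-breaking: refitting reproduces the result exactly.
assert np.array_equal(first, ir.fit_transform(df, y))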

File tree

2 files changed: 5 additions, 26 deletions

sklearn/calibration.py

Lines changed: 4 additions & 24 deletions
@@ -18,7 +18,6 @@
 
 from .base import BaseEstimator, ClassifierMixin, RegressorMixin, clone
 from .preprocessing import LabelBinarizer
-from .utils import check_random_state
 from .utils import check_X_y, check_array, indexable, column_or_1d
 from .utils.validation import check_is_fitted
 from .isotonic import IsotonicRegression
@@ -59,9 +58,6 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
         If "prefit" is passed, it is assumed that base_estimator has been
         fitted already and all data is used for calibration.
 
-    random_state : int, RandomState instance or None (default=None)
-        Used to randomly break ties when method is 'isotonic'.
-
     Attributes
     ----------
     classes_ : array, shape (n_classes)
@@ -86,12 +82,10 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
     .. [4] Predicting Good Probabilities with Supervised Learning,
            A. Niculescu-Mizil & R. Caruana, ICML 2005
     """
-    def __init__(self, base_estimator=None, method='sigmoid', cv=3,
-                 random_state=None):
+    def __init__(self, base_estimator=None, method='sigmoid', cv=3):
         self.base_estimator = base_estimator
         self.method = method
         self.cv = cv
-        self.random_state = random_state
 
     def fit(self, X, y, sample_weight=None):
         """Fit the calibrated model
@@ -116,7 +110,6 @@ def fit(self, X, y, sample_weight=None):
         X, y = indexable(X, y)
         lb = LabelBinarizer().fit(y)
         self.classes_ = lb.classes_
-        random_state = check_random_state(self.random_state)
 
         # Check that each cross-validation fold can have at least one
         # example per class
@@ -136,7 +129,7 @@
 
         if self.cv == "prefit":
             calibrated_classifier = _CalibratedClassifier(
-                base_estimator, method=self.method, random_state=random_state)
+                base_estimator, method=self.method)
             if sample_weight is not None:
                 calibrated_classifier.fit(X, y, sample_weight)
         else:
@@ -164,8 +157,7 @@
                 this_estimator.fit(X[train], y[train])
 
                 calibrated_classifier = _CalibratedClassifier(
-                    this_estimator, method=self.method,
-                    random_state=random_state)
+                    this_estimator, method=self.method)
                 if sample_weight is not None:
                     calibrated_classifier.fit(X[test], y[test],
                                               sample_weight[test])
@@ -242,9 +234,6 @@ class _CalibratedClassifier(object):
         corresponds to Platt's method or 'isotonic' which is a
         non-parametric approach based on isotonic regression.
 
-    random_state : int, RandomState instance or None (default=None)
-        Used to randomly break ties when method is 'isotonic'.
-
     References
     ----------
     .. [1] Obtaining calibrated probability estimates from decision trees
@@ -259,11 +248,9 @@ class _CalibratedClassifier(object):
     .. [4] Predicting Good Probabilities with Supervised Learning,
            A. Niculescu-Mizil & R. Caruana, ICML 2005
     """
-    def __init__(self, base_estimator, method='sigmoid',
-                 random_state=None):
+    def __init__(self, base_estimator, method='sigmoid'):
         self.base_estimator = base_estimator
         self.method = method
-        self.random_state = random_state
 
     def _preproc(self, X):
         n_classes = len(self.classes_)
@@ -312,13 +299,6 @@ def fit(self, X, y, sample_weight=None):
         for k, this_df in zip(idx_pos_class, df.T):
             if self.method == 'isotonic':
                 calibrator = IsotonicRegression(out_of_bounds='clip')
-                # XXX: isotonic regression cannot deal correctly with
-                # situations in which multiple inputs are identical but
-                # have different outputs. Since this is not untypical
-                # when calibrating, we add some small random jitter to
-                # the inputs.
-                jitter = self.random_state.normal(0, 1e-10, this_df.shape[0])
-                this_df = this_df + jitter
             elif self.method == 'sigmoid':
                 calibrator = _SigmoidCalibration()
             else:
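A minimal usage sketch (not part of the diff; the dataset and estimator choices are illustrative) of the simplified API after this change; the only remaining sources of randomness are the base estimator and the CV splitter:

from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV

X, y = make_classification(n_samples=200, random_state=0)

# The calibrator itself no longer takes a random_state: isotonic
# calibration is deterministic given the base estimator and the folds.
clf = CalibratedClassifierCV(LinearSVC(random_state=42),
                             method='isotonic', cv=3)
clf.fit(X, y)
proba = clf.predict_proba(X)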

sklearn/tests/test_calibration.py

Lines changed: 1 addition & 2 deletions
@@ -114,8 +114,7 @@ def test_sample_weight_warning():
 
     for method in ['sigmoid', 'isotonic']:
         base_estimator = LinearSVC(random_state=42)
-        calibrated_clf = CalibratedClassifierCV(base_estimator, method=method,
-                                                random_state=42)
+        calibrated_clf = CalibratedClassifierCV(base_estimator, method=method)
         # LinearSVC does not currently support sample weights but they
         # can still be used for the calibration step (with a warning)
         msg = "LinearSVC does not support sample_weight."
