Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 63fc8a8

Browse files
committed
Moved timing code out of closures into _pre_transform
Removed the closures wrapping fit and transform; timing code is now placed directly in `_pre_transform`, and separately around the final estimator's `fit` and `fit_transform`. This aggregates timing information at the step level.
1 parent 4e88f3d commit 63fc8a8

File tree

3 files changed

+28
-32
lines changed

3 files changed

+28
-32
lines changed

doc/modules/pipeline.rst

+6-3
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,8 @@ is an estimator object::
4444
whiten=False)), ('svm', SVC(C=1.0, cache_size=200, class_weight=None,
4545
coef0=0.0, decision_function_shape=None, degree=3, gamma='auto',
4646
kernel='rbf', max_iter=-1, probability=False, random_state=None,
47-
shrinking=True, tol=0.001, verbose=False))])
47+
shrinking=True, tol=0.001, verbose=False))],
48+
verbose=False)
4849

4950
The utility function :func:`make_pipeline` is a shorthand
5051
for constructing pipelines;
@@ -58,7 +59,8 @@ filling in the names automatically::
5859
Pipeline(steps=[('binarizer', Binarizer(copy=True, threshold=0.0)),
5960
('multinomialnb', MultinomialNB(alpha=1.0,
6061
class_prior=None,
61-
fit_prior=True))])
62+
fit_prior=True))],
63+
verbose=False)
6264

6365
The estimators of a pipeline are stored as a list in the ``steps`` attribute::
6466

@@ -78,7 +80,8 @@ Parameters of the estimators in the pipeline can be accessed using the
7880
whiten=False)), ('svm', SVC(C=10, cache_size=200, class_weight=None,
7981
coef0=0.0, decision_function_shape=None, degree=3, gamma='auto',
8082
kernel='rbf', max_iter=-1, probability=False, random_state=None,
81-
shrinking=True, tol=0.001, verbose=False))])
83+
shrinking=True, tol=0.001, verbose=False))],
84+
verbose=False)
8285

8386
This is particularly important for doing grid searches::
8487

sklearn/pipeline.py

+20-26
Original file line numberDiff line numberDiff line change
@@ -99,40 +99,18 @@ def __init__(self, steps, verbose=False):
9999
transforms = estimators[:-1]
100100
estimator = estimators[-1]
101101

102-
for i, t in enumerate(transforms):
103-
if hasattr(t, "fit"):
104-
transforms[i].fit = self._wrap_timer(t.fit, names[i], "fit")
105-
106-
if hasattr(t, "transform"):
107-
transforms[i].transform = self._wrap_timer(t.transform,
108-
names[i], "transform")
109-
102+
for t in transforms:
110103
if (not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not
111104
hasattr(t, "transform")):
112105
raise TypeError("All intermediate steps of the chain should "
113106
"be transforms and implement fit and transform"
114107
" '%s' (type %s) doesn't)" % (t, type(t)))
115108

116-
if hasattr(estimator, "fit"):
117-
estimator.fit = self._wrap_timer(estimator.fit, names[-1], "fit")
118-
else:
109+
if not hasattr(estimator, "fit"):
119110
raise TypeError("Last step of chain should implement fit "
120111
"'%s' (type %s) doesn't)"
121112
% (estimator, type(estimator)))
122113

123-
def _wrap_timer(self, f, name, action):
124-
def timed_f(*args, **kwargs):
125-
start_time = time.time()
126-
ret = f(*args, **kwargs)
127-
elapsed_time = time.time() - start_time
128-
time_str = logger.short_format_time(elapsed_time)
129-
if self.verbose:
130-
print('[Pipeline] %s, %s, %s' % (name, action, time_str))
131-
132-
return ret
133-
134-
return timed_f
135-
136114
@property
137115
def _estimator_type(self):
138116
return self.steps[-1][1]._estimator_type
@@ -166,11 +144,16 @@ def _pre_transform(self, X, y=None, **fit_params):
166144
fit_params_steps[step][param] = pval
167145
Xt = X
168146
for name, transform in self.steps[:-1]:
147+
start_time = time.time()
169148
if hasattr(transform, "fit_transform"):
170149
Xt = transform.fit_transform(Xt, y, **fit_params_steps[name])
171150
else:
172151
Xt = transform.fit(Xt, y, **fit_params_steps[name]) \
173152
.transform(Xt)
153+
if self.verbose:
154+
elapsed = time.time() - start_time
155+
print('[Pipeline] %s ... %s' % (name, elapsed))
156+
174157
return Xt, fit_params_steps[self.steps[-1][0]]
175158

176159
def fit(self, X, y=None, **fit_params):
@@ -187,7 +170,12 @@ def fit(self, X, y=None, **fit_params):
187170
the pipeline.
188171
"""
189172
Xt, fit_params = self._pre_transform(X, y, **fit_params)
173+
start_time = time.time()
190174
self.steps[-1][-1].fit(Xt, y, **fit_params)
175+
if self.verbose:
176+
elapsed = time.time() - start_time
177+
print('[Pipeline] %s ... %s' % (self.steps[-1][0], elapsed))
178+
191179
return self
192180

193181
def fit_transform(self, X, y=None, **fit_params):
@@ -206,10 +194,16 @@ def fit_transform(self, X, y=None, **fit_params):
206194
the pipeline.
207195
"""
208196
Xt, fit_params = self._pre_transform(X, y, **fit_params)
197+
start_time = time.time()
209198
if hasattr(self.steps[-1][-1], 'fit_transform'):
210-
return self.steps[-1][-1].fit_transform(Xt, y, **fit_params)
199+
ret = self.steps[-1][-1].fit_transform(Xt, y, **fit_params)
211200
else:
212-
return self.steps[-1][-1].fit(Xt, y, **fit_params).transform(Xt)
201+
ret = self.steps[-1][-1].fit(Xt, y, **fit_params).transform(Xt)
202+
203+
if self.verbose:
204+
elapsed = time.time() - start_time
205+
print('[Pipeline] %s ... %s' % (self.steps[-1][0], elapsed))
206+
return ret
213207

214208
@if_delegate_has_method(delegate='_final_estimator')
215209
def predict(self, X):

sklearn/tests/test_pipeline.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -492,8 +492,7 @@ def test_verbosity():
492492
('kbest', SelectKBest(k=1)), ('lr', LinearRegression())
493493
], verbose=True)
494494
verbose_reg.fit(X, y)
495-
assert_true("[Pipeline] kbest, fit" in out.getvalue())
496-
assert_true("[Pipeline] kbest, transform" in out.getvalue())
497-
assert_true("[Pipeline] lr, fit" in out.getvalue())
495+
assert_true("[Pipeline] kbest ..." in out.getvalue())
496+
assert_true("[Pipeline] lr ..." in out.getvalue())
498497
finally:
499498
sys.stdout = old_stdout

0 commit comments

Comments
 (0)