Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 4e88f3d

Browse files
committed
Optional verbosity for Pipeline
Adds a named `verbose` argument to the Pipeline constructor. After each step, a verbose pipeline prints a line to standard output of the form: > [Pipeline] <step name>, <action> (fit or transform), <duration>
1 parent 86b1ba7 commit 4e88f3d

File tree

2 files changed

+60
-8
lines changed

2 files changed

+60
-8
lines changed

sklearn/pipeline.py

+32-6
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@
1616
from scipy import sparse
1717

1818
from .base import BaseEstimator, TransformerMixin
19-
from .externals.joblib import Parallel, delayed
19+
from .externals.joblib import Parallel, delayed, logger
2020
from .externals import six
2121
from .utils import tosequence
2222
from .utils.metaestimators import if_delegate_has_method
2323
from .externals.six import iteritems
24+
import time
2425

2526
__all__ = ['Pipeline', 'FeatureUnion']
2627

@@ -72,7 +73,8 @@ class Pipeline(BaseEstimator):
7273
>>> # and a parameter 'C' of the svm
7374
>>> anova_svm.set_params(anova__k=10, svc__C=.1).fit(X, y)
7475
... # doctest: +ELLIPSIS
75-
Pipeline(steps=[...])
76+
Pipeline(steps=[...],
77+
verbose=False)
7678
>>> prediction = anova_svm.predict(X)
7779
>>> anova_svm.score(X, y) # doctest: +ELLIPSIS
7880
0.77...
@@ -86,28 +88,51 @@ class Pipeline(BaseEstimator):
8688

8789
# BaseEstimator interface
8890

89-
def __init__(self, steps):
91+
def __init__(self, steps, verbose=False):
    """Validate the steps and install timing wrappers on their methods.

    Parameters
    ----------
    steps : sequence of (name, estimator) pairs
        Every estimator except the last must provide ``transform`` and
        at least one of ``fit`` / ``fit_transform``; the last one must
        provide ``fit``.
    verbose : bool, default False
        Stored as ``self.verbose``. The timing wrappers read it each time
        they are called and print a report line when it is truthy.

    Raises
    ------
    ValueError
        If two steps share the same name.
    TypeError
        If a step does not implement the required methods.
    """
    names, estimators = zip(*steps)
    if len(dict(steps)) != len(steps):
        raise ValueError("Provided step names are not unique: %s" % (names,))

    # shallow copy of steps
    self.steps = tosequence(steps)
    self.verbose = verbose
    transforms = estimators[:-1]
    estimator = estimators[-1]

    # Validate every step *before* touching any of them, so that an
    # invalid pipeline raises without having modified the estimators
    # supplied by the caller.
    for t in transforms:
        if (not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not
                hasattr(t, "transform")):
            raise TypeError("All intermediate steps of the chain should "
                            "be transforms and implement fit and transform"
                            " '%s' (type %s) doesn't)" % (t, type(t)))

    if not hasattr(estimator, "fit"):
        raise TypeError("Last step of chain should implement fit "
                        "'%s' (type %s) doesn't)"
                        % (estimator, type(estimator)))

    # NOTE(review): the wrappers are stored as instance attributes on the
    # caller's estimator objects. Building another Pipeline from the same
    # estimators wraps the already-wrapped methods again, and the nested
    # wrapper functions are presumably not picklable -- confirm whether
    # cloning / pickling of steps still works with this design.
    # zip() stops at the shorter sequence, i.e. it skips the final step.
    for step_name, t in zip(names, transforms):
        if hasattr(t, "fit"):
            t.fit = self._wrap_timer(t.fit, step_name, "fit")
        # ``transform`` is guaranteed to exist by the validation above.
        t.transform = self._wrap_timer(t.transform, step_name, "transform")

    estimator.fit = self._wrap_timer(estimator.fit, names[-1], "fit")
110122

123+
def _wrap_timer(self, f, name, action):
    """Return a wrapper around ``f`` that times each call.

    Parameters
    ----------
    f : callable
        The callable to wrap; all positional and keyword arguments are
        forwarded to it unchanged.
    name : str
        Step name used in the report line.
    action : str
        Label of the wrapped operation used in the report line
        (the constructor passes "fit" or "transform").

    Returns
    -------
    timed_f : callable
        Calls ``f`` and returns its result. If ``self.verbose`` is truthy
        once ``f`` has returned, prints
        ``[Pipeline] <name>, <action>, <duration>`` to standard output,
        with the duration formatted by ``logger.short_format_time``.
    """
    # Imported locally so this helper does not rely on a module-level
    # import being present.
    import functools

    # Only copy the metadata attributes that ``f`` actually has, so that
    # wrapping never fails on callables lacking e.g. ``__name__``.
    assigned = [a for a in functools.WRAPPER_ASSIGNMENTS if hasattr(f, a)]
    updated = [a for a in functools.WRAPPER_UPDATES if hasattr(f, a)]

    @functools.wraps(f, assigned=assigned, updated=updated)
    def timed_f(*args, **kwargs):
        start_time = time.time()
        ret = f(*args, **kwargs)
        elapsed_time = time.time() - start_time

        # ``self.verbose`` is read at call time, so toggling it after
        # construction takes effect. The duration is only formatted when
        # it is actually going to be printed.
        if self.verbose:
            time_str = logger.short_format_time(elapsed_time)
            print('[Pipeline] %s, %s, %s' % (name, action, time_str))

        return ret

    return timed_f
135+
111136
@property
112137
def _estimator_type(self):
113138
return self.steps[-1][1]._estimator_type
@@ -379,7 +404,8 @@ def make_pipeline(*steps):
379404
>>> make_pipeline(StandardScaler(), GaussianNB()) # doctest: +NORMALIZE_WHITESPACE
380405
Pipeline(steps=[('standardscaler',
381406
StandardScaler(copy=True, with_mean=True, with_std=True)),
382-
('gaussiannb', GaussianNB())])
407+
('gaussiannb', GaussianNB())],
408+
verbose=False)
383409
384410
Returns
385411
-------

sklearn/tests/test_pipeline.py

+28-2
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
"""
22
Test the pipeline module.
33
"""
4+
import sys
45
import numpy as np
56
from scipy import sparse
67

78
from sklearn.externals.six.moves import zip
9+
from sklearn.externals.six import StringIO
810
from sklearn.utils.testing import assert_raises, assert_raises_regex, assert_raise_message
911
from sklearn.utils.testing import assert_equal
1012
from sklearn.utils.testing import assert_false
@@ -449,7 +451,6 @@ def test_feature_union_feature_names():
449451
assert_true("chars__" in feat or "words__" in feat)
450452
assert_equal(len(feature_names), 35)
451453

452-
453454
def test_classes_property():
454455
iris = load_iris()
455456
X = iris.data
@@ -464,10 +465,35 @@ def test_classes_property():
464465
clf.fit(X, y)
465466
assert_array_equal(clf.classes_, np.unique(y))
466467

467-
468468
def test_X1d_inverse_transform():
    # Check with assert_warns_message that calling inverse_transform on a
    # 1d array is reported as a FutureWarning carrying the expected text.
    expected_msg = "1d X will not be reshaped in pipeline.inverse_transform"
    one_dim_X = np.ones(10)
    pipe = make_pipeline(TransfT())
    assert_warns_message(FutureWarning, expected_msg,
                         pipe.inverse_transform, one_dim_X)
474+
475+
def test_verbosity():
    # A pipeline must print its "[Pipeline] ..." report lines to stdout
    # only when it was constructed with verbose=True.
    iris = load_iris()
    X, y = iris.data, iris.target

    saved_stdout = sys.stdout
    try:
        # Redirect stdout into a buffer for the duration of both fits.
        captured = StringIO()
        sys.stdout = captured

        quiet_reg = Pipeline(
            [('kbest', SelectKBest(k=1)), ('lr', LinearRegression())],
            verbose=False)
        quiet_reg.fit(X, y)
        assert_true("[Pipeline]" not in captured.getvalue())

        chatty_reg = Pipeline(
            [('kbest', SelectKBest(k=1)), ('lr', LinearRegression())],
            verbose=True)
        chatty_reg.fit(X, y)
        for expected_line in ("[Pipeline] kbest, fit",
                              "[Pipeline] kbest, transform",
                              "[Pipeline] lr, fit"):
            assert_true(expected_line in captured.getvalue())
    finally:
        # Always restore the real stdout, even if an assertion failed.
        sys.stdout = saved_stdout

0 commit comments

Comments
 (0)