16
16
from scipy import sparse
17
17
18
18
from .base import BaseEstimator , TransformerMixin
19
- from .externals .joblib import Parallel , delayed
19
+ from .externals .joblib import Parallel , delayed , logger
20
20
from .externals import six
21
21
from .utils import tosequence
22
22
from .utils .metaestimators import if_delegate_has_method
23
23
from .externals .six import iteritems
24
+ import time
24
25
25
26
__all__ = ['Pipeline' , 'FeatureUnion' ]
26
27
@@ -72,7 +73,8 @@ class Pipeline(BaseEstimator):
72
73
>>> # and a parameter 'C' of the svm
73
74
>>> anova_svm.set_params(anova__k=10, svc__C=.1).fit(X, y)
74
75
... # doctest: +ELLIPSIS
75
- Pipeline(steps=[...])
76
+ Pipeline(steps=[...],
77
+ verbose=False)
76
78
>>> prediction = anova_svm.predict(X)
77
79
>>> anova_svm.score(X, y) # doctest: +ELLIPSIS
78
80
0.77...
@@ -86,28 +88,51 @@ class Pipeline(BaseEstimator):
86
88
87
89
# BaseEstimator interface
88
90
89
def __init__(self, steps, verbose=False):
    """Validate the chain of steps and install timing wrappers on them.

    Parameters
    ----------
    steps : list of (name, estimator) pairs
        Estimators chained in order. Every estimator but the last must
        expose ``transform`` together with ``fit`` or ``fit_transform``;
        the last one must expose ``fit``.
    verbose : bool, default False
        Stored as ``self.verbose``. The timing wrappers read it each time
        a wrapped method returns to decide whether to print a report.

    Raises
    ------
    ValueError
        If two steps share the same name.
    TypeError
        If an intermediate step is not a transformer, or if the last
        step does not implement ``fit``.
    """
    names, estimators = zip(*steps)
    if len(dict(steps)) != len(steps):
        raise ValueError("Provided step names are not unique: %s" % (names,))

    # shallow copy of steps
    self.steps = tosequence(steps)
    self.verbose = verbose
    transforms = estimators[:-1]
    estimator = estimators[-1]

    # Validate the whole chain before touching any estimator, so that a
    # rejected pipeline does not leave the caller's estimators with their
    # methods already replaced by timing wrappers.
    for t in transforms:
        if (not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not
                hasattr(t, "transform")):
            raise TypeError("All intermediate steps of the chain should "
                            "be transforms and implement fit and transform"
                            " '%s' (type %s) doesn't)" % (t, type(t)))

    if not hasattr(estimator, "fit"):
        raise TypeError("Last step of chain should implement fit "
                        "'%s' (type %s) doesn't)"
                        % (estimator, type(estimator)))

    # NOTE(review): the wrappers are set as instance attributes on the
    # estimators supplied by the caller, so building a second Pipeline
    # around the same estimator object stacks a second wrapper on top.
    # zip() stops at the shorter sequence, pairing each intermediate step
    # with its own name and leaving the final estimator's name out.
    for step_name, t in zip(names, transforms):
        # ``fit`` is optional for a transformer providing ``fit_transform``.
        if hasattr(t, "fit"):
            t.fit = self._wrap_timer(t.fit, step_name, "fit")
        # ``transform`` is guaranteed by the validation above.
        t.transform = self._wrap_timer(t.transform, step_name, "transform")

    estimator.fit = self._wrap_timer(estimator.fit, names[-1], "fit")
110
122
123
def _wrap_timer(self, f, name, action):
    """Return a wrapper around ``f`` that can report each call's duration.

    Parameters
    ----------
    f : callable
        The function or bound method to time.
    name : str
        Step name shown in the report.
    action : str
        Label of the timed operation shown in the report (the callers in
        this class pass "fit" or "transform").

    Returns
    -------
    timed_f : callable
        Forwards all positional and keyword arguments to ``f`` and returns
        its result unchanged. When ``self.verbose`` is true at the moment
        ``f`` returns, prints ``[Pipeline] <name>, <action>, <duration>``.
        Nothing is printed if ``f`` raises.
    """
    def timed_f(*args, **kwargs):
        start_time = time.time()
        ret = f(*args, **kwargs)
        elapsed_time = time.time() - start_time
        # Verbosity is looked up on every call, so changing
        # ``self.verbose`` after construction takes effect immediately.
        if self.verbose:
            # Format the duration only when it is actually displayed.
            time_str = logger.short_format_time(elapsed_time)
            print('[Pipeline] %s, %s, %s' % (name, action, time_str))

        return ret

    return timed_f
135
+
111
136
@property
def _estimator_type(self):
    """Estimator type tag, taken from the estimator of the last step."""
    last_step = self.steps[-1]
    # Each step is indexed as (name, estimator); the tag lives on the
    # estimator.
    final_estimator = last_step[1]
    return final_estimator._estimator_type
@@ -379,7 +404,8 @@ def make_pipeline(*steps):
379
404
>>> make_pipeline(StandardScaler(), GaussianNB()) # doctest: +NORMALIZE_WHITESPACE
380
405
Pipeline(steps=[('standardscaler',
381
406
StandardScaler(copy=True, with_mean=True, with_std=True)),
382
- ('gaussiannb', GaussianNB())])
407
+ ('gaussiannb', GaussianNB())],
408
+ verbose=False)
383
409
384
410
Returns
385
411
-------
0 commit comments