diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 6c98ea70efad8..348ae8df2173f 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -356,7 +356,7 @@ def fit_predict(self, X, y=None, **fit_params): Xt = X for name, transform in self.steps[:-1]: if transform is not None: - Xt = transform.transform(Xt) + Xt = transform.fit_transform(Xt) return self.steps[-1][-1].fit_predict(Xt, y, **fit_params) @if_delegate_has_method(delegate='_final_estimator') diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 518c966169a28..325e9112c14cb 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -278,13 +278,20 @@ def test_fit_predict_on_pipeline(): iris = load_iris() scaler = StandardScaler() km = KMeans(random_state=0) + # As pipeline doesn't clone estimators on construction, + # it must have its own estimators + scaler_for_pipeline = StandardScaler() + km_for_pipeline = KMeans(random_state=0) # first compute the transform and clustering step separately scaled = scaler.fit_transform(iris.data) separate_pred = km.fit_predict(scaled) # use a pipeline to do the transform and clustering in one step - pipe = Pipeline([('scaler', scaler), ('Kmeans', km)]) + pipe = Pipeline([ + ('scaler', scaler_for_pipeline), + ('Kmeans', km_for_pipeline) + ]) pipeline_pred = pipe.fit_predict(iris.data) assert_array_almost_equal(pipeline_pred, separate_pred)