From d0b11c4c97a85827b1969dea6ca0dd7d698e9f10 Mon Sep 17 00:00:00 2001 From: apantykhin Date: Wed, 25 Apr 2018 00:06:28 +0400 Subject: [PATCH 1/4] set force_all_finite=False for SelectFromModel.transform --- sklearn/feature_selection/base.py | 5 ++++- sklearn/feature_selection/from_model.py | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/sklearn/feature_selection/base.py b/sklearn/feature_selection/base.py index 3067d6ef31bc1..9adc10ca1b60b 100644 --- a/sklearn/feature_selection/base.py +++ b/sklearn/feature_selection/base.py @@ -72,7 +72,10 @@ def transform(self, X): X_r : array of shape [n_samples, n_selected_features] The input samples with only the selected features. """ - X = check_array(X, accept_sparse='csr') + return self._transform(X) + + def _transform(self, X, force_all_finite=True): + X = check_array(X, force_all_finite=force_all_finite, accept_sparse='csr') mask = self.get_support() if not mask.any(): warn("No features were selected: either the data is" diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py index 657259f39ea17..42618a19e3b54 100644 --- a/sklearn/feature_selection/from_model.py +++ b/sklearn/feature_selection/from_model.py @@ -143,6 +143,9 @@ def _get_support_mask(self): threshold = _calculate_threshold(estimator, scores, self.threshold) return scores >= threshold + def transform(self, X): + return self._transform(X, False) + def fit(self, X, y=None, **fit_params): """Fit the SelectFromModel meta-transformer. 
From 26ef4d0706a31d939f41f3586175eba3db84c772 Mon Sep 17 00:00:00 2001 From: apantykhin Date: Wed, 25 Apr 2018 01:03:15 +0400 Subject: [PATCH 2/4] fix flake8 --- sklearn/feature_selection/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/feature_selection/base.py b/sklearn/feature_selection/base.py index 9adc10ca1b60b..a9239e7fc9a8c 100644 --- a/sklearn/feature_selection/base.py +++ b/sklearn/feature_selection/base.py @@ -75,7 +75,8 @@ def transform(self, X): return self._transform(X) def _transform(self, X, force_all_finite=True): - X = check_array(X, force_all_finite=force_all_finite, accept_sparse='csr') + X = check_array(X, force_all_finite=force_all_finite, + accept_sparse='csr') mask = self.get_support() if not mask.any(): warn("No features were selected: either the data is" From 2642357e0f579635c1e26d5e3576226113d249fd Mon Sep 17 00:00:00 2001 From: apantykhin Date: Wed, 25 Apr 2018 20:23:38 +0400 Subject: [PATCH 3/4] add test --- sklearn/feature_selection/tests/test_from_model.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py index 6efec43dce37b..a076e8d5177ab 100644 --- a/sklearn/feature_selection/tests/test_from_model.py +++ b/sklearn/feature_selection/tests/test_from_model.py @@ -198,3 +198,16 @@ def test_threshold_without_refitting(): # Set a higher threshold to filter out more features. model.threshold = "1.0 * mean" assert_greater(X_transform.shape[1], model.transform(data).shape[1]) + + +def test_transform_accepts_infinite_data(): + # Test that transform doesn't check for np.inf and np.nan values. 
+ est = RandomForestClassifier() + model = SelectFromModel(estimator=est) + model.fit(data, y) + + X_len = len(model.get_support()) + X = np.arange(X_len).reshape(1, X_len) + X.fill(np.inf) + + model.transform(X) From f628d5e24c3e09fac61c8b8acb51897a718c860a Mon Sep 17 00:00:00 2001 From: apantykhin Date: Fri, 27 Apr 2018 23:29:54 +0400 Subject: [PATCH 4/4] add test nan --- sklearn/feature_selection/tests/test_from_model.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py index a076e8d5177ab..d0fde777b13c8 100644 --- a/sklearn/feature_selection/tests/test_from_model.py +++ b/sklearn/feature_selection/tests/test_from_model.py @@ -1,5 +1,6 @@ import numpy as np +from sklearn.impute import SimpleImputer from sklearn.utils.testing import assert_true from sklearn.utils.testing import assert_false from sklearn.utils.testing import assert_equal @@ -202,8 +203,9 @@ def test_threshold_without_refitting(): def test_transform_accepts_infinite_data(): # Test that transform doesn't check for np.inf and np.nan values. - est = RandomForestClassifier() + est = SimpleImputer(strategy='mean', missing_values='NaN') model = SelectFromModel(estimator=est) + data[2, :] = np.nan model.fit(data, y) X_len = len(model.get_support())