diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index f8f741a862594..3ccda942b65e1 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -30,7 +30,6 @@ from numpy.testing import assert_array_almost_equal from numpy.testing import assert_array_equal from sklearn.utils import IS_PYPY -from sklearn.exceptions import ChangedBehaviorWarning from sklearn.utils._testing import (assert_almost_equal, assert_warns_message, assert_raise_message, clean_warning_registry, @@ -1294,12 +1293,8 @@ def test_callable_analyzer_error(Estimator, input_type, err_type, err_msg): @pytest.mark.parametrize('input_type', ['file', 'filename']) def test_callable_analyzer_change_behavior(Estimator, analyzer, input_type): data = ['this is text, not file or filename'] - warn_msg = 'Since v0.21, vectorizer' with pytest.raises((FileNotFoundError, AttributeError)): - with pytest.warns(ChangedBehaviorWarning, match=warn_msg) as records: - Estimator(analyzer=analyzer, input=input_type).fit_transform(data) - assert len(records) == 1 - assert warn_msg in str(records[0]) + Estimator(analyzer=analyzer, input=input_type).fit_transform(data) @pytest.mark.parametrize( diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 5b127a10962bc..4954329728d5e 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -32,7 +32,7 @@ from ..utils.validation import check_is_fitted, check_array, FLOAT_DTYPES from ..utils import _IS_32BIT, deprecated from ..utils.fixes import _astype_copy_false -from ..exceptions import ChangedBehaviorWarning, NotFittedError +from ..exceptions import NotFittedError __all__ = ['HashingVectorizer', @@ -390,28 +390,6 @@ def _check_stop_words_consistency(self, stop_words, preprocess, tokenize): self._stop_words_id = id(self.stop_words) return 'error' - def _validate_custom_analyzer(self): - # This is to check if the given custom analyzer expects file or a - # filename instead of data. - # Behavior changed in v0.21, function could be removed in v0.23 - import tempfile - with tempfile.NamedTemporaryFile() as f: - fname = f.name - # now we're sure fname doesn't exist - - msg = ("Since v0.21, vectorizers pass the data to the custom analyzer " - "and not the file names or the file objects. This warning " - "will be removed in v0.23.") - try: - self.analyzer(fname) - except FileNotFoundError: - warnings.warn(msg, ChangedBehaviorWarning) - except AttributeError as e: - if str(e) == "'str' object has no attribute 'read'": - warnings.warn(msg, ChangedBehaviorWarning) - except Exception: - pass - def build_analyzer(self): """Return a callable that handles preprocessing, tokenization and n-grams generation. @@ -424,8 +402,6 @@ def build_analyzer(self): """ if callable(self.analyzer): - if self.input in ['file', 'filename']: - self._validate_custom_analyzer() return partial( _analyze, analyzer=self.analyzer, decoder=self.decode )