diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 74cab9b7d5272..571201c10f9e9 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -45,6 +45,7 @@ Changelog :pr:`123456` by :user:`Joe Bloggs `. where 123456 is the *pull request* number, not the issue number. + :mod:`sklearn.base` ................... @@ -143,12 +144,17 @@ Changelog :pr:`14114` by :user:`Guillaume Lemaitre `. :mod:`sklearn.feature_extraction` -....................... +................................. - |Fix| Functions created by build_preprocessor and build_analyzer of :class:`feature_extraction.text.VectorizerMixin` can now be pickled. :pr:`14430` by :user:`Dillon Niederhut `. +- |API| Deprecated unused `copy` param for + :meth:`feature_extraction.text.TfidfVectorizer.transform`; it will be + removed in v0.24. :pr:`14520` by + :user:`Guillem G. Subies `. + :mod:`sklearn.gaussian_process` ............................... diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 2bc1ad25bca63..9b54e16e93baf 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -509,6 +509,18 @@ def test_tfidf_vectorizer_setters(): assert tv._tfidf.sublinear_tf +# FIXME Remove copy parameter support in 0.24 +def test_tfidf_vectorizer_deprecationwarning(): + msg = ("'copy' param is unused and has been deprecated since " + "version 0.22. 
Backward compatibility for 'copy' will " + "be removed in 0.24.") + with pytest.warns(DeprecationWarning, match=msg): + tv = TfidfVectorizer() + train_data = JUNK_FOOD_DOCS + tv.fit(train_data) + tv.transform(train_data, copy=True) + + @fails_if_pypy def test_hashing_vectorizer(): v = HashingVectorizer() diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index ed4d41cc464f8..8a1670561be46 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -1729,7 +1729,7 @@ def fit_transform(self, raw_documents, y=None): # we set copy to False return self._tfidf.transform(X, copy=False) - def transform(self, raw_documents, copy=True): + def transform(self, raw_documents, copy="deprecated"): """Transform documents to document-term matrix. Uses the vocabulary and document frequencies (df) learned by fit (or @@ -1744,6 +1744,11 @@ def transform(self, raw_documents, copy=True): Whether to copy X and operate on the copy or perform in-place operations. + .. deprecated:: 0.22 + The `copy` parameter is unused and was deprecated in version + 0.22 and will be removed in 0.24. This parameter will be + ignored. + Returns ------- X : sparse matrix, [n_samples, n_features] @@ -1751,6 +1756,12 @@ def transform(self, raw_documents, copy=True): """ check_is_fitted(self, '_tfidf', 'The tfidf vector is not fitted') + # FIXME Remove copy parameter support in 0.24 + if copy != "deprecated": + msg = ("'copy' param is unused and has been deprecated since " + "version 0.22. Backward compatibility for 'copy' will " + "be removed in 0.24.") + warnings.warn(msg, DeprecationWarning) X = super().transform(raw_documents) return self._tfidf.transform(X, copy=False)