From fe538a55a0d08d9863e32e988c80467616ec4f83 Mon Sep 17 00:00:00 2001 From: Tialo Date: Mon, 31 Jul 2023 15:00:58 +0300 Subject: [PATCH 1/2] added links to "plot_hashing_vs_dict_vectorizer" example --- sklearn/feature_extraction/_dict_vectorizer.py | 3 +++ sklearn/feature_extraction/_hash.py | 3 +++ sklearn/feature_extraction/text.py | 12 ++++++++++++ 3 files changed, 18 insertions(+) diff --git a/sklearn/feature_extraction/_dict_vectorizer.py b/sklearn/feature_extraction/_dict_vectorizer.py index e32de4be42462..0b9ea2f202cb0 100644 --- a/sklearn/feature_extraction/_dict_vectorizer.py +++ b/sklearn/feature_extraction/_dict_vectorizer.py @@ -42,6 +42,9 @@ class DictVectorizer(TransformerMixin, BaseEstimator): Features that do not occur in a sample (mapping) will have a zero value in the resulting array/matrix. + For an efficiency comparison of the different feature extractors, see + :ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`. + Read more in the :ref:`User Guide `. Parameters diff --git a/sklearn/feature_extraction/_hash.py b/sklearn/feature_extraction/_hash.py index e0941ed1dac97..2019552ae65bb 100644 --- a/sklearn/feature_extraction/_hash.py +++ b/sklearn/feature_extraction/_hash.py @@ -34,6 +34,9 @@ class FeatureHasher(TransformerMixin, BaseEstimator): where memory is tight, e.g. when running prediction code on embedded devices. + For an efficiency comparison of the different feature extractors, see + :ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`. + Read more in the :ref:`User Guide `. .. versionadded:: 0.13 diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index c6d826539ef49..81bcfed33ed18 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -602,6 +602,9 @@ class HashingVectorizer( The hash function employed is the signed 32-bit version of Murmurhash3. 
+ For an efficiency comparison of the different feature extractors, see + :ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`. + Read more in the :ref:`User Guide `. Parameters @@ -933,6 +936,9 @@ class CountVectorizer(_VectorizerMixin, BaseEstimator): that does some kind of feature selection then the number of features will be equal to the vocabulary size found by analyzing the data. + For an efficiency comparison of the different feature extractors, see + :ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`. + Read more in the :ref:`User Guide `. Parameters @@ -1534,6 +1540,9 @@ class TfidfTransformer( Normalization is "c" (cosine) when ``norm='l2'``, "n" (none) when ``norm=None``. + For an efficiency comparison of the different feature extractors, see + :ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`. + Read more in the :ref:`User Guide `. Parameters @@ -1758,6 +1767,9 @@ class TfidfVectorizer(CountVectorizer): For an example of usage, see :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py`. + For an efficiency comparison of the different feature extractors, see + :ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`. + Read more in the :ref:`User Guide `. Parameters From d8e0d3505bf0b760d6a3eb023374aa7ca099736b Mon Sep 17 00:00:00 2001 From: Tialo Date: Sat, 12 Aug 2023 17:12:38 +0300 Subject: [PATCH 2/2] deleted link from TfidfTransformer --- sklearn/feature_extraction/text.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 81bcfed33ed18..b2ef28de75766 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -1540,9 +1540,6 @@ class TfidfTransformer( Normalization is "c" (cosine) when ``norm='l2'``, "n" (none) when ``norm=None``. 
- For an efficiency comparison of the different feature extractors, see - :ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`. - Read more in the :ref:`User Guide `. Parameters