From c15d5c8ea4663574c128ee8c8910e0e722e00a37 Mon Sep 17 00:00:00 2001
From: Sharon Tsao
Date: Sat, 29 Sep 2018 12:27:13 -0400
Subject: [PATCH 1/2] tfidfvectorizer documentation

---
 sklearn/feature_extraction/text.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
index 05f60d2805c7c..a0cfc6c1e9acc 100644
--- a/sklearn/feature_extraction/text.py
+++ b/sklearn/feature_extraction/text.py
@@ -1286,7 +1286,23 @@ def idf_(self, value):
 class TfidfVectorizer(CountVectorizer):
     """Convert a collection of raw documents to a matrix of TF-IDF features.
 
-    Equivalent to CountVectorizer followed by TfidfTransformer.
+    Equivalent to CountVectorizer followed by TfidfTransformer.
+
+    CountVectorizer converts a collection of text documents to a matrix of token counts.
+
+    TfidfTransformer then converts the count matrix from CountVectorizer to a normalized tf-idf representation. Tf is term frequency, and idf is inverse document frequency. This is a common way to calculate the count of a word relative to the appearance of a ducument.
+
+    The formula that is used to compute the tf-idf of term t is
+    tf-idf(d, t) = tf(t) * idf(d, t), and the idf is computed as
+    idf(d, t) = log [ n / df(d, t) ] + 1 (if ``smooth_idf=False``),
+    where n is the total number of documents and df(d, t) is the
+    document frequency; the document frequency is the number of documents d
+    that contain term t. The effect of adding "1" to the idf in the equation
+    above is that terms with zero idf, i.e., terms that occur in all documents
+    in a training set, will not be entirely ignored.
+    (Note that the idf formula above differs from the standard
+    textbook notation that defines the idf as
+    idf(d, t) = log [ n / (df(d, t) + 1) ]).
 
     Read more in the :ref:`User Guide <text_feature_extraction>`.
 
From 93ae2d10fed312dbea0d787bd2df83f3db1a3654 Mon Sep 17 00:00:00 2001
From: Sharon Tsao
Date: Sat, 29 Sep 2018 13:02:27 -0400
Subject: [PATCH 2/2] adding line breaks

---
 sklearn/feature_extraction/text.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
index a0cfc6c1e9acc..f1b87b8470bf7 100644
--- a/sklearn/feature_extraction/text.py
+++ b/sklearn/feature_extraction/text.py
@@ -1288,9 +1288,13 @@ class TfidfVectorizer(CountVectorizer):
 
     Equivalent to CountVectorizer followed by TfidfTransformer.
 
-    CountVectorizer converts a collection of text documents to a matrix of token counts.
+    CountVectorizer converts a collection of text documents to a matrix of
+    token counts.
 
-    TfidfTransformer then converts the count matrix from CountVectorizer to a normalized tf-idf representation. Tf is term frequency, and idf is inverse document frequency. This is a common way to calculate the count of a word relative to the appearance of a ducument.
+    TfidfTransformer then converts the count matrix from CountVectorizer to a
+    normalized tf-idf representation. Tf is term frequency, and idf is inverse
+    document frequency. This is a common way to calculate the count of a word
+    relative to the appearance of a document.
 
     The formula that is used to compute the tf-idf of term t is
     tf-idf(d, t) = tf(t) * idf(d, t), and the idf is computed as