From 365778bc74dbf9d0708721cc9528f1e0625e408c Mon Sep 17 00:00:00 2001 From: "tobias.pitters" Date: Tue, 23 Aug 2022 19:52:28 +0200 Subject: [PATCH 1/3] fix docstring for strip-accents-unicode --- sklearn/tests/test_docstrings.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/tests/test_docstrings.py b/sklearn/tests/test_docstrings.py index 59ae00f2e803a..8ccc7face3699 100644 --- a/sklearn/tests/test_docstrings.py +++ b/sklearn/tests/test_docstrings.py @@ -16,7 +16,6 @@ "sklearn.decomposition._dict_learning.dict_learning_online", "sklearn.decomposition._nmf.non_negative_factorization", "sklearn.externals._packaging.version.parse", - "sklearn.feature_extraction.text.strip_accents_unicode", "sklearn.inspection._plot.partial_dependence.plot_partial_dependence", "sklearn.linear_model._least_angle.lars_path_gram", "sklearn.linear_model._omp.orthogonal_mp_gram", From 255c49def3fe773628590dfe1354f9cb82cb8719 Mon Sep 17 00:00:00 2001 From: "tobias.pitters" Date: Tue, 23 Aug 2022 20:02:10 +0200 Subject: [PATCH 2/3] fix docstring for strip accents unicode --- sklearn/feature_extraction/text.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 01c8b49370cbf..5b0812ab3845c 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -122,7 +122,7 @@ def _analyze( def strip_accents_unicode(s): - """Transform accentuated unicode symbols into their simple counterpart + """Transform accentuated unicode symbols into their simple counterpart. Warning: the python-level loop and join operations make this implementation 20 times slower than the strip_accents_ascii basic @@ -130,13 +130,21 @@ def strip_accents_unicode(s): Parameters ---------- - s : string - The string to strip + s : str + The string to strip. + + Returns + ------- + s : str + The stripped string. See Also -------- strip_accents_ascii : Remove accentuated char for any unicode symbol that has a direct ASCII equivalent. + """ + """_summary_ + """ try: # If `s` is ASCII-compatible, then it does not contain any accented From 7b44c97f5cfda6fa970ccefbc0302283324a3c37 Mon Sep 17 00:00:00 2001 From: Tobias Pitters <31857876+CloseChoice@users.noreply.github.com> Date: Wed, 24 Aug 2022 18:20:15 +0200 Subject: [PATCH 3/3] Update sklearn/feature_extraction/text.py Co-authored-by: Thomas J. Fan --- sklearn/feature_extraction/text.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 5b0812ab3845c..b10ffa89d66de 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -142,9 +142,6 @@ def strip_accents_unicode(s): -------- strip_accents_ascii : Remove accentuated char for any unicode symbol that has a direct ASCII equivalent. - """ - """_summary_ - """ try: # If `s` is ASCII-compatible, then it does not contain any accented