Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 26cd806

Browse files
committed
Merge pull request scikit-learn#4345 from bendavies/20newsgroups_example
update 20newsgroups text classification example for best practice
2 parents 9ce8322 + 3d8114e commit 26cd806

File tree

1 file changed

+7
-17
lines changed

1 file changed

+7
-17
lines changed

examples/text/document_classification_20newsgroups.py

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from sklearn.feature_extraction.text import HashingVectorizer
3737
from sklearn.feature_selection import SelectKBest, chi2
3838
from sklearn.linear_model import RidgeClassifier
39+
from sklearn.pipeline import Pipeline
3940
from sklearn.svm import LinearSVC
4041
from sklearn.linear_model import SGDClassifier
4142
from sklearn.linear_model import Perceptron
@@ -276,25 +277,14 @@ def benchmark(clf):
276277
results.append(benchmark(MultinomialNB(alpha=.01)))
277278
results.append(benchmark(BernoulliNB(alpha=.01)))
278279

279-
280-
class L1LinearSVC(LinearSVC):
281-
282-
def fit(self, X, y):
283-
# The smaller C, the stronger the regularization.
284-
# The more regularization, the more sparsity.
285-
self.transformer_ = LinearSVC(penalty="l1",
286-
dual=False, tol=1e-3)
287-
X = self.transformer_.fit_transform(X, y)
288-
return LinearSVC.fit(self, X, y)
289-
290-
def predict(self, X):
291-
X = self.transformer_.transform(X)
292-
return LinearSVC.predict(self, X)
293-
294280
print('=' * 80)
295281
print("LinearSVC with L1-based feature selection")
296-
results.append(benchmark(L1LinearSVC()))
297-
282+
# The smaller C, the stronger the regularization.
283+
# The more regularization, the more sparsity.
284+
results.append(benchmark(Pipeline([
285+
('feature_selection', LinearSVC(penalty="l1", dual=False, tol=1e-3)),
286+
('classification', LinearSVC())
287+
])))
298288

299289
# make some plots
300290

0 commit comments

Comments (0)