[MRG+1] Pytest parametrize part3 - feature_extraction, gaussian_process modules #11143

Merged: 3 commits, Jun 4, 2018
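Summary: this PR (part 3 of a series) converts tests in the feature_extraction and gaussian_process modules from hand-written for-loops over parameter combinations to pytest.mark.parametrize, and drops the obsolete nose-style run_module_suite entry points from two feature_selection test modules. Stacked parametrize decorators generate the cross product of their argument lists, so every combination is collected and reported as its own test case. A minimal sketch of the pattern (toy names, not code from this PR):

import pytest

# Stacked decorators multiply: 2 flags x 3 values = 6 collected tests.
@pytest.mark.parametrize('flag', (True, False))
@pytest.mark.parametrize('value', (1, 2, 3))
def test_doubling(flag, value):
    result = value * 2 if flag else value
    assert result >= value

Unlike a loop inside one test body, a failing combination is reported individually (with an id such as test_doubling[1-True]) and does not abort the remaining combinations.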
52 changes: 27 additions & 25 deletions sklearn/feature_extraction/tests/test_dict_vectorizer.py
@@ -5,43 +5,45 @@
 from random import Random
 import numpy as np
 import scipy.sparse as sp

 from numpy.testing import assert_array_equal

+import pytest
+
 from sklearn.utils.testing import (assert_equal, assert_in,
                                    assert_false, assert_true)

 from sklearn.feature_extraction import DictVectorizer
 from sklearn.feature_selection import SelectKBest, chi2


-def test_dictvectorizer():
+@pytest.mark.parametrize('sparse', (True, False))
+@pytest.mark.parametrize('dtype', (int, np.float32, np.int16))
+@pytest.mark.parametrize('sort', (True, False))
+@pytest.mark.parametrize('iterable', (True, False))
+def test_dictvectorizer(sparse, dtype, sort, iterable):
     D = [{"foo": 1, "bar": 3},
          {"bar": 4, "baz": 2},
          {"bar": 1, "quux": 1, "quuux": 2}]

-    for sparse in (True, False):
-        for dtype in (int, np.float32, np.int16):
-            for sort in (True, False):
-                for iterable in (True, False):
-                    v = DictVectorizer(sparse=sparse, dtype=dtype, sort=sort)
-                    X = v.fit_transform(iter(D) if iterable else D)
-
-                    assert_equal(sp.issparse(X), sparse)
-                    assert_equal(X.shape, (3, 5))
-                    assert_equal(X.sum(), 14)
-                    assert_equal(v.inverse_transform(X), D)
-
-                    if sparse:
-                        # CSR matrices can't be compared for equality
-                        assert_array_equal(X.A, v.transform(iter(D) if iterable
-                                                            else D).A)
-                    else:
-                        assert_array_equal(X, v.transform(iter(D) if iterable
-                                                          else D))
-
-                    if sort:
-                        assert_equal(v.feature_names_,
-                                     sorted(v.feature_names_))
+    v = DictVectorizer(sparse=sparse, dtype=dtype, sort=sort)
+    X = v.fit_transform(iter(D) if iterable else D)
+
+    assert_equal(sp.issparse(X), sparse)
+    assert_equal(X.shape, (3, 5))
+    assert_equal(X.sum(), 14)
+    assert_equal(v.inverse_transform(X), D)
+
+    if sparse:
+        # CSR matrices can't be compared for equality
+        assert_array_equal(X.A, v.transform(iter(D) if iterable
+                                            else D).A)
+    else:
+        assert_array_equal(X, v.transform(iter(D) if iterable
+                                          else D))
+
+    if sort:
+        assert_equal(v.feature_names_,
+                     sorted(v.feature_names_))


 def test_feature_selection():
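Note: the four stacked decorators above replace a four-level nested loop, so pytest now collects 2 (sparse) x 3 (dtype) x 2 (sort) x 2 (iterable) = 24 separate test_dictvectorizer cases, and one failing combination no longer hides the rest. The expansion can be inspected without executing any test body; a sketch using the standard pytest.main API (the path is the file shown above):

import pytest

# Print the 24 parametrized test ids without running them.
pytest.main(['--collect-only', '-q',
             'sklearn/feature_extraction/tests/test_dict_vectorizer.py'])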
136 changes: 66 additions & 70 deletions sklearn/feature_extraction/tests/test_text.py
@@ -115,42 +115,42 @@ def test_to_ascii():
     assert_equal(strip_accents_ascii(a), expected)


-def test_word_analyzer_unigrams():
-    for Vectorizer in (CountVectorizer, HashingVectorizer):
-        wa = Vectorizer(strip_accents='ascii').build_analyzer()
-        text = ("J'ai mang\xe9 du kangourou ce midi, "
-                "c'\xe9tait pas tr\xeas bon.")
-        expected = ['ai', 'mange', 'du', 'kangourou', 'ce', 'midi',
-                    'etait', 'pas', 'tres', 'bon']
-        assert_equal(wa(text), expected)
-
-        text = "This is a test, really.\n\n I met Harry yesterday."
-        expected = ['this', 'is', 'test', 'really', 'met', 'harry',
-                    'yesterday']
-        assert_equal(wa(text), expected)
-
-        wa = Vectorizer(input='file').build_analyzer()
-        text = StringIO("This is a test with a file-like object!")
-        expected = ['this', 'is', 'test', 'with', 'file', 'like',
-                    'object']
-        assert_equal(wa(text), expected)
-
-        # with custom preprocessor
-        wa = Vectorizer(preprocessor=uppercase).build_analyzer()
-        text = ("J'ai mang\xe9 du kangourou ce midi, "
-                " c'\xe9tait pas tr\xeas bon.")
-        expected = ['AI', 'MANGE', 'DU', 'KANGOUROU', 'CE', 'MIDI',
-                    'ETAIT', 'PAS', 'TRES', 'BON']
-        assert_equal(wa(text), expected)
-
-        # with custom tokenizer
-        wa = Vectorizer(tokenizer=split_tokenize,
-                        strip_accents='ascii').build_analyzer()
-        text = ("J'ai mang\xe9 du kangourou ce midi, "
-                "c'\xe9tait pas tr\xeas bon.")
-        expected = ["j'ai", 'mange', 'du', 'kangourou', 'ce', 'midi,',
-                    "c'etait", 'pas', 'tres', 'bon.']
-        assert_equal(wa(text), expected)
+@pytest.mark.parametrize('Vectorizer', (CountVectorizer, HashingVectorizer))
+def test_word_analyzer_unigrams(Vectorizer):
+    wa = Vectorizer(strip_accents='ascii').build_analyzer()
+    text = ("J'ai mang\xe9 du kangourou ce midi, "
+            "c'\xe9tait pas tr\xeas bon.")
+    expected = ['ai', 'mange', 'du', 'kangourou', 'ce', 'midi',
+                'etait', 'pas', 'tres', 'bon']
+    assert_equal(wa(text), expected)
+
+    text = "This is a test, really.\n\n I met Harry yesterday."
+    expected = ['this', 'is', 'test', 'really', 'met', 'harry',
+                'yesterday']
+    assert_equal(wa(text), expected)
+
+    wa = Vectorizer(input='file').build_analyzer()
+    text = StringIO("This is a test with a file-like object!")
+    expected = ['this', 'is', 'test', 'with', 'file', 'like',
+                'object']
+    assert_equal(wa(text), expected)
+
+    # with custom preprocessor
+    wa = Vectorizer(preprocessor=uppercase).build_analyzer()
+    text = ("J'ai mang\xe9 du kangourou ce midi, "
+            " c'\xe9tait pas tr\xeas bon.")
+    expected = ['AI', 'MANGE', 'DU', 'KANGOUROU', 'CE', 'MIDI',
+                'ETAIT', 'PAS', 'TRES', 'BON']
+    assert_equal(wa(text), expected)
+
+    # with custom tokenizer
+    wa = Vectorizer(tokenizer=split_tokenize,
+                    strip_accents='ascii').build_analyzer()
+    text = ("J'ai mang\xe9 du kangourou ce midi, "
+            "c'\xe9tait pas tr\xeas bon.")
+    expected = ["j'ai", 'mange', 'du', 'kangourou', 'ce', 'midi,',
+                "c'etait", 'pas', 'tres', 'bon.']
+    assert_equal(wa(text), expected)


 def test_word_analyzer_unigrams_and_bigrams():

@@ -574,22 +574,17 @@ def test_feature_names():
     assert_equal(idx, cv.vocabulary_.get(name))


-def test_vectorizer_max_features():
-    vec_factories = (
-        CountVectorizer,
-        TfidfVectorizer,
-    )
-
+@pytest.mark.parametrize('Vectorizer', (CountVectorizer, TfidfVectorizer))
+def test_vectorizer_max_features(Vectorizer):
     expected_vocabulary = set(['burger', 'beer', 'salad', 'pizza'])
     expected_stop_words = set([u'celeri', u'tomato', u'copyright', u'coke',
                                u'sparkling', u'water', u'the'])

-    for vec_factory in vec_factories:
-        # test bounded number of extracted features
-        vectorizer = vec_factory(max_df=0.6, max_features=4)
-        vectorizer.fit(ALL_FOOD_DOCS)
-        assert_equal(set(vectorizer.vocabulary_), expected_vocabulary)
-        assert_equal(vectorizer.stop_words_, expected_stop_words)
+    # test bounded number of extracted features
+    vectorizer = Vectorizer(max_df=0.6, max_features=4)
+    vectorizer.fit(ALL_FOOD_DOCS)
+    assert_equal(set(vectorizer.vocabulary_), expected_vocabulary)
+    assert_equal(vectorizer.stop_words_, expected_stop_words)


 def test_count_vectorizer_max_features():

@@ -713,23 +708,24 @@ def test_hashed_binary_occurrences():
     assert_equal(X.dtype, np.float64)


-def test_vectorizer_inverse_transform():
+@pytest.mark.parametrize('Vectorizer', (CountVectorizer, TfidfVectorizer))
+def test_vectorizer_inverse_transform(Vectorizer):
     # raw documents
     data = ALL_FOOD_DOCS
-    for vectorizer in (TfidfVectorizer(), CountVectorizer()):
-        transformed_data = vectorizer.fit_transform(data)
-        inversed_data = vectorizer.inverse_transform(transformed_data)
-        analyze = vectorizer.build_analyzer()
-        for doc, inversed_terms in zip(data, inversed_data):
-            terms = np.sort(np.unique(analyze(doc)))
-            inversed_terms = np.sort(np.unique(inversed_terms))
-            assert_array_equal(terms, inversed_terms)
-
-        # Test that inverse_transform also works with numpy arrays
-        transformed_data = transformed_data.toarray()
-        inversed_data2 = vectorizer.inverse_transform(transformed_data)
-        for terms, terms2 in zip(inversed_data, inversed_data2):
-            assert_array_equal(np.sort(terms), np.sort(terms2))
+    vectorizer = Vectorizer()
+    transformed_data = vectorizer.fit_transform(data)
+    inversed_data = vectorizer.inverse_transform(transformed_data)
+    analyze = vectorizer.build_analyzer()
+    for doc, inversed_terms in zip(data, inversed_data):
+        terms = np.sort(np.unique(analyze(doc)))
+        inversed_terms = np.sort(np.unique(inversed_terms))
+        assert_array_equal(terms, inversed_terms)
+
+    # Test that inverse_transform also works with numpy arrays
+    transformed_data = transformed_data.toarray()
+    inversed_data2 = vectorizer.inverse_transform(transformed_data)
+    for terms, terms2 in zip(inversed_data, inversed_data2):
+        assert_array_equal(np.sort(terms), np.sort(terms2))


 def test_count_vectorizer_pipeline_grid_selection():

@@ -1030,16 +1026,16 @@ def test_vectorizer_vocab_clone():
     assert_equal(vect_vocab_clone.vocabulary_, vect_vocab.vocabulary_)


-def test_vectorizer_string_object_as_input():
+@pytest.mark.parametrize('Vectorizer',
+                         (CountVectorizer, TfidfVectorizer, HashingVectorizer))
+def test_vectorizer_string_object_as_input(Vectorizer):
     message = ("Iterable over raw text documents expected, "
                "string object received.")
-    for vec in [CountVectorizer(), TfidfVectorizer(), HashingVectorizer()]:
-        assert_raise_message(
+    vec = Vectorizer()
+    assert_raise_message(
             ValueError, message, vec.fit_transform, "hello world!")
-        assert_raise_message(
-            ValueError, message, vec.fit, "hello world!")
-        assert_raise_message(
-            ValueError, message, vec.transform, "hello world!")
+    assert_raise_message(ValueError, message, vec.fit, "hello world!")
+    assert_raise_message(ValueError, message, vec.transform, "hello world!")


 @pytest.mark.parametrize("vec", [
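A detail worth noting in test_text.py: the parametrized value is the vectorizer class itself (CountVectorizer, TfidfVectorizer, HashingVectorizer), and each test constructs a fresh instance, so no fitted state leaks between parameter combinations. A minimal sketch of the same class-parametrization idiom, with made-up stand-in classes:

import json

import pytest


class JSONCodec:
    # Stand-in class for the sketch, not part of sklearn.
    def encode(self, obj):
        return json.dumps(obj)


class ReprCodec:
    # Stand-in class for the sketch, not part of sklearn.
    def encode(self, obj):
        return repr(obj)


@pytest.mark.parametrize('Codec', (JSONCodec, ReprCodec))
def test_codec_encodes_lists(Codec):
    codec = Codec()  # fresh instance per collected case
    assert codec.encode([1, 2]).startswith('[')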
5 changes: 0 additions & 5 deletions sklearn/feature_selection/tests/test_feature_select.py
@@ -7,7 +7,6 @@
 import numpy as np
 from scipy import stats, sparse

-from numpy.testing import run_module_suite
 from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_almost_equal
 from sklearn.utils.testing import assert_raises

@@ -670,7 +669,3 @@ def test_mutual_info_regression():
     gtruth = np.zeros(10)
     gtruth[:2] = 1
     assert_array_equal(support, gtruth)
-
-
-if __name__ == '__main__':
-    run_module_suite()
5 changes: 0 additions & 5 deletions sklearn/feature_selection/tests/test_mutual_info.py
@@ -1,7 +1,6 @@
 from __future__ import division

 import numpy as np
-from numpy.testing import run_module_suite
 from scipy.sparse import csr_matrix

 from sklearn.utils import check_random_state

@@ -200,7 +199,3 @@ def test_mutual_info_options():
     assert_array_equal(mi_3, mi_4)

     assert_false(np.allclose(mi_1, mi_3))
-
-
-if __name__ == '__main__':
-    run_module_suite()
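The two feature_selection test modules above lose only their nose-era entry point: numpy.testing.run_module_suite made a test module runnable as a script, but pytest discovers test_* functions by naming convention alone, so the if __name__ == '__main__' block is dead code. An equivalent single-module run, sketched with the standard pytest API:

import pytest

# Discovery replaces the removed run_module_suite() entry point.
pytest.main(['sklearn/feature_selection/tests/test_mutual_info.py'])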
17 changes: 10 additions & 7 deletions sklearn/gaussian_process/tests/test_gaussian_process.py
@@ -7,6 +7,8 @@

 import numpy as np

+import pytest
+
 from sklearn.gaussian_process import GaussianProcess
 from sklearn.gaussian_process import regression_models as regression
 from sklearn.gaussian_process import correlation_models as correlation

@@ -100,16 +102,17 @@ def test_wrong_number_of_outputs():
     assert_raises(ValueError, gp.fit, [[1, 2, 3], [4, 5, 6]], [1, 2, 3])


-def test_more_builtin_correlation_models(random_start=1):
+@pytest.mark.parametrize(
+    'corr',
+    ['absolute_exponential', 'squared_exponential', 'cubic', 'linear'])
+def test_more_builtin_correlation_models(corr):
     # Repeat test_1d and test_2d for several built-in correlation
     # models specified as strings.
-    all_corr = ['absolute_exponential', 'squared_exponential', 'cubic',
-                'linear']
+    random_start = 1

-    for corr in all_corr:
-        test_1d(regr='constant', corr=corr, random_start=random_start)
-        test_2d(regr='constant', corr=corr, random_start=random_start)
-        test_2d_2d(regr='constant', corr=corr, random_start=random_start)
+    test_1d(regr='constant', corr=corr, random_start=random_start)
+    test_2d(regr='constant', corr=corr, random_start=random_start)
+    test_2d_2d(regr='constant', corr=corr, random_start=random_start)


 def test_ordinary_kriging():
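Because the corr parameters in test_gaussian_process.py are plain strings, pytest derives each test id from the string itself, so a failure reads as test_more_builtin_correlation_models[cubic] rather than an opaque index, and a single model can be selected with pytest's standard -k option (e.g. pytest ... -k cubic). A toy sketch of string-derived ids (stand-in test, not from this PR):

import pytest

# String values become the ids: test_greets[ada], test_greets[grace].
@pytest.mark.parametrize('name', ['ada', 'grace'])
def test_greets(name):
    greeting = 'hello ' + name
    assert name in greeting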