[MRG+1]: TEST runtime down to 4:30 min on an old laptop #5711


Closed · wants to merge 8 commits
29 changes: 16 additions & 13 deletions sklearn/gaussian_process/tests/test_gpc.py
@@ -10,7 +10,7 @@
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

from sklearn.utils.testing import (assert_true, assert_greater, assert_equal,
from sklearn.utils.testing import (assert_true, assert_greater,
assert_almost_equal, assert_array_equal)


@@ -29,8 +29,8 @@ def f(x):
fixed_kernel = RBF(length_scale=1.0, length_scale_bounds="fixed")
kernels = [RBF(length_scale=0.1), fixed_kernel,
RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)),
C(1.0, (1e-2, 1e2))
* RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3))]
C(1.0, (1e-2, 1e2)) *
RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3))]


def test_predict_consistent():
@@ -45,7 +45,8 @@ def test_predict_consistent():
def test_lml_improving():
""" Test that hyperparameter-tuning improves log-marginal likelihood. """
for kernel in kernels:
if kernel == fixed_kernel: continue
if kernel == fixed_kernel:
continue
gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta),
gpc.log_marginal_likelihood(kernel.theta))
@@ -62,15 +63,16 @@ def test_lml_precomputed():
def test_converged_to_local_maximum():
""" Test that we are in local maximum after hyperparameter-optimization."""
for kernel in kernels:
if kernel == fixed_kernel: continue
if kernel == fixed_kernel:
continue
gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)

lml, lml_gradient = \
gpc.log_marginal_likelihood(gpc.kernel_.theta, True)

assert_true(np.all((np.abs(lml_gradient) < 1e-4)
| (gpc.kernel_.theta == gpc.kernel_.bounds[:, 0])
| (gpc.kernel_.theta == gpc.kernel_.bounds[:, 1])))
assert_true(np.all((np.abs(lml_gradient) < 1e-4) |
(gpc.kernel_.theta == gpc.kernel_.bounds[:, 0]) |
(gpc.kernel_.theta == gpc.kernel_.bounds[:, 1])))

Member comment on the line-break change above: "Just a note for later: let's ignore those kinds of non-important pep8 violations. We could give a list of flake8 warnings to ignore in a conf file in the repo."
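
Purely as an illustration of the conf-file idea in that comment (the file name and the specific codes are assumptions, nothing was decided in this PR), such an ignore list could live in a [flake8] section of setup.cfg:

[flake8]
# W503: line break before a binary operator (the style nit touched above)
# E129: visually indented line with same indent as next logical line
ignore = W503, E129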


def test_lml_gradient():
@@ -93,7 +95,7 @@ def test_random_starts():
Test that an increasing number of random-starts of GP fitting only
increases the log marginal likelihood of the chosen theta.
"""
n_samples, n_features = 25, 3
n_samples, n_features = 25, 2
np.random.seed(0)
rng = np.random.RandomState(0)
X = rng.randn(n_samples, n_features) * 2 - 1
@@ -103,7 +105,7 @@ def test_random_starts():
* RBF(length_scale=[1e-3] * n_features,
length_scale_bounds=[(1e-4, 1e+2)] * n_features)
last_lml = -np.inf
for n_restarts_optimizer in range(9):
for n_restarts_optimizer in range(5):
gp = GaussianProcessClassifier(
kernel=kernel, n_restarts_optimizer=n_restarts_optimizer,
random_state=0).fit(X, y)
@@ -114,12 +116,12 @@

def test_custom_optimizer():
""" Test that GPC can use externally defined optimizers. """
# Define a dummy optimizer that simply tests 1000 random hyperparameters
# Define a dummy optimizer that simply tests 50 random hyperparameters
def optimizer(obj_func, initial_theta, bounds):
rng = np.random.RandomState(0)
theta_opt, func_min = \
initial_theta, obj_func(initial_theta, eval_gradient=False)
for _ in range(1000):
for _ in range(50):
theta = np.atleast_1d(rng.uniform(np.maximum(-2, bounds[:, 0]),
np.minimum(1, bounds[:, 1])))
f = obj_func(theta, eval_gradient=False)
@@ -128,7 +130,8 @@ def optimizer(obj_func, initial_theta, bounds):
return theta_opt, func_min

for kernel in kernels:
if kernel == fixed_kernel: continue
if kernel == fixed_kernel:
continue
gpc = GaussianProcessClassifier(kernel=kernel, optimizer=optimizer)
gpc.fit(X, y_mc)
# Checks that optimizer improved marginal likelihood
44 changes: 24 additions & 20 deletions sklearn/gaussian_process/tests/test_gpr.py
@@ -25,14 +25,14 @@ def f(x):
fixed_kernel = RBF(length_scale=1.0, length_scale_bounds="fixed")
kernels = [RBF(length_scale=1.0), fixed_kernel,
RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)),
C(1.0, (1e-2, 1e2))
* RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)),
C(1.0, (1e-2, 1e2))
* RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3))
+ C(1e-5, (1e-5, 1e2)),
C(0.1, (1e-2, 1e2))
* RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3))
+ C(1e-5, (1e-5, 1e2))]
C(1.0, (1e-2, 1e2)) *
RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)),
C(1.0, (1e-2, 1e2)) *
RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)) +
C(1e-5, (1e-5, 1e2)),
C(0.1, (1e-2, 1e2)) *
RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)) +
C(1e-5, (1e-5, 1e2))]


def test_gpr_interpolation():
@@ -48,7 +48,8 @@ def test_gpr_interpolation():
def test_lml_improving():
""" Test that hyperparameter-tuning improves log-marginal likelihood. """
for kernel in kernels:
if kernel == fixed_kernel: continue
if kernel == fixed_kernel:
continue
gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
assert_greater(gpr.log_marginal_likelihood(gpr.kernel_.theta),
gpr.log_marginal_likelihood(kernel.theta))
@@ -65,21 +66,23 @@ def test_lml_precomputed():
def test_converged_to_local_maximum():
""" Test that we are in local maximum after hyperparameter-optimization."""
for kernel in kernels:
if kernel == fixed_kernel: continue
if kernel == fixed_kernel:
continue
gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)

lml, lml_gradient = \
gpr.log_marginal_likelihood(gpr.kernel_.theta, True)

assert_true(np.all((np.abs(lml_gradient) < 1e-4)
| (gpr.kernel_.theta == gpr.kernel_.bounds[:, 0])
| (gpr.kernel_.theta == gpr.kernel_.bounds[:, 1])))
assert_true(np.all((np.abs(lml_gradient) < 1e-4) |
(gpr.kernel_.theta == gpr.kernel_.bounds[:, 0]) |
(gpr.kernel_.theta == gpr.kernel_.bounds[:, 1])))


def test_solution_inside_bounds():
""" Test that hyperparameter-optimization remains in bounds"""
for kernel in kernels:
if kernel == fixed_kernel: continue
if kernel == fixed_kernel:
continue
gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)

bounds = gpr.kernel_.bounds
@@ -128,7 +131,7 @@ def test_sample_statistics():

y_mean, y_cov = gpr.predict(X2, return_cov=True)

samples = gpr.sample_y(X2, 1000000)
samples = gpr.sample_y(X2, 300000)

# More digits accuracy would require many more samples
assert_almost_equal(y_mean, np.mean(samples, 1), 2)
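
(Side note on the reduced draw count: the Monte Carlo error of the empirical mean shrinks only like 1 / sqrt(n_samples), about 0.002 of a sample standard deviation at 300000 draws, so the 2-decimal comparison above stays comfortable; each additional decimal would cost roughly a factor of 100 more samples, which is presumably why the original 1000000 was safe to cut.)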
@@ -172,7 +175,7 @@ def test_random_starts():
Test that an increasing number of random-starts of GP fitting only
increases the log marginal likelihood of the chosen theta.
"""
n_samples, n_features = 25, 3
n_samples, n_features = 25, 2
np.random.seed(0)
rng = np.random.RandomState(0)
X = rng.randn(n_samples, n_features) * 2 - 1
@@ -184,7 +187,7 @@
length_scale_bounds=[(1e-4, 1e+2)] * n_features) \
+ WhiteKernel(noise_level=1e-5, noise_level_bounds=(1e-5, 1e1))
last_lml = -np.inf
for n_restarts_optimizer in range(9):
for n_restarts_optimizer in range(5):
gp = GaussianProcessRegressor(
kernel=kernel, n_restarts_optimizer=n_restarts_optimizer,
random_state=0,).fit(X, y)
@@ -267,12 +270,12 @@ def test_y_multioutput():

def test_custom_optimizer():
""" Test that GPR can use externally defined optimizers. """
# Define a dummy optimizer that simply tests 1000 random hyperparameters
# Define a dummy optimizer that simply tests 50 random hyperparameters
def optimizer(obj_func, initial_theta, bounds):
rng = np.random.RandomState(0)
theta_opt, func_min = \
initial_theta, obj_func(initial_theta, eval_gradient=False)
for _ in range(1000):
for _ in range(50):
theta = np.atleast_1d(rng.uniform(np.maximum(-2, bounds[:, 0]),
np.minimum(1, bounds[:, 1])))
f = obj_func(theta, eval_gradient=False)
@@ -281,7 +284,8 @@ def optimizer(obj_func, initial_theta, bounds):
return theta_opt, func_min

for kernel in kernels:
if kernel == fixed_kernel: continue
if kernel == fixed_kernel:
continue
gpr = GaussianProcessRegressor(kernel=kernel, optimizer=optimizer)
gpr.fit(X, y)
# Checks that optimizer improved marginal likelihood
23 changes: 10 additions & 13 deletions sklearn/gaussian_process/tests/test_kernels.py
@@ -8,7 +8,7 @@

import numpy as np

from scipy.optimize import approx_fprime
from sklearn.gaussian_process.kernels import _approx_fprime

from sklearn.metrics.pairwise \
import PAIRWISE_KERNEL_FUNCTIONS, euclidean_distances, pairwise_kernels
@@ -23,8 +23,8 @@
assert_array_almost_equal)


X = np.random.RandomState(0).normal(0, 1, (10, 2))
Y = np.random.RandomState(0).normal(0, 1, (11, 2))
X = np.random.RandomState(0).normal(0, 1, (5, 2))
Y = np.random.RandomState(0).normal(0, 1, (6, 2))

kernel_white = RBF(length_scale=2.0) + WhiteKernel(noise_level=3.0)
kernels = [RBF(length_scale=2.0), RBF(length_scale_bounds=(0.5, 2.0)),
@@ -57,16 +57,13 @@ def test_kernel_gradient():
assert_equal(K_gradient.shape[1], X.shape[0])
assert_equal(K_gradient.shape[2], kernel.theta.shape[0])

K_gradient_approx = np.empty_like(K_gradient)
for i in range(K.shape[0]):
for j in range(K.shape[1]):
def eval_kernel_ij_for_theta(theta):
kernel_clone = kernel.clone_with_theta(theta)
K = kernel_clone(X, eval_gradient=False)
return K[i, j]
K_gradient_approx[i, j] = \
approx_fprime(kernel.theta, eval_kernel_ij_for_theta,
1e-10)
def eval_kernel_for_theta(theta):
kernel_clone = kernel.clone_with_theta(theta)
K = kernel_clone(X, eval_gradient=False)
return K

K_gradient_approx = \
_approx_fprime(kernel.theta, eval_kernel_for_theta, 1e-10)

assert_almost_equal(K_gradient, K_gradient_approx, 4)
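
Most of the saving in this file comes from computing the numerical gradient once for the whole kernel matrix instead of calling scipy.optimize.approx_fprime separately for every (i, j) entry. Below is a rough sketch of that vectorised idea, assuming a matrix-valued callable like eval_kernel_for_theta above; it is an illustration only, not the actual body of sklearn's private _approx_fprime helper.

import numpy as np

def finite_difference_gradient(theta, kernel_matrix, eps=1e-10):
    # Forward-difference gradient of the matrix-valued function kernel_matrix(theta),
    # returned with shape kernel_matrix(theta).shape + (len(theta),) to match K_gradient.
    theta = np.asarray(theta, dtype=float)
    K0 = kernel_matrix(theta)
    grad = np.zeros(K0.shape + (len(theta),))
    for k in range(len(theta)):
        step = np.zeros_like(theta)
        step[k] = eps
        # One extra kernel evaluation per hyperparameter, rather than one
        # scipy call per matrix entry as in the removed nested loop.
        grad[..., k] = (kernel_matrix(theta + step) - K0) / eps
    return grad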

18 changes: 9 additions & 9 deletions sklearn/linear_model/tests/test_coordinate_descent.py
@@ -167,8 +167,8 @@ def test_lasso_cv():
# for this we check that they don't fall in the grid of
# clf.alphas further than 1
assert_true(np.abs(
np.searchsorted(clf.alphas_[::-1], lars.alpha_)
- np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1)
np.searchsorted(clf.alphas_[::-1], lars.alpha_) -
np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1)
# check that they also give a similar MSE
mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.cv_mse_path_.T)
np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(),
@@ -438,29 +438,29 @@ def test_multioutput_enetcv_error():


def test_multitask_enet_and_lasso_cv():
X, y, _, _ = build_dataset(n_features=100, n_targets=3)
X, y, _, _ = build_dataset(n_features=50, n_targets=3)
clf = MultiTaskElasticNetCV().fit(X, y)
assert_almost_equal(clf.alpha_, 0.00556, 3)
clf = MultiTaskLassoCV().fit(X, y)
assert_almost_equal(clf.alpha_, 0.00278, 3)

X, y, _, _ = build_dataset(n_targets=3)
clf = MultiTaskElasticNetCV(n_alphas=50, eps=1e-3, max_iter=100,
clf = MultiTaskElasticNetCV(n_alphas=10, eps=1e-3, max_iter=100,
l1_ratio=[0.3, 0.5], tol=1e-3)
clf.fit(X, y)
assert_equal(0.5, clf.l1_ratio_)
assert_equal((3, X.shape[1]), clf.coef_.shape)
assert_equal((3, ), clf.intercept_.shape)
assert_equal((2, 50, 3), clf.mse_path_.shape)
assert_equal((2, 50), clf.alphas_.shape)
assert_equal((2, 10, 3), clf.mse_path_.shape)
assert_equal((2, 10), clf.alphas_.shape)

X, y, _, _ = build_dataset(n_targets=3)
clf = MultiTaskLassoCV(n_alphas=50, eps=1e-3, max_iter=100, tol=1e-3)
clf = MultiTaskLassoCV(n_alphas=10, eps=1e-3, max_iter=100, tol=1e-3)
clf.fit(X, y)
assert_equal((3, X.shape[1]), clf.coef_.shape)
assert_equal((3, ), clf.intercept_.shape)
assert_equal((50, 3), clf.mse_path_.shape)
assert_equal(50, len(clf.alphas_))
assert_equal((10, 3), clf.mse_path_.shape)
assert_equal(10, len(clf.alphas_))
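
For context on the shape assertions above: with a list of l1_ratio values, MultiTaskElasticNetCV.mse_path_ is laid out as (n_l1_ratios, n_alphas, n_folds), so reducing n_alphas from 50 to 10 only shrinks the middle axis. A small standalone sketch on random data (current sklearn API assumed, cv pinned to 3 to mirror the shapes checked above):

import numpy as np
from sklearn.linear_model import MultiTaskElasticNetCV

rng = np.random.RandomState(0)
X = rng.randn(30, 8)
Y = rng.randn(30, 3)

model = MultiTaskElasticNetCV(n_alphas=10, l1_ratio=[0.3, 0.5], cv=3).fit(X, Y)
print(model.mse_path_.shape)  # (2, 10, 3): two l1_ratios, ten alphas, three folds
print(model.alphas_.shape)    # (2, 10): one alpha grid per l1_ratio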


def test_1d_multioutput_enet_and_multitask_enet_cv():
46 changes: 25 additions & 21 deletions sklearn/model_selection/tests/test_split.py
@@ -41,7 +41,7 @@
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

from sklearn.svm import LinearSVC
from sklearn.linear_model import Ridge

from sklearn.model_selection._split import _safe_split
from sklearn.model_selection._split import _validate_shuffle_split
@@ -419,31 +419,33 @@ def test_shuffle_stratifiedkfold():
def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372
# The digits samples are dependent: they are apparently grouped by authors
# although we don't have any information on the groups segment locations
# for this data. We can highlight this fact be computing k-fold cross-
# for this data. We can highlight this fact by computing k-fold cross-
# validation with and without shuffling: we observe that the shuffling case
# wrongly makes the IID assumption and is therefore too optimistic: it
# estimates a much higher accuracy (around 0.96) than than the non
# shuffling variant (around 0.86).
# estimates a much higher accuracy (around 0.93) than the non-
# shuffling variant (around 0.81).

X, y = digits.data[:800], digits.target[:800]
X, y = digits.data[:600], digits.target[:600]
model = SVC(C=10, gamma=0.005)

cv = KFold(n_folds=5, shuffle=False)
n_folds = 3

cv = KFold(n_folds=n_folds, shuffle=False)
mean_score = cross_val_score(model, X, y, cv=cv).mean()
assert_greater(0.88, mean_score)
assert_greater(mean_score, 0.85)
assert_greater(0.92, mean_score)
assert_greater(mean_score, 0.80)

# Shuffling the data artificially breaks the dependency and hides the
# overfitting of the model with regards to the writing style of the authors
# by yielding a seriously overestimated score:

cv = KFold(5, shuffle=True, random_state=0)
cv = KFold(n_folds, shuffle=True, random_state=0)
mean_score = cross_val_score(model, X, y, cv=cv).mean()
assert_greater(mean_score, 0.95)
assert_greater(mean_score, 0.92)

cv = KFold(5, shuffle=True, random_state=1)
cv = KFold(n_folds, shuffle=True, random_state=1)
mean_score = cross_val_score(model, X, y, cv=cv).mean()
assert_greater(mean_score, 0.95)
assert_greater(mean_score, 0.92)

# Similarly, StratifiedKFold should try to shuffle the data as little
# as possible (while respecting the balanced class constraints)
@@ -452,10 +454,10 @@ def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372
# the estimated mean score is close to the score measured with
# non-shuffled KFold

cv = StratifiedKFold(5)
cv = StratifiedKFold(n_folds)
mean_score = cross_val_score(model, X, y, cv=cv).mean()
assert_greater(0.88, mean_score)
assert_greater(mean_score, 0.85)
assert_greater(0.93, mean_score)
assert_greater(mean_score, 0.80)


def test_shuffle_split():
Expand Down Expand Up @@ -517,10 +519,12 @@ def test_stratified_shuffle_split_iter():
for train, test in sss:
assert_array_equal(np.unique(y[train]), np.unique(y[test]))
# Checks if folds keep classes proportions
p_train = (np.bincount(np.unique(y[train], return_inverse=True)[1])
/ float(len(y[train])))
p_test = (np.bincount(np.unique(y[test], return_inverse=True)[1])
/ float(len(y[test])))
p_train = (np.bincount(np.unique(y[train],
return_inverse=True)[1]) /
float(len(y[train])))
p_test = (np.bincount(np.unique(y[test],
return_inverse=True)[1]) /
float(len(y[test])))
assert_array_almost_equal(p_train, p_test, 1)
assert_equal(y[train].size + y[test].size, y.size)
assert_array_equal(np.lib.arraysetops.intersect1d(train, test), [])
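
A side note on the class-proportion check above: np.unique(..., return_inverse=True)[1] maps each label to an integer class index and np.bincount counts samples per class, so dividing by the fold size gives per-class proportions. A tiny standalone illustration with made-up labels:

import numpy as np

y_fold = np.array(["a", "b", "b", "a", "b", "b"])
_, class_idx = np.unique(y_fold, return_inverse=True)
proportions = np.bincount(class_idx) / float(len(y_fold))
print(proportions)  # [0.333..., 0.666...], one entry per class in sorted label order
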
Expand Down Expand Up @@ -946,10 +950,10 @@ def test_nested_cv():
labels = rng.randint(0, 5, 15)

cvs = [LeaveOneLabelOut(), LeaveOneOut(), LabelKFold(), StratifiedKFold(),
StratifiedShuffleSplit(n_iter=10, random_state=0)]
StratifiedShuffleSplit(n_iter=3, random_state=0)]

for inner_cv, outer_cv in combinations_with_replacement(cvs, 2):
gs = GridSearchCV(LinearSVC(random_state=0), param_grid={'C': [1, 10]},
gs = GridSearchCV(Ridge(), param_grid={'alpha': [1, .1]},
cv=inner_cv)
cross_val_score(gs, X=X, y=y, labels=labels, cv=outer_cv,
fit_params={'labels': labels})