[MRG+1]: TEST runtime down to 4:30 min on an old laptop #5711


Closed · wants to merge 8 commits
29 changes: 16 additions & 13 deletions sklearn/gaussian_process/tests/test_gpc.py
@@ -10,7 +10,7 @@
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

from sklearn.utils.testing import (assert_true, assert_greater, assert_equal,
from sklearn.utils.testing import (assert_true, assert_greater,
assert_almost_equal, assert_array_equal)


@@ -29,8 +29,8 @@ def f(x):
fixed_kernel = RBF(length_scale=1.0, length_scale_bounds="fixed")
kernels = [RBF(length_scale=0.1), fixed_kernel,
RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)),
C(1.0, (1e-2, 1e2))
* RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3))]
C(1.0, (1e-2, 1e2)) *
RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3))]


def test_predict_consistent():
@@ -45,7 +45,8 @@ def test_predict_consistent():
def test_lml_improving():
""" Test that hyperparameter-tuning improves log-marginal likelihood. """
for kernel in kernels:
if kernel == fixed_kernel: continue
if kernel == fixed_kernel:
continue
gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)
assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta),
gpc.log_marginal_likelihood(kernel.theta))
@@ -62,15 +63,16 @@ def test_lml_precomputed():
def test_converged_to_local_maximum():
""" Test that we are in local maximum after hyperparameter-optimization."""
for kernel in kernels:
if kernel == fixed_kernel: continue
if kernel == fixed_kernel:
continue
gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)

lml, lml_gradient = \
gpc.log_marginal_likelihood(gpc.kernel_.theta, True)

assert_true(np.all((np.abs(lml_gradient) < 1e-4)
| (gpc.kernel_.theta == gpc.kernel_.bounds[:, 0])
| (gpc.kernel_.theta == gpc.kernel_.bounds[:, 1])))
assert_true(np.all((np.abs(lml_gradient) < 1e-4) |
(gpc.kernel_.theta == gpc.kernel_.bounds[:, 0]) |
(gpc.kernel_.theta == gpc.kernel_.bounds[:, 1])))

Member comment on the line-break change above: "Just a note for later: let's ignore those kinds of non-important pep8 violations. We could give a list of flake8 warnings to ignore in a conf file in the repo."
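
Purely as an illustration of the conf-file idea in that comment (the file name and the specific codes are assumptions, nothing was decided in this PR), such an ignore list could live in a [flake8] section of setup.cfg:

[flake8]
# W503: line break before a binary operator (the style nit touched above)
# E129: visually indented line with same indent as next logical line
ignore = W503, E129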


def test_lml_gradient():
@@ -93,7 +95,7 @@ def test_random_starts():
Test that an increasing number of random-starts of GP fitting only
increases the log marginal likelihood of the chosen theta.
"""
n_samples, n_features = 25, 3
n_samples, n_features = 25, 2
np.random.seed(0)
rng = np.random.RandomState(0)
X = rng.randn(n_samples, n_features) * 2 - 1
@@ -103,7 +105,7 @@ def test_random_starts():
* RBF(length_scale=[1e-3] * n_features,
length_scale_bounds=[(1e-4, 1e+2)] * n_features)
last_lml = -np.inf
for n_restarts_optimizer in range(9):
for n_restarts_optimizer in range(5):
gp = GaussianProcessClassifier(
kernel=kernel, n_restarts_optimizer=n_restarts_optimizer,
random_state=0).fit(X, y)
@@ -114,12 +116,12 @@

def test_custom_optimizer():
""" Test that GPC can use externally defined optimizers. """
# Define a dummy optimizer that simply tests 1000 random hyperparameters
# Define a dummy optimizer that simply tests 50 random hyperparameters
def optimizer(obj_func, initial_theta, bounds):
rng = np.random.RandomState(0)
theta_opt, func_min = \
initial_theta, obj_func(initial_theta, eval_gradient=False)
for _ in range(1000):
for _ in range(50):
theta = np.atleast_1d(rng.uniform(np.maximum(-2, bounds[:, 0]),
np.minimum(1, bounds[:, 1])))
f = obj_func(theta, eval_gradient=False)
@@ -128,7 +130,8 @@ def optimizer(obj_func, initial_theta, bounds):
return theta_opt, func_min

for kernel in kernels:
if kernel == fixed_kernel: continue
if kernel == fixed_kernel:
continue
gpc = GaussianProcessClassifier(kernel=kernel, optimizer=optimizer)
gpc.fit(X, y_mc)
# Checks that optimizer improved marginal likelihood
44 changes: 24 additions & 20 deletions sklearn/gaussian_process/tests/test_gpr.py
@@ -25,14 +25,14 @@ def f(x):
fixed_kernel = RBF(length_scale=1.0, length_scale_bounds="fixed")
kernels = [RBF(length_scale=1.0), fixed_kernel,
RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)),
C(1.0, (1e-2, 1e2))
* RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)),
C(1.0, (1e-2, 1e2))
* RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3))
+ C(1e-5, (1e-5, 1e2)),
C(0.1, (1e-2, 1e2))
* RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3))
+ C(1e-5, (1e-5, 1e2))]
C(1.0, (1e-2, 1e2)) *
RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)),
C(1.0, (1e-2, 1e2)) *
RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)) +
C(1e-5, (1e-5, 1e2)),
C(0.1, (1e-2, 1e2)) *
RBF(length_scale=1.0, length_scale_bounds=(1e-3, 1e3)) +
C(1e-5, (1e-5, 1e2))]


def test_gpr_interpolation():
@@ -48,7 +48,8 @@ def test_gpr_interpolation():
def test_lml_improving():
""" Test that hyperparameter-tuning improves log-marginal likelihood. """
for kernel in kernels:
if kernel == fixed_kernel: continue
if kernel == fixed_kernel:
continue
gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)
assert_greater(gpr.log_marginal_likelihood(gpr.kernel_.theta),
gpr.log_marginal_likelihood(kernel.theta))
@@ -65,21 +66,23 @@ def test_lml_precomputed():
def test_converged_to_local_maximum():
""" Test that we are in local maximum after hyperparameter-optimization."""
for kernel in kernels:
if kernel == fixed_kernel: continue
if kernel == fixed_kernel:
continue
gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)

lml, lml_gradient = \
gpr.log_marginal_likelihood(gpr.kernel_.theta, True)

assert_true(np.all((np.abs(lml_gradient) < 1e-4)
| (gpr.kernel_.theta == gpr.kernel_.bounds[:, 0])
| (gpr.kernel_.theta == gpr.kernel_.bounds[:, 1])))
assert_true(np.all((np.abs(lml_gradient) < 1e-4) |
(gpr.kernel_.theta == gpr.kernel_.bounds[:, 0]) |
(gpr.kernel_.theta == gpr.kernel_.bounds[:, 1])))


def test_solution_inside_bounds():
""" Test that hyperparameter-optimization remains in bounds"""
for kernel in kernels:
if kernel == fixed_kernel: continue
if kernel == fixed_kernel:
continue
gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)

bounds = gpr.kernel_.bounds
@@ -128,7 +131,7 @@ def test_sample_statistics():

y_mean, y_cov = gpr.predict(X2, return_cov=True)

samples = gpr.sample_y(X2, 1000000)
samples = gpr.sample_y(X2, 300000)

# More digits accuracy would require many more samples
assert_almost_equal(y_mean, np.mean(samples, 1), 2)
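
(Side note on the reduced draw count: the Monte Carlo error of the empirical mean shrinks only like 1 / sqrt(n_samples), about 0.002 of a sample standard deviation at 300000 draws, so the 2-decimal comparison above stays comfortable; each additional decimal would cost roughly a factor of 100 more samples, which is presumably why the original 1000000 was safe to cut.)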
@@ -172,7 +175,7 @@ def test_random_starts():
Test that an increasing number of random-starts of GP fitting only
increases the log marginal likelihood of the chosen theta.
"""
n_samples, n_features = 25, 3
n_samples, n_features = 25, 2
np.random.seed(0)
rng = np.random.RandomState(0)
X = rng.randn(n_samples, n_features) * 2 - 1
@@ -184,7 +187,7 @@
length_scale_bounds=[(1e-4, 1e+2)] * n_features) \
+ WhiteKernel(noise_level=1e-5, noise_level_bounds=(1e-5, 1e1))
last_lml = -np.inf
for n_restarts_optimizer in range(9):
for n_restarts_optimizer in range(5):
gp = GaussianProcessRegressor(
kernel=kernel, n_restarts_optimizer=n_restarts_optimizer,
random_state=0,).fit(X, y)
@@ -267,12 +270,12 @@ def test_y_multioutput():

def test_custom_optimizer():
""" Test that GPR can use externally defined optimizers. """
# Define a dummy optimizer that simply tests 1000 random hyperparameters
# Define a dummy optimizer that simply tests 50 random hyperparameters
def optimizer(obj_func, initial_theta, bounds):
rng = np.random.RandomState(0)
theta_opt, func_min = \
initial_theta, obj_func(initial_theta, eval_gradient=False)
for _ in range(1000):
for _ in range(50):
theta = np.atleast_1d(rng.uniform(np.maximum(-2, bounds[:, 0]),
np.minimum(1, bounds[:, 1])))
f = obj_func(theta, eval_gradient=False)
@@ -281,7 +284,8 @@ def optimizer(obj_func, initial_theta, bounds):
return theta_opt, func_min

for kernel in kernels:
if kernel == fixed_kernel: continue
if kernel == fixed_kernel:
continue
gpr = GaussianProcessRegressor(kernel=kernel, optimizer=optimizer)
gpr.fit(X, y)
# Checks that optimizer improved marginal likelihood
23 changes: 10 additions & 13 deletions sklearn/gaussian_process/tests/test_kernels.py
@@ -8,7 +8,7 @@

import numpy as np

from scipy.optimize import approx_fprime
from sklearn.gaussian_process.kernels import _approx_fprime

from sklearn.metrics.pairwise \
import PAIRWISE_KERNEL_FUNCTIONS, euclidean_distances, pairwise_kernels
@@ -23,8 +23,8 @@
assert_array_almost_equal)


X = np.random.RandomState(0).normal(0, 1, (10, 2))
Y = np.random.RandomState(0).normal(0, 1, (11, 2))
X = np.random.RandomState(0).normal(0, 1, (5, 2))
Y = np.random.RandomState(0).normal(0, 1, (6, 2))

kernel_white = RBF(length_scale=2.0) + WhiteKernel(noise_level=3.0)
kernels = [RBF(length_scale=2.0), RBF(length_scale_bounds=(0.5, 2.0)),
@@ -57,16 +57,13 @@ def test_kernel_gradient():
assert_equal(K_gradient.shape[1], X.shape[0])
assert_equal(K_gradient.shape[2], kernel.theta.shape[0])

K_gradient_approx = np.empty_like(K_gradient)
for i in range(K.shape[0]):
for j in range(K.shape[1]):
def eval_kernel_ij_for_theta(theta):
kernel_clone = kernel.clone_with_theta(theta)
K = kernel_clone(X, eval_gradient=False)
return K[i, j]
K_gradient_approx[i, j] = \
approx_fprime(kernel.theta, eval_kernel_ij_for_theta,
1e-10)
def eval_kernel_for_theta(theta):
kernel_clone = kernel.clone_with_theta(theta)
K = kernel_clone(X, eval_gradient=False)
return K

K_gradient_approx = \
_approx_fprime(kernel.theta, eval_kernel_for_theta, 1e-10)

assert_almost_equal(K_gradient, K_gradient_approx, 4)
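
Most of the saving in this file comes from computing the numerical gradient once for the whole kernel matrix instead of calling scipy.optimize.approx_fprime separately for every (i, j) entry. Below is a rough sketch of that vectorised idea, assuming a matrix-valued callable like eval_kernel_for_theta above; it is an illustration only, not the actual body of sklearn's private _approx_fprime helper.

import numpy as np

def finite_difference_gradient(theta, kernel_matrix, eps=1e-10):
    # Forward-difference gradient of the matrix-valued function kernel_matrix(theta),
    # returned with shape kernel_matrix(theta).shape + (len(theta),) to match K_gradient.
    theta = np.asarray(theta, dtype=float)
    K0 = kernel_matrix(theta)
    grad = np.zeros(K0.shape + (len(theta),))
    for k in range(len(theta)):
        step = np.zeros_like(theta)
        step[k] = eps
        # One extra kernel evaluation per hyperparameter, rather than one
        # scipy call per matrix entry as in the removed nested loop.
        grad[..., k] = (kernel_matrix(theta + step) - K0) / eps
    return grad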

18 changes: 9 additions & 9 deletions sklearn/linear_model/tests/test_coordinate_descent.py
@@ -167,8 +167,8 @@ def test_lasso_cv():
# for this we check that they don't fall in the grid of
# clf.alphas further than 1
assert_true(np.abs(
np.searchsorted(clf.alphas_[::-1], lars.alpha_)
- np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1)
np.searchsorted(clf.alphas_[::-1], lars.alpha_) -
np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1)
# check that they also give a similar MSE
mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.cv_mse_path_.T)
np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(),
@@ -438,29 +438,29 @@ def test_multioutput_enetcv_error():


def test_multitask_enet_and_lasso_cv():
X, y, _, _ = build_dataset(n_features=100, n_targets=3)
X, y, _, _ = build_dataset(n_features=50, n_targets=3)
clf = MultiTaskElasticNetCV().fit(X, y)
assert_almost_equal(clf.alpha_, 0.00556, 3)
clf = MultiTaskLassoCV().fit(X, y)
assert_almost_equal(clf.alpha_, 0.00278, 3)

X, y, _, _ = build_dataset(n_targets=3)
clf = MultiTaskElasticNetCV(n_alphas=50, eps=1e-3, max_iter=100,
clf = MultiTaskElasticNetCV(n_alphas=10, eps=1e-3, max_iter=100,
l1_ratio=[0.3, 0.5], tol=1e-3)
clf.fit(X, y)
assert_equal(0.5, clf.l1_ratio_)
assert_equal((3, X.shape[1]), clf.coef_.shape)
assert_equal((3, ), clf.intercept_.shape)
assert_equal((2, 50, 3), clf.mse_path_.shape)
assert_equal((2, 50), clf.alphas_.shape)
assert_equal((2, 10, 3), clf.mse_path_.shape)
assert_equal((2, 10), clf.alphas_.shape)

X, y, _, _ = build_dataset(n_targets=3)
clf = MultiTaskLassoCV(n_alphas=50, eps=1e-3, max_iter=100, tol=1e-3)
clf = MultiTaskLassoCV(n_alphas=10, eps=1e-3, max_iter=100, tol=1e-3)
clf.fit(X, y)
assert_equal((3, X.shape[1]), clf.coef_.shape)
assert_equal((3, ), clf.intercept_.shape)
assert_equal((50, 3), clf.mse_path_.shape)
assert_equal(50, len(clf.alphas_))
assert_equal((10, 3), clf.mse_path_.shape)
assert_equal(10, len(clf.alphas_))
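
For context on the shape assertions above: with a list of l1_ratio values, MultiTaskElasticNetCV.mse_path_ is laid out as (n_l1_ratios, n_alphas, n_folds), so reducing n_alphas from 50 to 10 only shrinks the middle axis. A small standalone sketch on random data (current sklearn API assumed, cv pinned to 3 to mirror the shapes checked above):

import numpy as np
from sklearn.linear_model import MultiTaskElasticNetCV

rng = np.random.RandomState(0)
X = rng.randn(30, 8)
Y = rng.randn(30, 3)

model = MultiTaskElasticNetCV(n_alphas=10, l1_ratio=[0.3, 0.5], cv=3).fit(X, Y)
print(model.mse_path_.shape)  # (2, 10, 3): two l1_ratios, ten alphas, three folds
print(model.alphas_.shape)    # (2, 10): one alpha grid per l1_ratio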


def test_1d_multioutput_enet_and_multitask_enet_cv():
46 changes: 25 additions & 21 deletions sklearn/model_selection/tests/test_split.py
@@ -41,7 +41,7 @@
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

from sklearn.svm import LinearSVC
from sklearn.linear_model import Ridge

from sklearn.model_selection._split import _safe_split
from sklearn.model_selection._split import _validate_shuffle_split
@@ -419,31 +419,33 @@ def test_shuffle_stratifiedkfold():
def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372
# The digits samples are dependent: they are apparently grouped by authors
# although we don't have any information on the groups segment locations
# for this data. We can highlight this fact be computing k-fold cross-
# for this data. We can highlight this fact by computing k-fold cross-
# validation with and without shuffling: we observe that the shuffling case
# wrongly makes the IID assumption and is therefore too optimistic: it
# estimates a much higher accuracy (around 0.96) than than the non
# shuffling variant (around 0.86).
# estimates a much higher accuracy (around 0.93) than the non-
# shuffling variant (around 0.81).

X, y = digits.data[:800], digits.target[:800]
X, y = digits.data[:600], digits.target[:600]
model = SVC(C=10, gamma=0.005)

cv = KFold(n_folds=5, shuffle=False)
n_folds = 3

cv = KFold(n_folds=n_folds, shuffle=False)
mean_score = cross_val_score(model, X, y, cv=cv).mean()
assert_greater(0.88, mean_score)
assert_greater(mean_score, 0.85)
assert_greater(0.92, mean_score)
assert_greater(mean_score, 0.80)

# Shuffling the data artificially breaks the dependency and hides the
# overfitting of the model with regards to the writing style of the authors
# by yielding a seriously overestimated score:

cv = KFold(5, shuffle=True, random_state=0)
cv = KFold(n_folds, shuffle=True, random_state=0)
mean_score = cross_val_score(model, X, y, cv=cv).mean()
assert_greater(mean_score, 0.95)
assert_greater(mean_score, 0.92)

cv = KFold(5, shuffle=True, random_state=1)
cv = KFold(n_folds, shuffle=True, random_state=1)
mean_score = cross_val_score(model, X, y, cv=cv).mean()
assert_greater(mean_score, 0.95)
assert_greater(mean_score, 0.92)

# Similarly, StratifiedKFold should try to shuffle the data as little
# as possible (while respecting the balanced class constraints)
@@ -452,10 +454,10 @@ def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372
# the estimated mean score is close to the score measured with
# non-shuffled KFold

cv = StratifiedKFold(5)
cv = StratifiedKFold(n_folds)
mean_score = cross_val_score(model, X, y, cv=cv).mean()
assert_greater(0.88, mean_score)
assert_greater(mean_score, 0.85)
assert_greater(0.93, mean_score)
assert_greater(mean_score, 0.80)


def test_shuffle_split():
Expand Down Expand Up @@ -517,10 +519,12 @@ def test_stratified_shuffle_split_iter():
for train, test in sss:
assert_array_equal(np.unique(y[train]), np.unique(y[test]))
# Checks if folds keep classes proportions
p_train = (np.bincount(np.unique(y[train], return_inverse=True)[1])
/ float(len(y[train])))
p_test = (np.bincount(np.unique(y[test], return_inverse=True)[1])
/ float(len(y[test])))
p_train = (np.bincount(np.unique(y[train],
return_inverse=True)[1]) /
float(len(y[train])))
p_test = (np.bincount(np.unique(y[test],
return_inverse=True)[1]) /
float(len(y[test])))
assert_array_almost_equal(p_train, p_test, 1)
assert_equal(y[train].size + y[test].size, y.size)
assert_array_equal(np.lib.arraysetops.intersect1d(train, test), [])
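
A side note on the class-proportion check above: np.unique(..., return_inverse=True)[1] maps each label to an integer class index and np.bincount counts samples per class, so dividing by the fold size gives per-class proportions. A tiny standalone illustration with made-up labels:

import numpy as np

y_fold = np.array(["a", "b", "b", "a", "b", "b"])
_, class_idx = np.unique(y_fold, return_inverse=True)
proportions = np.bincount(class_idx) / float(len(y_fold))
print(proportions)  # [0.333..., 0.666...], one entry per class in sorted label order
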
Expand Down Expand Up @@ -946,10 +950,10 @@ def test_nested_cv():
labels = rng.randint(0, 5, 15)

cvs = [LeaveOneLabelOut(), LeaveOneOut(), LabelKFold(), StratifiedKFold(),
StratifiedShuffleSplit(n_iter=10, random_state=0)]
StratifiedShuffleSplit(n_iter=3, random_state=0)]

for inner_cv, outer_cv in combinations_with_replacement(cvs, 2):
gs = GridSearchCV(LinearSVC(random_state=0), param_grid={'C': [1, 10]},
gs = GridSearchCV(Ridge(), param_grid={'alpha': [1, .1]},
cv=inner_cv)
cross_val_score(gs, X=X, y=y, labels=labels, cv=outer_cv,
fit_params={'labels': labels})