From 3617030424a56447a2de689fc3b3505ecca3dea8 Mon Sep 17 00:00:00 2001 From: Kuai Yu Date: Sat, 28 Jul 2018 10:55:48 -0400 Subject: [PATCH 1/5] style changes --- sklearn/linear_model/least_angle.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py index bae65fc889370..3af5869c19291 100644 --- a/sklearn/linear_model/least_angle.py +++ b/sklearn/linear_model/least_angle.py @@ -196,7 +196,7 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, if verbose: if verbose > 1: - print("Step\t\tAdded\t\tDropped\t\tActive set size\t\tC") + print('Step\t\tAdded\t\tDropped\t\tActive set size\t\tC') else: sys.stdout.write('.') sys.stdout.flush() @@ -320,7 +320,7 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, n_active += 1 if verbose > 1: - print("%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, active[-1], '', + print('%s\t\t%s\t\t%s\t\t%s\t\t%s' % (n_iter, active[-1], '', n_active, C)) if method == 'lasso' and n_iter > 0 and prev_alpha[0] < alpha[0]: @@ -470,7 +470,7 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, sign_active = np.delete(sign_active, idx) sign_active = np.append(sign_active, 0.) # just to maintain size if verbose > 1: - print("%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, '', drop_idx, + print('%s\t\t%s\t\t%s\t\t%s\t\t%s' % (n_iter, '', drop_idx, n_active, abs(temp))) if return_path: @@ -1115,8 +1115,8 @@ def fit(self, X, y): # As we use cross-validation, the Gram matrix is not precomputed here Gram = self.precompute if hasattr(Gram, '__array__'): - warnings.warn("Parameter 'precompute' cannot be an array in " - "%s. Automatically switch to 'auto' instead." + warnings.warn('Parameter "precompute" cannot be an array in ' + '%s. Automatically switch to "auto" instead.' % self.__class__.__name__) Gram = 'auto' @@ -1170,8 +1170,8 @@ def fit(self, X, y): return self @property - @deprecated("Attribute alpha is deprecated in 0.19 and " - "will be removed in 0.21. See ``alpha_`` instead") + @deprecated('Attribute alpha is deprecated in 0.19 and ' + 'will be removed in 0.21. 
See ``alpha_`` instead') def alpha(self): # impedance matching for the above Lars.fit (should not be documented) return self.alpha_ From 709afd04db08c5f5c2b89fc79cf88dbc521e542f Mon Sep 17 00:00:00 2001 From: Kuai Yu Date: Sun, 29 Jul 2018 09:29:01 -0400 Subject: [PATCH 2/5] style fixes --- sklearn/linear_model/least_angle.py | 23 ++-- .../linear_model/tests/test_least_angle.py | 127 +++++++----------- 2 files changed, 63 insertions(+), 87 deletions(-) diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py index 3af5869c19291..31206e3dc07af 100644 --- a/sklearn/linear_model/least_angle.py +++ b/sklearn/linear_model/least_angle.py @@ -27,7 +27,7 @@ from ..externals.six.moves import xrange from ..externals.six import string_types -solve_triangular_args = {'check_finite': False} +SOLVE_TRIANGULAR_ARGS = {'check_finite': False} def lars_path(X, y, Xy=None, Gram=None, max_iter=500, @@ -285,7 +285,7 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, L[n_active, :n_active], trans=0, lower=1, overwrite_b=True, - **solve_triangular_args) + **SOLVE_TRIANGULAR_ARGS) v = np.dot(L[n_active, :n_active], L[n_active, :n_active]) diag = max(np.sqrt(np.abs(c - v)), eps) @@ -338,9 +338,9 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, break # least squares solution - least_squares, info = solve_cholesky(L[:n_active, :n_active], - sign_active[:n_active], - lower=True) + least_squares, _ = solve_cholesky(L[:n_active, :n_active], + sign_active[:n_active], + lower=True) if least_squares.size == 1 and least_squares == 0: # This happens because sign_active[:n_active] = 0 @@ -356,7 +356,7 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, L_ = L[:n_active, :n_active].copy() while not np.isfinite(AA): L_.flat[::n_active + 1] += (2 ** i) * eps - least_squares, info = solve_cholesky( + least_squares, _ = solve_cholesky( L_, sign_active[:n_active], lower=True) tmp = max(np.sum(least_squares * sign_active[:n_active]), eps) @@ -427,8 +427,8 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, if drop and method == 'lasso': # handle the case when idx is not length of 1 - [arrayfuncs.cholesky_delete(L[:n_active, :n_active], ii) for ii in - idx] + for ii in idx: + arrayfuncs.cholesky_delete(L[:n_active, :n_active], ii) n_active -= 1 m, n = idx, n_active @@ -606,7 +606,8 @@ def __init__(self, fit_intercept=True, verbose=False, normalize=True, self.copy_X = copy_X self.fit_path = fit_path - def _get_gram(self, precompute, X, y): + @staticmethod + def _get_gram(precompute, X, y): if (not hasattr(precompute, '__array__')) and ( (precompute is True) or (precompute == 'auto' and X.shape[0] > X.shape[1]) or @@ -1135,7 +1136,7 @@ def fit(self, X, y): all_alphas = all_alphas[::stride] mse_path = np.empty((len(all_alphas), len(cv_paths))) - for index, (alphas, active, coefs, residues) in enumerate(cv_paths): + for index, (alphas, _, _, residues) in enumerate(cv_paths): alphas = alphas[::-1] residues = residues[::-1] if alphas[0] != 0: @@ -1481,7 +1482,7 @@ def fit(self, X, y, copy_X=True): Gram = self.precompute - alphas_, active_, coef_path_, self.n_iter_ = lars_path( + alphas_, _, coef_path_, self.n_iter_ = lars_path( X, y, Gram=Gram, copy_X=copy_X, copy_Gram=True, alpha_min=0.0, method='lasso', verbose=self.verbose, max_iter=max_iter, eps=self.eps, return_n_iter=True, positive=self.positive) diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index 8545ecd988399..e9a1934948a0d 100644 --- 
a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -3,9 +3,8 @@ from distutils.version import LooseVersion import numpy as np -from scipy import linalg - import pytest +from scipy import linalg from sklearn.model_selection import train_test_split from sklearn.utils.testing import assert_equal @@ -22,10 +21,12 @@ from sklearn import linear_model, datasets from sklearn.linear_model.least_angle import _lars_path_residues +# TODO: use another dataset that has multiple drops diabetes = datasets.load_diabetes() X, y = diabetes.data, diabetes.target - -# TODO: use another dataset that has multiple drops +G = np.dot(X.T, X) +Xy = np.dot(X.T, y) +n_samples = y.size def test_simple(): @@ -38,12 +39,12 @@ def test_simple(): try: sys.stdout = StringIO() - alphas_, active, coef_path_ = linear_model.lars_path( - diabetes.data, diabetes.target, method="lar", verbose=10) + _, _, coef_path_ = linear_model.lars_path( + X, y, method='lar', verbose=10) sys.stdout = old_stdout - for (i, coef_) in enumerate(coef_path_.T): + for i, coef_ in enumerate(coef_path_.T): res = y - np.dot(X, coef_) cov = np.dot(X.T, res) C = np.max(abs(cov)) @@ -61,9 +62,8 @@ def test_simple(): def test_simple_precomputed(): # The same, with precomputed Gram matrix - G = np.dot(diabetes.data.T, diabetes.data) - alphas_, active, coef_path_ = linear_model.lars_path( - diabetes.data, diabetes.target, Gram=G, method="lar") + _, _, coef_path_ = linear_model.lars_path( + X, y, Gram=G, method='lar') for i, coef_ in enumerate(coef_path_.T): res = y - np.dot(X, coef_) @@ -80,10 +80,7 @@ def test_simple_precomputed(): def test_all_precomputed(): # Test that lars_path with precomputed Gram and Xy gives the right answer - X, y = diabetes.data, diabetes.target - G = np.dot(X.T, X) - Xy = np.dot(X.T, y) - for method in 'lar', 'lasso': + for method in ('lar', 'lasso'): output = linear_model.lars_path(X, y, method=method) output_pre = linear_model.lars_path(X, y, Gram=G, Xy=Xy, method=method) for expected, got in zip(output, output_pre): @@ -95,7 +92,7 @@ def test_all_precomputed(): def test_lars_lstsq(): # Test that Lars gives least square solution at the end # of the path - X1 = 3 * diabetes.data # use un-normalized dataset + X1 = 3 * X # use un-normalized dataset clf = linear_model.LassoLars(alpha=0.) clf.fit(X1, y) # Avoid FutureWarning about default value change when numpy >= 1.14 @@ -109,7 +106,7 @@ def test_lars_lstsq(): def test_lasso_gives_lstsq_solution(): # Test that Lars Lasso gives least square solution at the end # of the path - alphas_, active, coef_path_ = linear_model.lars_path(X, y, method="lasso") + _, _, coef_path_ = linear_model.lars_path(X, y, method='lasso') coef_lstsq = np.linalg.lstsq(X, y)[0] assert_array_almost_equal(coef_lstsq, coef_path_[:, -1]) @@ -122,8 +119,8 @@ def test_collinearity(): y = np.array([1., 0., 0]) rng = np.random.RandomState(0) - f = ignore_warnings - _, _, coef_path_ = f(linear_model.lars_path)(X, y, alpha_min=0.01) + _, _, coef_path_ = ignore_warnings(linear_model.lars_path)( + X, y, alpha_min=0.01) assert_true(not np.isnan(coef_path_).any()) residual = np.dot(X, coef_path_[:, -1]) - y assert_less((residual ** 2).sum(), 1.) 
# just make sure it's bounded @@ -140,11 +137,10 @@ def test_collinearity(): def test_no_path(): # Test that the ``return_path=False`` option returns the correct output - - alphas_, active_, coef_path_ = linear_model.lars_path( - diabetes.data, diabetes.target, method="lar") - alpha_, active, coef = linear_model.lars_path( - diabetes.data, diabetes.target, method="lar", return_path=False) + alphas_, _, coef_path_ = linear_model.lars_path( + X, y, method='lar') + alpha_, _, coef = linear_model.lars_path( + X, y, method='lar', return_path=False) assert_array_almost_equal(coef, coef_path_[:, -1]) assert_true(alpha_ == alphas_[-1]) @@ -152,14 +148,10 @@ def test_no_path(): def test_no_path_precomputed(): # Test that the ``return_path=False`` option with Gram remains correct - - G = np.dot(diabetes.data.T, diabetes.data) - - alphas_, active_, coef_path_ = linear_model.lars_path( - diabetes.data, diabetes.target, method="lar", Gram=G) - alpha_, active, coef = linear_model.lars_path( - diabetes.data, diabetes.target, method="lar", Gram=G, - return_path=False) + alphas_, _, coef_path_ = linear_model.lars_path( + X, y, method='lar', Gram=G) + alpha_, _, coef = linear_model.lars_path( + X, y, method='lar', Gram=G, return_path=False) assert_array_almost_equal(coef, coef_path_[:, -1]) assert_true(alpha_ == alphas_[-1]) @@ -172,25 +164,20 @@ def test_no_path_all_precomputed(): G = np.dot(X.T, X) Xy = np.dot(X.T, y) - alphas_, active_, coef_path_ = linear_model.lars_path( - X, y, method="lasso", Gram=G, Xy=Xy, alpha_min=0.9) - print("---") - alpha_, active, coef = linear_model.lars_path( - X, y, method="lasso", Gram=G, Xy=Xy, alpha_min=0.9, return_path=False) + alphas_, _, coef_path_ = linear_model.lars_path( + X, y, method='lasso', Xy=Xy, Gram=G, alpha_min=0.9) + alpha_, _, coef = linear_model.lars_path( + X, y, method='lasso', Gram=G, Xy=Xy, alpha_min=0.9, return_path=False) assert_array_almost_equal(coef, coef_path_[:, -1]) assert_true(alpha_ == alphas_[-1]) @pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 -@pytest.mark.parametrize( - 'classifier', - [linear_model.Lars, linear_model.LarsCV, linear_model.LassoLarsIC]) +@pytest.mark.parametrize('classifier', [linear_model.Lars, linear_model.LarsCV, + linear_model.LassoLarsIC]) def test_lars_precompute(classifier): # Check for different values of precompute - X, y = diabetes.data, diabetes.target - G = np.dot(X.T, X) - clf = classifier(precompute=G) output_1 = ignore_warnings(clf.fit)(X, y).coef_ for precompute in [True, False, 'auto', None]: @@ -203,7 +190,7 @@ def test_singular_matrix(): # Test when input is a singular matrix X1 = np.array([[1, 1.], [1., 1.]]) y1 = np.array([1, 1]) - alphas, active, coef_path = linear_model.lars_path(X1, y1) + _, _, coef_path = linear_model.lars_path(X1, y1) assert_array_almost_equal(coef_path.T, [[0, 0], [1, 0]]) @@ -212,14 +199,14 @@ def test_rank_deficient_design(): # deficient input data (with n_features < rank) in the same way # as coordinate descent Lasso y = [5, 0, 5] - for X in ([[5, 0], + for X in ( + [[5, 0], [0, 5], [10, 10]], - [[10, 10, 0], [1e-32, 0, 0], - [0, 0, 1]], - ): + [0, 0, 1]] + ): # To be able to use the coefs to compute the objective function, # we need to turn off normalization lars = linear_model.LassoLars(.1, normalize=False) @@ -234,7 +221,7 @@ def test_rank_deficient_design(): assert_less(obj_lars, obj_cd * (1. 
+ 1e-8)) -def test_lasso_lars_vs_lasso_cd(verbose=False): +def test_lasso_lars_vs_lasso_cd(): # Test that LassoLars and Lasso using coordinate descent give the # same results. X = 3 * diabetes.data @@ -271,7 +258,7 @@ def test_lasso_lars_vs_lasso_cd(verbose=False): assert_less(error, 0.01) -def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False): +def test_lasso_lars_vs_lasso_cd_early_stopping(): # Test that LassoLars and Lasso using coordinate descent give the # same results when early stopping is used. # (test : before, in the middle, and in the last part of the path) @@ -393,8 +380,7 @@ def test_lars_n_nonzero_coefs(verbose=False): @ignore_warnings def test_multitarget(): # Assure that estimators receiving multidimensional y do the right thing - X = diabetes.data - Y = np.vstack([diabetes.target, diabetes.target ** 2]).T + Y = np.vstack([y, y ** 2]).T n_targets = Y.shape[1] estimators = [ linear_model.LassoLars(), @@ -439,10 +425,9 @@ def test_lars_cv(): @pytest.mark.filterwarnings('ignore::FutureWarning') def test_lars_cv_max_iter(): with warnings.catch_warnings(record=True) as w: - X = diabetes.data - y = diabetes.target rng = np.random.RandomState(42) x = rng.randn(len(y)) + X = diabetes.data X = np.c_[X, x, x] # add correlated features lars_cv = linear_model.LassoLarsCV(max_iter=5) lars_cv.fit(X, y) @@ -458,7 +443,6 @@ def test_lasso_lars_ic(): lars_aic = linear_model.LassoLarsIC('aic') rng = np.random.RandomState(42) X = diabetes.data - y = diabetes.target X = np.c_[X, rng.randn(X.shape[0], 5)] # add 5 bad features lars_bic.fit(X, y) lars_aic.fit(X, y) @@ -498,52 +482,43 @@ def test_lars_path_positive_constraint(): # Once deprecation of LAR + positive option is done use these: # assert_raises(ValueError, linear_model.lars_path, diabetes['data'], # diabetes['target'], method='lar', positive=True) - - with pytest.warns(DeprecationWarning, match="broken"): + with pytest.warns(DeprecationWarning, match='broken'): linear_model.lars_path(diabetes['data'], diabetes['target'], return_path=True, method='lar', positive=True) - method = 'lasso' - alpha, active, coefs = \ - linear_model.lars_path(diabetes['data'], diabetes['target'], - return_path=True, method=method, + _, _, coefs = \ + linear_model.lars_path(X, y, return_path=True, method=method, positive=False) assert_true(coefs.min() < 0) - alpha, active, coefs = \ - linear_model.lars_path(diabetes['data'], diabetes['target'], - return_path=True, method=method, + _, _, coefs = \ + linear_model.lars_path(X, y, return_path=True, method=method, positive=True) assert_true(coefs.min() >= 0) -# now we gonna test the positive option for all estimator classes - -default_parameter = {'fit_intercept': False} - -estimator_parameter_map = {'LassoLars': {'alpha': 0.1}, - 'LassoLarsCV': {}, - 'LassoLarsIC': {}} - - @pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 def test_estimatorclasses_positive_constraint(): # testing the transmissibility for the positive option of all estimator # classes in this same function here + default_parameter = {'fit_intercept': False} + estimator_parameter_map = {'LassoLars': {'alpha': 0.1}, + 'LassoLarsCV': {}, + 'LassoLarsIC': {}} for estname in estimator_parameter_map: params = default_parameter.copy() params.update(estimator_parameter_map[estname]) estimator = getattr(linear_model, estname)(positive=False, **params) - estimator.fit(diabetes['data'], diabetes['target']) + estimator.fit(X, y) assert_true(estimator.coef_.min() < 0) estimator = getattr(linear_model, estname)(positive=True, 
**params) - estimator.fit(diabetes['data'], diabetes['target']) + estimator.fit(X, y) assert_true(min(estimator.coef_) >= 0) -def test_lasso_lars_vs_lasso_cd_positive(verbose=False): +def test_lasso_lars_vs_lasso_cd_positive(): # Test that LassoLars and Lasso using coordinate descent give the # same results when using the positive option @@ -634,7 +609,7 @@ def test_lasso_lars_vs_R_implementation(): 0.025219751009936], [0, -3.577397088285891, -4.702795355871871, -7.016748621359461, -7.614898471899412, -0.336938391359179, - 0, 0, 0.001213370600853, 0.048162321585148], + 0, 0, 0.001213370600853, 0.048162321585148], [0, 0, 0, 2.231558436628169, 2.723267514525966, 2.811549786389614, 2.813766976061531, 2.817462468949557, 2.817368178703816, 2.816221090636795], From f0475532deb14c650c9ec8e2c925fb0e078f6904 Mon Sep 17 00:00:00 2001 From: Kuai Yu Date: Sat, 4 Aug 2018 15:27:49 -0400 Subject: [PATCH 3/5] Revert "style changes" This reverts commit 3617030424a56447a2de689fc3b3505ecca3dea8. --- sklearn/linear_model/least_angle.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py index c78e1a07507e0..8acd976fb1507 100644 --- a/sklearn/linear_model/least_angle.py +++ b/sklearn/linear_model/least_angle.py @@ -196,7 +196,7 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, if verbose: if verbose > 1: - print('Step\t\tAdded\t\tDropped\t\tActive set size\t\tC') + print("Step\t\tAdded\t\tDropped\t\tActive set size\t\tC") else: sys.stdout.write('.') sys.stdout.flush() @@ -320,7 +320,7 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, n_active += 1 if verbose > 1: - print('%s\t\t%s\t\t%s\t\t%s\t\t%s' % (n_iter, active[-1], '', + print("%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, active[-1], '', n_active, C)) if method == 'lasso' and n_iter > 0 and prev_alpha[0] < alpha[0]: @@ -470,7 +470,7 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, sign_active = np.delete(sign_active, idx) sign_active = np.append(sign_active, 0.) # just to maintain size if verbose > 1: - print('%s\t\t%s\t\t%s\t\t%s\t\t%s' % (n_iter, '', drop_idx, + print("%s\t\t%s\t\t%s\t\t%s\t\t%s" % (n_iter, '', drop_idx, n_active, abs(temp))) if return_path: @@ -1116,8 +1116,8 @@ def fit(self, X, y): # As we use cross-validation, the Gram matrix is not precomputed here Gram = self.precompute if hasattr(Gram, '__array__'): - warnings.warn('Parameter "precompute" cannot be an array in ' - '%s. Automatically switch to "auto" instead.' + warnings.warn("Parameter 'precompute' cannot be an array in " + "%s. Automatically switch to 'auto' instead." % self.__class__.__name__) Gram = 'auto' @@ -1171,8 +1171,8 @@ def fit(self, X, y): return self @property - @deprecated('Attribute alpha is deprecated in 0.19 and ' - 'will be removed in 0.21. See ``alpha_`` instead') + @deprecated("Attribute alpha is deprecated in 0.19 and " + "will be removed in 0.21. See ``alpha_`` instead") def alpha(self): # impedance matching for the above Lars.fit (should not be documented) return self.alpha_ From 386400afa5f0aa289b6e3aeed4d20fba06d34729 Mon Sep 17 00:00:00 2001 From: Kuai Yu Date: Sat, 4 Aug 2018 15:27:59 -0400 Subject: [PATCH 4/5] Revert "style fixes" This reverts commit 709afd04db08c5f5c2b89fc79cf88dbc521e542f. 
--- sklearn/linear_model/least_angle.py | 23 ++-- .../linear_model/tests/test_least_angle.py | 127 +++++++++++------- 2 files changed, 87 insertions(+), 63 deletions(-) diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py index 8acd976fb1507..3acb308cf3b92 100644 --- a/sklearn/linear_model/least_angle.py +++ b/sklearn/linear_model/least_angle.py @@ -27,7 +27,7 @@ from ..externals.six.moves import xrange from ..externals.six import string_types -SOLVE_TRIANGULAR_ARGS = {'check_finite': False} +solve_triangular_args = {'check_finite': False} def lars_path(X, y, Xy=None, Gram=None, max_iter=500, @@ -285,7 +285,7 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, L[n_active, :n_active], trans=0, lower=1, overwrite_b=True, - **SOLVE_TRIANGULAR_ARGS) + **solve_triangular_args) v = np.dot(L[n_active, :n_active], L[n_active, :n_active]) diag = max(np.sqrt(np.abs(c - v)), eps) @@ -338,9 +338,9 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, break # least squares solution - least_squares, _ = solve_cholesky(L[:n_active, :n_active], - sign_active[:n_active], - lower=True) + least_squares, info = solve_cholesky(L[:n_active, :n_active], + sign_active[:n_active], + lower=True) if least_squares.size == 1 and least_squares == 0: # This happens because sign_active[:n_active] = 0 @@ -356,7 +356,7 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, L_ = L[:n_active, :n_active].copy() while not np.isfinite(AA): L_.flat[::n_active + 1] += (2 ** i) * eps - least_squares, _ = solve_cholesky( + least_squares, info = solve_cholesky( L_, sign_active[:n_active], lower=True) tmp = max(np.sum(least_squares * sign_active[:n_active]), eps) @@ -427,8 +427,8 @@ def lars_path(X, y, Xy=None, Gram=None, max_iter=500, if drop and method == 'lasso': # handle the case when idx is not length of 1 - for ii in idx: - arrayfuncs.cholesky_delete(L[:n_active, :n_active], ii) + [arrayfuncs.cholesky_delete(L[:n_active, :n_active], ii) for ii in + idx] n_active -= 1 m, n = idx, n_active @@ -606,8 +606,7 @@ def __init__(self, fit_intercept=True, verbose=False, normalize=True, self.copy_X = copy_X self.fit_path = fit_path - @staticmethod - def _get_gram(precompute, X, y): + def _get_gram(self, precompute, X, y): if (not hasattr(precompute, '__array__')) and ( (precompute is True) or (precompute == 'auto' and X.shape[0] > X.shape[1]) or @@ -1136,7 +1135,7 @@ def fit(self, X, y): all_alphas = all_alphas[::stride] mse_path = np.empty((len(all_alphas), len(cv_paths))) - for index, (alphas, _, _, residues) in enumerate(cv_paths): + for index, (alphas, active, coefs, residues) in enumerate(cv_paths): alphas = alphas[::-1] residues = residues[::-1] if alphas[0] != 0: @@ -1482,7 +1481,7 @@ def fit(self, X, y, copy_X=True): Gram = self.precompute - alphas_, _, coef_path_, self.n_iter_ = lars_path( + alphas_, active_, coef_path_, self.n_iter_ = lars_path( X, y, Gram=Gram, copy_X=copy_X, copy_Gram=True, alpha_min=0.0, method='lasso', verbose=self.verbose, max_iter=max_iter, eps=self.eps, return_n_iter=True, positive=self.positive) diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index e9a1934948a0d..8545ecd988399 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -3,9 +3,10 @@ from distutils.version import LooseVersion import numpy as np -import pytest from scipy import linalg +import pytest + from sklearn.model_selection import train_test_split from sklearn.utils.testing 
import assert_equal from sklearn.utils.testing import assert_array_almost_equal @@ -21,12 +22,10 @@ from sklearn import linear_model, datasets from sklearn.linear_model.least_angle import _lars_path_residues -# TODO: use another dataset that has multiple drops diabetes = datasets.load_diabetes() X, y = diabetes.data, diabetes.target -G = np.dot(X.T, X) -Xy = np.dot(X.T, y) -n_samples = y.size + +# TODO: use another dataset that has multiple drops def test_simple(): @@ -39,12 +38,12 @@ def test_simple(): try: sys.stdout = StringIO() - _, _, coef_path_ = linear_model.lars_path( - X, y, method='lar', verbose=10) + alphas_, active, coef_path_ = linear_model.lars_path( + diabetes.data, diabetes.target, method="lar", verbose=10) sys.stdout = old_stdout - for i, coef_ in enumerate(coef_path_.T): + for (i, coef_) in enumerate(coef_path_.T): res = y - np.dot(X, coef_) cov = np.dot(X.T, res) C = np.max(abs(cov)) @@ -62,8 +61,9 @@ def test_simple(): def test_simple_precomputed(): # The same, with precomputed Gram matrix - _, _, coef_path_ = linear_model.lars_path( - X, y, Gram=G, method='lar') + G = np.dot(diabetes.data.T, diabetes.data) + alphas_, active, coef_path_ = linear_model.lars_path( + diabetes.data, diabetes.target, Gram=G, method="lar") for i, coef_ in enumerate(coef_path_.T): res = y - np.dot(X, coef_) @@ -80,7 +80,10 @@ def test_simple_precomputed(): def test_all_precomputed(): # Test that lars_path with precomputed Gram and Xy gives the right answer - for method in ('lar', 'lasso'): + X, y = diabetes.data, diabetes.target + G = np.dot(X.T, X) + Xy = np.dot(X.T, y) + for method in 'lar', 'lasso': output = linear_model.lars_path(X, y, method=method) output_pre = linear_model.lars_path(X, y, Gram=G, Xy=Xy, method=method) for expected, got in zip(output, output_pre): @@ -92,7 +95,7 @@ def test_all_precomputed(): def test_lars_lstsq(): # Test that Lars gives least square solution at the end # of the path - X1 = 3 * X # use un-normalized dataset + X1 = 3 * diabetes.data # use un-normalized dataset clf = linear_model.LassoLars(alpha=0.) clf.fit(X1, y) # Avoid FutureWarning about default value change when numpy >= 1.14 @@ -106,7 +109,7 @@ def test_lars_lstsq(): def test_lasso_gives_lstsq_solution(): # Test that Lars Lasso gives least square solution at the end # of the path - _, _, coef_path_ = linear_model.lars_path(X, y, method='lasso') + alphas_, active, coef_path_ = linear_model.lars_path(X, y, method="lasso") coef_lstsq = np.linalg.lstsq(X, y)[0] assert_array_almost_equal(coef_lstsq, coef_path_[:, -1]) @@ -119,8 +122,8 @@ def test_collinearity(): y = np.array([1., 0., 0]) rng = np.random.RandomState(0) - _, _, coef_path_ = ignore_warnings(linear_model.lars_path)( - X, y, alpha_min=0.01) + f = ignore_warnings + _, _, coef_path_ = f(linear_model.lars_path)(X, y, alpha_min=0.01) assert_true(not np.isnan(coef_path_).any()) residual = np.dot(X, coef_path_[:, -1]) - y assert_less((residual ** 2).sum(), 1.) 
# just make sure it's bounded @@ -137,10 +140,11 @@ def test_collinearity(): def test_no_path(): # Test that the ``return_path=False`` option returns the correct output - alphas_, _, coef_path_ = linear_model.lars_path( - X, y, method='lar') - alpha_, _, coef = linear_model.lars_path( - X, y, method='lar', return_path=False) + + alphas_, active_, coef_path_ = linear_model.lars_path( + diabetes.data, diabetes.target, method="lar") + alpha_, active, coef = linear_model.lars_path( + diabetes.data, diabetes.target, method="lar", return_path=False) assert_array_almost_equal(coef, coef_path_[:, -1]) assert_true(alpha_ == alphas_[-1]) @@ -148,10 +152,14 @@ def test_no_path(): def test_no_path_precomputed(): # Test that the ``return_path=False`` option with Gram remains correct - alphas_, _, coef_path_ = linear_model.lars_path( - X, y, method='lar', Gram=G) - alpha_, _, coef = linear_model.lars_path( - X, y, method='lar', Gram=G, return_path=False) + + G = np.dot(diabetes.data.T, diabetes.data) + + alphas_, active_, coef_path_ = linear_model.lars_path( + diabetes.data, diabetes.target, method="lar", Gram=G) + alpha_, active, coef = linear_model.lars_path( + diabetes.data, diabetes.target, method="lar", Gram=G, + return_path=False) assert_array_almost_equal(coef, coef_path_[:, -1]) assert_true(alpha_ == alphas_[-1]) @@ -164,20 +172,25 @@ def test_no_path_all_precomputed(): G = np.dot(X.T, X) Xy = np.dot(X.T, y) - alphas_, _, coef_path_ = linear_model.lars_path( - X, y, method='lasso', Xy=Xy, Gram=G, alpha_min=0.9) - alpha_, _, coef = linear_model.lars_path( - X, y, method='lasso', Gram=G, Xy=Xy, alpha_min=0.9, return_path=False) + alphas_, active_, coef_path_ = linear_model.lars_path( + X, y, method="lasso", Gram=G, Xy=Xy, alpha_min=0.9) + print("---") + alpha_, active, coef = linear_model.lars_path( + X, y, method="lasso", Gram=G, Xy=Xy, alpha_min=0.9, return_path=False) assert_array_almost_equal(coef, coef_path_[:, -1]) assert_true(alpha_ == alphas_[-1]) @pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 -@pytest.mark.parametrize('classifier', [linear_model.Lars, linear_model.LarsCV, - linear_model.LassoLarsIC]) +@pytest.mark.parametrize( + 'classifier', + [linear_model.Lars, linear_model.LarsCV, linear_model.LassoLarsIC]) def test_lars_precompute(classifier): # Check for different values of precompute + X, y = diabetes.data, diabetes.target + G = np.dot(X.T, X) + clf = classifier(precompute=G) output_1 = ignore_warnings(clf.fit)(X, y).coef_ for precompute in [True, False, 'auto', None]: @@ -190,7 +203,7 @@ def test_singular_matrix(): # Test when input is a singular matrix X1 = np.array([[1, 1.], [1., 1.]]) y1 = np.array([1, 1]) - _, _, coef_path = linear_model.lars_path(X1, y1) + alphas, active, coef_path = linear_model.lars_path(X1, y1) assert_array_almost_equal(coef_path.T, [[0, 0], [1, 0]]) @@ -199,14 +212,14 @@ def test_rank_deficient_design(): # deficient input data (with n_features < rank) in the same way # as coordinate descent Lasso y = [5, 0, 5] - for X in ( - [[5, 0], + for X in ([[5, 0], [0, 5], [10, 10]], + [[10, 10, 0], [1e-32, 0, 0], - [0, 0, 1]] - ): + [0, 0, 1]], + ): # To be able to use the coefs to compute the objective function, # we need to turn off normalization lars = linear_model.LassoLars(.1, normalize=False) @@ -221,7 +234,7 @@ def test_rank_deficient_design(): assert_less(obj_lars, obj_cd * (1. 
+ 1e-8)) -def test_lasso_lars_vs_lasso_cd(): +def test_lasso_lars_vs_lasso_cd(verbose=False): # Test that LassoLars and Lasso using coordinate descent give the # same results. X = 3 * diabetes.data @@ -258,7 +271,7 @@ def test_lasso_lars_vs_lasso_cd(): assert_less(error, 0.01) -def test_lasso_lars_vs_lasso_cd_early_stopping(): +def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False): # Test that LassoLars and Lasso using coordinate descent give the # same results when early stopping is used. # (test : before, in the middle, and in the last part of the path) @@ -380,7 +393,8 @@ def test_lars_n_nonzero_coefs(verbose=False): @ignore_warnings def test_multitarget(): # Assure that estimators receiving multidimensional y do the right thing - Y = np.vstack([y, y ** 2]).T + X = diabetes.data + Y = np.vstack([diabetes.target, diabetes.target ** 2]).T n_targets = Y.shape[1] estimators = [ linear_model.LassoLars(), @@ -425,9 +439,10 @@ def test_lars_cv(): @pytest.mark.filterwarnings('ignore::FutureWarning') def test_lars_cv_max_iter(): with warnings.catch_warnings(record=True) as w: + X = diabetes.data + y = diabetes.target rng = np.random.RandomState(42) x = rng.randn(len(y)) - X = diabetes.data X = np.c_[X, x, x] # add correlated features lars_cv = linear_model.LassoLarsCV(max_iter=5) lars_cv.fit(X, y) @@ -443,6 +458,7 @@ def test_lasso_lars_ic(): lars_aic = linear_model.LassoLarsIC('aic') rng = np.random.RandomState(42) X = diabetes.data + y = diabetes.target X = np.c_[X, rng.randn(X.shape[0], 5)] # add 5 bad features lars_bic.fit(X, y) lars_aic.fit(X, y) @@ -482,43 +498,52 @@ def test_lars_path_positive_constraint(): # Once deprecation of LAR + positive option is done use these: # assert_raises(ValueError, linear_model.lars_path, diabetes['data'], # diabetes['target'], method='lar', positive=True) - with pytest.warns(DeprecationWarning, match='broken'): + + with pytest.warns(DeprecationWarning, match="broken"): linear_model.lars_path(diabetes['data'], diabetes['target'], return_path=True, method='lar', positive=True) + method = 'lasso' - _, _, coefs = \ - linear_model.lars_path(X, y, return_path=True, method=method, + alpha, active, coefs = \ + linear_model.lars_path(diabetes['data'], diabetes['target'], + return_path=True, method=method, positive=False) assert_true(coefs.min() < 0) - _, _, coefs = \ - linear_model.lars_path(X, y, return_path=True, method=method, + alpha, active, coefs = \ + linear_model.lars_path(diabetes['data'], diabetes['target'], + return_path=True, method=method, positive=True) assert_true(coefs.min() >= 0) +# now we gonna test the positive option for all estimator classes + +default_parameter = {'fit_intercept': False} + +estimator_parameter_map = {'LassoLars': {'alpha': 0.1}, + 'LassoLarsCV': {}, + 'LassoLarsIC': {}} + + @pytest.mark.filterwarnings('ignore: You should specify a value') # 0.22 def test_estimatorclasses_positive_constraint(): # testing the transmissibility for the positive option of all estimator # classes in this same function here - default_parameter = {'fit_intercept': False} - estimator_parameter_map = {'LassoLars': {'alpha': 0.1}, - 'LassoLarsCV': {}, - 'LassoLarsIC': {}} for estname in estimator_parameter_map: params = default_parameter.copy() params.update(estimator_parameter_map[estname]) estimator = getattr(linear_model, estname)(positive=False, **params) - estimator.fit(X, y) + estimator.fit(diabetes['data'], diabetes['target']) assert_true(estimator.coef_.min() < 0) estimator = getattr(linear_model, estname)(positive=True, **params) - 
estimator.fit(X, y) + estimator.fit(diabetes['data'], diabetes['target']) assert_true(min(estimator.coef_) >= 0) -def test_lasso_lars_vs_lasso_cd_positive(): +def test_lasso_lars_vs_lasso_cd_positive(verbose=False): # Test that LassoLars and Lasso using coordinate descent give the # same results when using the positive option @@ -609,7 +634,7 @@ def test_lasso_lars_vs_R_implementation(): 0.025219751009936], [0, -3.577397088285891, -4.702795355871871, -7.016748621359461, -7.614898471899412, -0.336938391359179, 0, 0, 0.001213370600853, 0.048162321585148], [0, 0, 0, 2.231558436628169, 2.723267514525966, 2.811549786389614, 2.813766976061531, 2.817462468949557, 2.817368178703816, 2.816221090636795], From 6aad0aaa8105b6b521442d8e9a5a1a8b72673109 Mon Sep 17 00:00:00 2001 From: Kuai Yu Date: Sat, 4 Aug 2018 15:31:14 -0400 Subject: [PATCH 5/5] PR Feedback --- sklearn/linear_model/least_angle.py | 3 ++- sklearn/linear_model/tests/test_least_angle.py | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py index 3acb308cf3b92..dd47030308d40 100644 --- a/sklearn/linear_model/least_angle.py +++ b/sklearn/linear_model/least_angle.py @@ -606,7 +606,8 @@ def __init__(self, fit_intercept=True, verbose=False, normalize=True, self.copy_X = copy_X self.fit_path = fit_path - def _get_gram(self, precompute, X, y): + @staticmethod + def _get_gram(precompute, X, y): if (not hasattr(precompute, '__array__')) and ( (precompute is True) or (precompute == 'auto' and X.shape[0] > X.shape[1]) or diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index 8545ecd988399..9c9a883f96383 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -80,7 +80,6 @@ def test_simple_precomputed(): def test_all_precomputed(): # Test that lars_path with precomputed Gram and Xy gives the right answer - X, y = diabetes.data, diabetes.target G = np.dot(X.T, X) Xy = np.dot(X.T, y) for method in 'lar', 'lasso': @@ -188,7 +187,6 @@ def test_no_path_all_precomputed(): [linear_model.Lars, linear_model.LarsCV, linear_model.LassoLarsIC]) def test_lars_precompute(classifier): # Check for different values of precompute - X, y = diabetes.data, diabetes.target G = np.dot(X.T, X) clf = classifier(precompute=G)