Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit a40b576

Browse files
larsmans authored and Fabian Pedregosa committed
refactor linear models to call as_float_array only from _center_data
coordinate_descent is the exception, since it plays tricks with the identity check `X is X_init` that I don't fully understand yet.
1 parent 3e3872c commit a40b576

File tree

6 files changed: +23 -31 lines changed

sklearn/linear_model/base.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@
2020
from ..base import TransformerMixin
2121
from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber
2222
from ..utils.extmath import safe_sparse_dot
23-
from ..utils import safe_asanyarray
24-
from ..utils import as_float_array
23+
from ..utils import as_float_array, safe_asanyarray
2524

2625

2726
###
@@ -52,15 +51,16 @@ def predict(self, X):
5251
return safe_sparse_dot(X, self.coef_.T) + self.intercept_
5352

5453
@staticmethod
55-
def _center_data(X, y, fit_intercept, normalize=False):
54+
def _center_data(X, y, fit_intercept, normalize=False, overwrite_X=False):
5655
"""
5756
Centers data to have mean zero along axis 0. This is here because
5857
nearly all linear models will want their data to be centered.
5958
60-
WARNING : This function modifies X inplace :
61-
Use sklearn.utils.as_float_array before to convert X to np.float.
62-
You can specify an argument overwrite_X (default is False).
59+
If overwrite_X is True, modifies X in-place.
6360
"""
61+
if not overwrite_X:
62+
X = as_float_array(X, overwrite_X)
63+
6464
if fit_intercept:
6565
if sp.issparse(X):
6666
X_mean = np.zeros(X.shape[1])
@@ -140,10 +140,8 @@ def fit(self, X, y):
140140
X = np.asanyarray(X)
141141
y = np.asanyarray(y)
142142

143-
X = as_float_array(X, self.overwrite_X)
144-
145143
X, y, X_mean, y_mean, X_std = self._center_data(X, y,
146-
self.fit_intercept, self.normalize)
144+
self.fit_intercept, self.normalize, self.overwrite_X)
147145

148146
self.coef_, self.residues_, self.rank_, self.singular_ = \
149147
np.linalg.lstsq(X, y)

sklearn/linear_model/bayes.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from scipy import linalg
1111

1212
from .base import LinearModel
13-
from ..utils import as_float_array
1413
from ..utils.extmath import fast_logdet
1514

1615

@@ -147,9 +146,8 @@ def fit(self, X, y):
147146
"""
148147
X = np.asanyarray(X, dtype=np.float)
149148
y = np.asanyarray(y, dtype=np.float)
150-
X = as_float_array(X, self.overwrite_X)
151149
X, y, X_mean, y_mean, X_std = self._center_data(X, y,
152-
self.fit_intercept, self.normalize)
150+
self.fit_intercept, self.normalize, self.overwrite_X)
153151
n_samples, n_features = X.shape
154152

155153
### Initialization of the values of the parameters
@@ -378,10 +376,8 @@ def fit(self, X, y):
378376
n_samples, n_features = X.shape
379377
coef_ = np.zeros(n_features)
380378

381-
X = as_float_array(X, self.overwrite_X)
382-
383379
X, y, X_mean, y_mean, X_std = self._center_data(X, y, self.fit_intercept,
384-
self.normalize)
380+
self.normalize, self.overwrite_X)
385381

386382
### Launch the convergence loop
387383
keep_lambda = np.ones(n_features, dtype=bool)

sklearn/linear_model/coordinate_descent.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,8 @@ def fit(self, X, y, Xy=None, coef_init=None):
128128
X_init = X
129129
X, y, X_mean, y_mean, X_std = self._center_data(X, y,
130130
self.fit_intercept,
131-
self.normalize)
131+
self.normalize,
132+
overwrite_X=True)
132133
precompute = self.precompute
133134
if X_init is not X and hasattr(precompute, '__array__'):
134135
precompute = 'auto' # recompute Gram
@@ -399,7 +400,8 @@ def enet_path(X, y, rho=0.5, eps=1e-3, n_alphas=100, alphas=None,
399400
X_init = X
400401
X, y, X_mean, y_mean, X_std = LinearModel._center_data(X, y,
401402
fit_intercept,
402-
normalize)
403+
normalize,
404+
overwrite_X=True)
403405
X = np.asfortranarray(X) # make data contiguous in memory
404406
n_samples, n_features = X.shape
405407

sklearn/linear_model/least_angle.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from scipy.linalg.lapack import get_lapack_funcs
1616

1717
from .base import LinearModel
18-
from ..utils import arrayfuncs, as_float_array
18+
from ..utils import arrayfuncs
1919
from ..utils import deprecated
2020
from ..cross_validation import check_cv
2121
from ..externals.joblib import Parallel, delayed
@@ -407,11 +407,10 @@ def fit(self, X, y, overwrite_X=False):
407407
X = np.atleast_2d(X)
408408
y = np.atleast_1d(y)
409409

410-
X = as_float_array(X, self.overwrite_X)
411-
412410
X, y, X_mean, y_mean, X_std = self._center_data(X, y,
413411
self.fit_intercept,
414-
self.normalize)
412+
self.normalize,
413+
self.overwrite_X)
415414
alpha = getattr(self, 'alpha', 0.)
416415
if hasattr(self, 'n_nonzero_coefs'):
417416
alpha = 0. # n_nonzero_coefs parametrization takes priority
@@ -951,10 +950,10 @@ def fit(self, X, y, overwrite_X=False):
951950
X = np.atleast_2d(X)
952951
y = np.atleast_1d(y)
953952

954-
X = as_float_array(X, self.overwrite_X)
955953
X, y, Xmean, ymean, Xstd = LinearModel._center_data(X, y,
956954
self.fit_intercept,
957-
normalize=self.normalize)
955+
self.normalize,
956+
self.overwrite_X)
958957
max_iter = self.max_iter
959958

960959
Gram = self._get_gram()

sklearn/linear_model/omp.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313

1414
from .base import LinearModel
1515
from ..utils.arrayfuncs import solve_triangular
16-
from ..utils import as_float_array
1716

1817
premature = """ Orthogonal matching pursuit ended prematurely due to linear
1918
dependence in the dictionary. The requested precision might not have been met.
@@ -506,11 +505,11 @@ def fit(self, X, y, Gram=None, Xy=None):
506505
X = np.atleast_2d(X)
507506
y = np.atleast_1d(y)
508507
n_features = X.shape[1]
509-
X = as_float_array(X, self.overwrite_X)
510508

511509
X, y, X_mean, y_mean, X_std = self._center_data(X, y,
512510
self.fit_intercept,
513-
self.normalize)
511+
self.normalize,
512+
self.overwrite_X)
514513

515514
if self.n_nonzero_coefs == None and self.tol is None:
516515
self.n_nonzero_coefs = int(0.1 * n_features)

sklearn/linear_model/ridge.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from .base import LinearModel
1111
from ..utils.extmath import safe_sparse_dot
12-
from ..utils import safe_asanyarray, as_float_array
12+
from ..utils import safe_asanyarray
1313
from ..preprocessing import LabelBinarizer
1414
from ..grid_search import GridSearchCV
1515

@@ -204,11 +204,10 @@ def fit(self, X, y, sample_weight=1.0, solver='auto'):
204204
"""
205205
X = safe_asanyarray(X, dtype=np.float)
206206
y = np.asanyarray(y, dtype=np.float)
207-
X = as_float_array(X, self.overwrite_X)
208207

209208
X, y, X_mean, y_mean, X_std = \
210209
self._center_data(X, y, self.fit_intercept,
211-
self.normalize)
210+
self.normalize, self.overwrite_X)
212211

213212
self.coef_ = ridge_regression(X, y, self.alpha, sample_weight,
214213
solver, self.tol)
@@ -400,10 +399,9 @@ def fit(self, X, y, sample_weight=1.0):
400399
y = np.asanyarray(y, dtype=np.float)
401400

402401
n_samples = X.shape[0]
403-
X = as_float_array(X, self.overwrite_X)
404402

405403
X, y, X_mean, y_mean, X_std = LinearModel._center_data(X, y,
406-
self.fit_intercept, self.normalize)
404+
self.fit_intercept, self.normalize, self.overwrite_X)
407405

408406
K, v, Q = self._pre_compute(X, y)
409407
n_y = 1 if len(y.shape) == 1 else y.shape[1]

0 commit comments

Comments
 (0)