Commit b2e6dcb

Author: giorgiop
Commit message: tests with WLS
1 parent 4ace210 commit b2e6dcb

File tree: 2 files changed (+48, -41 lines)

2 files changed

+48
-41
lines changed

sklearn/linear_model/tests/test_base.py

Lines changed: 5 additions & 14 deletions
@@ -5,6 +5,7 @@
 
 import numpy as np
 from scipy import sparse
+from scipy import linalg
 
 from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import assert_almost_equal
@@ -55,25 +56,15 @@ def test_linear_regression_sample_weights():
 
     for intercept in (True, False):
 
+        # LinearRegression with explicit sample_weight
         reg = LinearRegression(fit_intercept=intercept)
         reg.fit(X, y, sample_weight=sample_weight)
         coefs1 = reg.coef_
         inter1 = reg.intercept_
 
-        assert_equal(reg.coef_.shape, (X.shape[1], ))
+        assert_equal(reg.coef_.shape, (X.shape[1], ))  # sanity checks
         assert_greater(reg.score(X, y), 0.5)
 
-        # Sample weight can be implemented via a simple rescaling
-        # for the square loss.
-        scaled_y = y * np.sqrt(sample_weight)
-        scaled_X = X * np.sqrt(sample_weight)[:, np.newaxis]
-        reg.fit(scaled_X, scaled_y)
-        coefs2 = reg.coef_
-        inter2 = reg.intercept_
-
-        # assert_array_almost_equal(coefs1, coefs2)
-        # assert_almost_equal(inter1, inter2)
-
         # Closed form of the weighted least square
         # theta = (X^T W X)^(-1) * X^T W y
         W = np.diag(sample_weight)
@@ -82,8 +73,8 @@ def test_linear_regression_sample_weights():
         else:
             X_aug = np.column_stack((np.ones(n_samples), X))
 
-        coefs3 = np.linalg.pinv(X_aug.T.dot(W).dot(X_aug)
-                                ).dot(X_aug.T).dot(W).dot(y)
+        coefs3 = linalg.pinv(X_aug.T.dot(W).dot(X_aug)
+                             ).dot(X_aug.T).dot(W).dot(y)
 
         if intercept is False:
             assert_array_almost_equal(coefs1, coefs3)
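
For reference, the closed form exercised here, theta = (X^T W X)^(-1) X^T W y, can be checked on its own, together with the rescaling trick the removed code used: weighting sample i by w_i in the square loss is equivalent to multiplying the i-th row of X and y by sqrt(w_i) and solving ordinary least squares. A minimal standalone sketch (plain NumPy/SciPy, no intercept; not part of the commit):

import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
n_samples, n_features = 6, 5
X = rng.randn(n_samples, n_features)
y = rng.randn(n_samples)
sample_weight = 1.0 + rng.rand(n_samples)

# Closed form of weighted least squares: theta = (X^T W X)^(-1) X^T W y
W = np.diag(sample_weight)
theta = linalg.pinv(X.T.dot(W).dot(X)).dot(X.T).dot(W).dot(y)

# Same solution via rescaling: solve unweighted OLS on sqrt(w)-scaled data.
sw = np.sqrt(sample_weight)
theta2, _, _, _ = np.linalg.lstsq(X * sw[:, np.newaxis], y * sw, rcond=None)

np.testing.assert_array_almost_equal(theta, theta2)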

sklearn/linear_model/tests/test_ridge.py

Lines changed: 43 additions & 27 deletions
@@ -1,6 +1,7 @@
 import numpy as np
 import scipy.sparse as sp
 from scipy import linalg
+from itertools import product
 
 from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import assert_almost_equal
@@ -111,7 +112,7 @@ def test_ridge_singular():
     assert_greater(ridge.score(X, y), 0.9)
 
 
-def test_ridge_sample_weights():
+def test_ridge_regression_sample_weights():
     rng = np.random.RandomState(0)
 
     for solver in ("cholesky", ):
@@ -125,6 +126,7 @@ def test_ridge_sample_weights():
                                          alpha=alpha,
                                          sample_weight=sample_weight,
                                          solver=solver)
+
                # Sample weight can be implemented via a simple rescaling
                # for the square loss.
                coefs2 = ridge_regression(
@@ -133,32 +135,46 @@ def test_ridge_sample_weights():
                    alpha=alpha, solver=solver)
                assert_array_almost_equal(coefs, coefs2)
 
-                # Test for fit_intercept = True
-                est = Ridge(alpha=alpha, solver=solver)
-                est.fit(X, y, sample_weight=sample_weight)
-
-                # Check using Newton's Method
-                # Quadratic function should be solved in a single step.
-                # Initialize
-                sample_weight = np.sqrt(sample_weight)
-                X_weighted = sample_weight[:, np.newaxis] * (
-                    np.column_stack((np.ones(n_samples), X)))
-                y_weighted = y * sample_weight
-
-                # Gradient is (X*coef-y)*X + alpha*coef_[1:]
-                # Remove coef since it is initialized to zero.
-                grad = -np.dot(y_weighted, X_weighted)
-
-                # Hessian is (X.T*X) + alpha*I except that the first
-                # diagonal element should be zero, since there is no
-                # penalization of intercept.
-                diag = alpha * np.ones(n_features + 1)
-                diag[0] = 0.
-                hess = np.dot(X_weighted.T, X_weighted)
-                hess.flat[::n_features + 2] += diag
-                coef_ = - np.dot(linalg.inv(hess), grad)
-                assert_almost_equal(coef_[0], est.intercept_)
-                assert_array_almost_equal(coef_[1:], est.coef_)
+
+def test_ridge_sample_weights():
+    rng = np.random.RandomState(0)
+    param_grid = product((1.0, 1e-2), (True, False),
+                         ('svd', 'cholesky', 'lsqr', 'sparse_cg'))
+
+    for n_samples, n_features in ((6, 5), (5, 10)):
+
+        y = rng.randn(n_samples)
+        X = rng.randn(n_samples, n_features)
+        sample_weight = 1 + rng.rand(n_samples)
+
+        for (alpha, intercept, solver) in param_grid:
+            print(solver)
+
+            # Ridge with explicit sample_weight
+            est = Ridge(alpha=alpha, fit_intercept=intercept, solver=solver)
+            est.fit(X, y, sample_weight=sample_weight)
+            coefs = est.coef_
+            inter = est.intercept_
+
+            # Closed form of the weighted regularized least square
+            # theta = (X^T W X + alpha I)^(-1) * X^T W y
+            W = np.diag(sample_weight)
+            if intercept is False:
+                X_aug = X.copy()
+                I = np.eye(n_features)
+            else:
+                X_aug = np.column_stack((np.ones(n_samples), X))
+                I = np.eye(n_features + 1)
+                I[0, 0] = 0
+
+            cf_coefs = linalg.inv(X_aug.T.dot(W).dot(X_aug) +
+                                  alpha * I).dot(X_aug.T).dot(W).dot(y)
+
+            if intercept is False:
+                assert_array_almost_equal(coefs, cf_coefs)
+            else:
+                assert_array_almost_equal(coefs, cf_coefs[1:])
+                assert_almost_equal(inter, cf_coefs[0])
 
 
 def test_ridge_shapes():
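
Two details of the new test matter if you run it standalone: `param_grid` is built with `itertools.product`, a one-shot iterator, so it is exhausted after the first `(n_samples, n_features)` pass and the `(5, 10)` case runs zero parameter combinations; and `print(solver)` reads like a debugging leftover. A self-contained sketch of the same check with both points addressed (a sketch only, not part of the commit; the iterative solvers 'lsqr' and 'sparse_cg' match the closed form only up to their convergence tolerance on these small problems):

import numpy as np
from itertools import product
from scipy import linalg
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
# Materialize the grid: a bare product(...) iterator would be exhausted
# after the first (n_samples, n_features) pass, silently skipping the rest.
param_grid = list(product((1.0, 1e-2), (True, False),
                          ('svd', 'cholesky', 'lsqr', 'sparse_cg')))

for n_samples, n_features in ((6, 5), (5, 10)):
    y = rng.randn(n_samples)
    X = rng.randn(n_samples, n_features)
    sample_weight = 1.0 + rng.rand(n_samples)

    for alpha, intercept, solver in param_grid:
        est = Ridge(alpha=alpha, fit_intercept=intercept, solver=solver)
        est.fit(X, y, sample_weight=sample_weight)

        # Closed form of weighted ridge:
        # theta = (X^T W X + alpha * I)^(-1) X^T W y
        W = np.diag(sample_weight)
        X_aug = np.column_stack((np.ones(n_samples), X)) if intercept else X
        pen = np.eye(X_aug.shape[1])
        if intercept:
            pen[0, 0] = 0.0  # the intercept is not penalized
        cf = linalg.inv(X_aug.T.dot(W).dot(X_aug)
                        + alpha * pen).dot(X_aug.T).dot(W).dot(y)

        if intercept:
            np.testing.assert_array_almost_equal(est.coef_, cf[1:])
            np.testing.assert_almost_equal(est.intercept_, cf[0])
        else:
            np.testing.assert_array_almost_equal(est.coef_, cf)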
