5
5
6
6
import numpy as np
7
7
from scipy import sparse
8
+ from scipy import linalg
8
9
9
10
from sklearn .utils .testing import assert_array_almost_equal
11
+ from sklearn .utils .testing import assert_almost_equal
10
12
from sklearn .utils .testing import assert_equal
11
13
12
14
from sklearn .linear_model .base import LinearRegression
13
- from sklearn .linear_model .base import center_data , sparse_center_data , _rescale_data
15
+ from sklearn .linear_model .base import center_data
16
+ from sklearn .linear_model .base import sparse_center_data
17
+ from sklearn .linear_model .base import _rescale_data
14
18
from sklearn .utils import check_random_state
15
19
from sklearn .utils .testing import assert_greater
16
20
from sklearn .datasets .samples_generator import make_sparse_uncorrelated
def test_linear_regression():
    """Check LinearRegression on a trivial exactly-fittable dataset.

    With X = [[1], [2]] and Y = [1, 2] the least-squares solution is
    slope 1 and intercept 0, so the fit and the predictions can be
    asserted exactly (up to float rounding).
    """
    X = [[1], [2]]
    Y = [1, 2]

    reg = LinearRegression()
    reg.fit(X, Y)

    assert_array_almost_equal(reg.coef_, [1])
    assert_array_almost_equal(reg.intercept_, [0])
    assert_array_almost_equal(reg.predict(X), [1, 2])

    # test it also for degenerate input: a single sample with a single
    # feature and target 0 should yield the all-zero model
    X = [[1]]
    Y = [0]

    reg = LinearRegression()
    reg.fit(X, Y)
    assert_array_almost_equal(reg.coef_, [0])
    assert_array_almost_equal(reg.intercept_, [0])
    assert_array_almost_equal(reg.predict(X), [0])
def test_linear_regression_sample_weights():
    """Check sample_weight support in LinearRegression against the
    closed-form weighted least-squares solution.

    For weights W, the weighted normal equations give
        theta = (X^T W X)^(-1) X^T W y
    which is compared (with and without an intercept column) to the
    coefficients produced by ``LinearRegression.fit(X, y, sample_weight)``.
    """
    rng = np.random.RandomState(0)

    # It would not work with under-determined systems
    for n_samples, n_features in ((6, 5), ):

        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        # TODO: loop over sparse data as well
        for intercept in (True, False):

            # LinearRegression with explicit sample_weight
            reg = LinearRegression(fit_intercept=intercept)
            reg.fit(X, y, sample_weight=sample_weight)
            coefs1 = reg.coef_
            inter1 = reg.intercept_

            assert_equal(reg.coef_.shape, (X.shape[1], ))  # sanity checks
            assert_greater(reg.score(X, y), 0.5)

            # Closed form of the weighted least square
            # theta = (X^T W X)^(-1) * X^T W y
            W = np.diag(sample_weight)
            if intercept is False:
                X_aug = X.copy()
            else:
                # prepend a column of ones so the first closed-form
                # coefficient plays the role of the intercept
                dummy_column = np.ones(shape=(n_samples, 1))
                X_aug = np.concatenate((dummy_column, X), axis=1)

            coefs3 = linalg.pinv(X_aug.T.dot(W).dot(X_aug)
                                 ).dot(X_aug.T).dot(W).dot(y)

            if intercept is False:
                assert_array_almost_equal(coefs1, coefs3)
            else:
                # coefs3[0] is the intercept, the rest are the slopes
                assert_array_almost_equal(coefs1, coefs3[1:])
                assert_almost_equal(inter1, coefs3[0])
def test_raises_value_error_if_sample_weights_greater_than_1d ():
@@ -82,12 +101,12 @@ def test_raises_value_error_if_sample_weights_greater_than_1d():
82
101
sample_weights_OK_1 = 1.
83
102
sample_weights_OK_2 = 2.
84
103
85
- clf = LinearRegression ()
104
+ reg = LinearRegression ()
86
105
87
106
# make sure the "OK" sample weights actually work
88
- clf .fit (X , y , sample_weights_OK )
89
- clf .fit (X , y , sample_weights_OK_1 )
90
- clf .fit (X , y , sample_weights_OK_2 )
107
+ reg .fit (X , y , sample_weights_OK )
108
+ reg .fit (X , y , sample_weights_OK_1 )
109
+ reg .fit (X , y , sample_weights_OK_2 )
91
110
92
111
93
112
def test_fit_intercept ():
@@ -135,12 +154,12 @@ def test_linear_regression_multiple_outcome(random_state=0):
135
154
Y = np .vstack ((y , y )).T
136
155
n_features = X .shape [1 ]
137
156
138
- clf = LinearRegression (fit_intercept = True )
139
- clf .fit ((X ), Y )
140
- assert_equal (clf .coef_ .shape , (2 , n_features ))
141
- Y_pred = clf .predict (X )
142
- clf .fit (X , y )
143
- y_pred = clf .predict (X )
157
+ reg = LinearRegression (fit_intercept = True )
158
+ reg .fit ((X ), Y )
159
+ assert_equal (reg .coef_ .shape , (2 , n_features ))
160
+ Y_pred = reg .predict (X )
161
+ reg .fit (X , y )
162
+ y_pred = reg .predict (X )
144
163
assert_array_almost_equal (np .vstack ((y_pred , y_pred )).T , Y_pred , decimal = 3 )
145
164
146
165
0 commit comments