5
5
6
6
import numpy as np
7
7
from scipy import sparse
8
+ from scipy import linalg
8
9
9
10
from sklearn .utils .testing import assert_array_almost_equal
11
+ from sklearn .utils .testing import assert_almost_equal
10
12
from sklearn .utils .testing import assert_equal
11
13
12
14
from sklearn .linear_model .base import LinearRegression
13
- from sklearn .linear_model .base import center_data , sparse_center_data , _rescale_data
15
+ from sklearn .linear_model .base import center_data
16
+ from sklearn .linear_model .base import sparse_center_data
17
+ from sklearn .linear_model .base import _rescale_data
14
18
from sklearn .utils import check_random_state
15
19
from sklearn .utils .testing import assert_greater
16
20
from sklearn .datasets .samples_generator import make_sparse_uncorrelated
@@ -23,48 +27,64 @@ def test_linear_regression():
23
27
X = [[1 ], [2 ]]
24
28
Y = [1 , 2 ]
25
29
26
- clf = LinearRegression ()
27
- clf .fit (X , Y )
30
+ reg = LinearRegression ()
31
+ reg .fit (X , Y )
28
32
29
- assert_array_almost_equal (clf .coef_ , [1 ])
30
- assert_array_almost_equal (clf .intercept_ , [0 ])
31
- assert_array_almost_equal (clf .predict (X ), [1 , 2 ])
33
+ assert_array_almost_equal (reg .coef_ , [1 ])
34
+ assert_array_almost_equal (reg .intercept_ , [0 ])
35
+ assert_array_almost_equal (reg .predict (X ), [1 , 2 ])
32
36
33
37
# test it also for degenerate input
34
38
X = [[1 ]]
35
39
Y = [0 ]
36
40
37
- clf = LinearRegression ()
38
- clf .fit (X , Y )
39
- assert_array_almost_equal (clf .coef_ , [0 ])
40
- assert_array_almost_equal (clf .intercept_ , [0 ])
41
- assert_array_almost_equal (clf .predict (X ), [0 ])
41
+ reg = LinearRegression ()
42
+ reg .fit (X , Y )
43
+ assert_array_almost_equal (reg .coef_ , [0 ])
44
+ assert_array_almost_equal (reg .intercept_ , [0 ])
45
+ assert_array_almost_equal (reg .predict (X ), [0 ])
42
46
43
47
44
48
def test_linear_regression_sample_weights():
    """Check that LinearRegression with sample_weight matches the closed-form
    weighted least-squares solution theta = (X^T W X)^(-1) X^T W y,
    with and without an intercept."""
    # TODO: loop over sparse data as well

    rng = np.random.RandomState(0)

    # It would not work with under-determined systems
    for n_samples, n_features in ((6, 5), ):

        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)
        sample_weight = 1.0 + rng.rand(n_samples)

        for intercept in (True, False):

            # LinearRegression with explicit sample_weight
            reg = LinearRegression(fit_intercept=intercept)
            reg.fit(X, y, sample_weight=sample_weight)
            coefs1 = reg.coef_
            inter1 = reg.intercept_

            assert_equal(reg.coef_.shape, (X.shape[1], ))  # sanity checks
            assert_greater(reg.score(X, y), 0.5)

            # Closed form of the weighted least square
            # theta = (X^T W X)^(-1) * X^T W y
            W = np.diag(sample_weight)
            if intercept is False:
                X_aug = X
            else:
                # Prepend a column of ones so the intercept is estimated as
                # the first coefficient of the augmented design matrix.
                dummy_column = np.ones(shape=(n_samples, 1))
                X_aug = np.concatenate((dummy_column, X), axis=1)

            coefs2 = linalg.solve(X_aug.T.dot(W).dot(X_aug),
                                  X_aug.T.dot(W).dot(y))

            if intercept is False:
                assert_array_almost_equal(coefs1, coefs2)
            else:
                # First entry of the closed-form solution is the intercept.
                assert_array_almost_equal(coefs1, coefs2[1:])
                assert_almost_equal(inter1, coefs2[0])
70
90
def test_raises_value_error_if_sample_weights_greater_than_1d ():
@@ -82,12 +102,12 @@ def test_raises_value_error_if_sample_weights_greater_than_1d():
82
102
sample_weights_OK_1 = 1.
83
103
sample_weights_OK_2 = 2.
84
104
85
- clf = LinearRegression ()
105
+ reg = LinearRegression ()
86
106
87
107
# make sure the "OK" sample weights actually work
88
- clf .fit (X , y , sample_weights_OK )
89
- clf .fit (X , y , sample_weights_OK_1 )
90
- clf .fit (X , y , sample_weights_OK_2 )
108
+ reg .fit (X , y , sample_weights_OK )
109
+ reg .fit (X , y , sample_weights_OK_1 )
110
+ reg .fit (X , y , sample_weights_OK_2 )
91
111
92
112
93
113
def test_fit_intercept ():
@@ -135,12 +155,12 @@ def test_linear_regression_multiple_outcome(random_state=0):
135
155
Y = np .vstack ((y , y )).T
136
156
n_features = X .shape [1 ]
137
157
138
- clf = LinearRegression (fit_intercept = True )
139
- clf .fit ((X ), Y )
140
- assert_equal (clf .coef_ .shape , (2 , n_features ))
141
- Y_pred = clf .predict (X )
142
- clf .fit (X , y )
143
- y_pred = clf .predict (X )
158
+ reg = LinearRegression (fit_intercept = True )
159
+ reg .fit ((X ), Y )
160
+ assert_equal (reg .coef_ .shape , (2 , n_features ))
161
+ Y_pred = reg .predict (X )
162
+ reg .fit (X , y )
163
+ y_pred = reg .predict (X )
144
164
assert_array_almost_equal (np .vstack ((y_pred , y_pred )).T , Y_pred , decimal = 3 )
145
165
146
166
0 commit comments