Commit fa249c3

TST add / move test to test_linear_loss.py

1 parent 8dfe165 commit fa249c3
2 files changed: +310 −194 lines changed
Lines changed: 302 additions & 0 deletions

@@ -0,0 +1,302 @@
"""
Tests for LinearModelLoss

Note that correctness of losses is already well covered in the _loss module.
"""
import pytest
import numpy as np
from numpy.testing import assert_allclose
from scipy import linalg, optimize, sparse

from sklearn._loss.loss import (
    HalfBinomialLoss,
    HalfMultinomialLoss,
    HalfPoissonLoss,
)
from sklearn.datasets import make_low_rank_matrix
from sklearn.linear_model._linear_loss import LinearModelLoss
from sklearn.utils.extmath import squared_norm


# We do not need to test all losses, just what LinearModelLoss does on top of the
# base losses.
LOSSES = [HalfBinomialLoss, HalfMultinomialLoss, HalfPoissonLoss]


def random_X_y_coef(
    linear_model_loss, n_samples, n_features, coef_bound=(-2, 2), seed=42
):
    """Randomly generate y, X and coef in a valid range."""
    rng = np.random.RandomState(seed)
    n_dof = n_features + linear_model_loss.fit_intercept
    X = make_low_rank_matrix(
        n_samples=n_samples,
        n_features=n_features,
        random_state=rng,
    )

    if linear_model_loss._loss.is_multiclass:
        n_classes = linear_model_loss._loss.n_classes
        coef = np.empty((n_classes, n_dof))
        coef.flat[:] = rng.uniform(
            low=coef_bound[0],
            high=coef_bound[1],
            size=n_classes * n_dof,
        )
        if linear_model_loss.fit_intercept:
            raw_prediction = X @ coef[:, :-1].T + coef[:, -1]
        else:
            raw_prediction = X @ coef.T
        proba = linear_model_loss._loss.link.inverse(raw_prediction)

        # y = rng.choice(np.arange(n_classes), p=proba) does not work, see
        # https://stackoverflow.com/a/34190035/16761084
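        # For each row, draw one uniform sample and take the first class whose
        # cumulative probability exceeds it (vectorized inverse-CDF sampling).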
        def choice_vectorized(items, p):
            s = p.cumsum(axis=1)
            r = np.random.rand(p.shape[0])[:, None]
            k = (s < r).sum(axis=1)
            return items[k]

        y = choice_vectorized(np.arange(n_classes), p=proba).astype(np.float64)
    else:
        coef = np.empty((n_dof,))
        coef.flat[:] = rng.uniform(
            low=coef_bound[0],
            high=coef_bound[1],
            size=n_dof,
        )
        if linear_model_loss.fit_intercept:
            raw_prediction = X @ coef[:-1] + coef[-1]
        else:
            raw_prediction = X @ coef
        y = linear_model_loss._loss.link.inverse(
            raw_prediction + rng.uniform(low=-1, high=1, size=n_samples)
        )

    return X, y, coef


@pytest.mark.parametrize("base_loss", LOSSES)
@pytest.mark.parametrize("fit_intercept", [False, True])
@pytest.mark.parametrize("sample_weight", [None, "range"])
@pytest.mark.parametrize("l2_reg_strength", [0, 1])
def test_loss_gradients_are_the_same(
    base_loss, fit_intercept, sample_weight, l2_reg_strength
):
    """Test that loss and gradient are the same across different functions."""
    loss = LinearModelLoss(loss=base_loss(), fit_intercept=fit_intercept)
    X, y, coef = random_X_y_coef(
        linear_model_loss=loss, n_samples=10, n_features=5, seed=42
    )

    if sample_weight == "range":
        sample_weight = np.linspace(1, y.shape[0], num=y.shape[0])

    l1 = loss.loss(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    g1 = loss.gradient(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    l2, g2 = loss.loss_gradient(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    g3, h3 = loss.gradient_hessp(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )

    assert_allclose(l1, l2)
    assert_allclose(g1, g2)
    assert_allclose(g1, g3)

    # same for sparse X
    X = sparse.csr_matrix(X)
    l1_sp = loss.loss(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    g1_sp = loss.gradient(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    l2_sp, g2_sp = loss.loss_gradient(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    g3_sp, h3_sp = loss.gradient_hessp(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )

    assert_allclose(l1, l1_sp)
    assert_allclose(l1, l2_sp)
    assert_allclose(g1, g1_sp)
    assert_allclose(g1, g2_sp)
    assert_allclose(g1, g3_sp)


@pytest.mark.parametrize("base_loss", LOSSES)
@pytest.mark.parametrize("sample_weight", [None, "range"])
@pytest.mark.parametrize("l2_reg_strength", [0, 1])
@pytest.mark.parametrize("X_sparse", [False, True])
def test_loss_gradients_hessp_intercept(
    base_loss, sample_weight, l2_reg_strength, X_sparse
):
    """Test that loss and gradient handle intercept correctly."""
    loss = LinearModelLoss(loss=base_loss(), fit_intercept=False)
    loss_inter = LinearModelLoss(loss=base_loss(), fit_intercept=True)
    n_samples, n_features = 10, 5
    X, y, coef = random_X_y_coef(
        linear_model_loss=loss, n_samples=n_samples, n_features=n_features, seed=42
    )

    X[:, -1] = 1  # make the last column all ones to mimic an intercept term
    X_inter = X[
        :, :-1
    ]  # exclude intercept column as it is added automatically by loss_inter

    if X_sparse:
        X = sparse.csr_matrix(X)

    if sample_weight == "range":
        sample_weight = np.linspace(1, y.shape[0], num=y.shape[0])

    l, g = loss.loss_gradient(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    _, hessp = loss.gradient_hessp(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    l_inter, g_inter = loss_inter.loss_gradient(
        coef, X_inter, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    _, hessp_inter = loss_inter.gradient_hessp(
        coef, X_inter, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )

    # Note that the intercept gets no L2 penalty.
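    # With fit_intercept=False, the all-ones column of X is treated as a regular
    # (penalized) feature, so the two losses differ exactly by the penalty on the
    # intercept coefficient, 0.5 * l2_reg_strength * ||coef.T[-1]||**2.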
    assert l == pytest.approx(
        l_inter + 0.5 * l2_reg_strength * squared_norm(coef.T[-1])
    )

    g_inter_corrected = g_inter
    g_inter_corrected.T[-1] += l2_reg_strength * coef.T[-1]
    assert_allclose(g, g_inter_corrected)

    s = np.random.RandomState(42).randn(*coef.shape)
    h = hessp(s)
    h_inter = hessp_inter(s)
    h_inter_corrected = h_inter
    h_inter_corrected.T[-1] += l2_reg_strength * s.T[-1]
    assert_allclose(h, h_inter_corrected)
@pytest.mark.parametrize("base_loss", LOSSES)
191+
@pytest.mark.parametrize("fit_intercept", [False, True])
192+
@pytest.mark.parametrize("sample_weight", [None, "range"])
193+
@pytest.mark.parametrize("l2_reg_strength", [0, 1])
194+
def test_gradients_hessians_numerically(
195+
base_loss, fit_intercept, sample_weight, l2_reg_strength
196+
):
197+
"""Test gradients and hessians with numerical derivatives.
198+
199+
Gradient should equal the numerical derivatives of the loss function.
200+
Hessians should equal the numerical derivatives of gradients.
201+
"""
202+
loss = LinearModelLoss(loss=base_loss(), fit_intercept=fit_intercept)
203+
n_samples, n_features = 10, 5
204+
X, y, coef = random_X_y_coef(
205+
linear_model_loss=loss, n_samples=n_samples, n_features=n_features, seed=42
206+
)
207+
coef = coef.ravel(order="F") # this is important only for multinomial loss
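    # optimize.approx_fprime below works on 1-d parameter vectors; the F-order
    # ravel matches how LinearModelLoss interprets a raveled multiclass coef
    # (see test_multinomial_coef_shape).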

    if sample_weight == "range":
        sample_weight = np.linspace(1, y.shape[0], num=y.shape[0])

    # 1. Check gradients numerically
    eps = 1e-6
    g, hessp = loss.gradient_hessp(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    # Use a trick to get central finite difference of accuracy 4 (five-point stencil)
    # https://en.wikipedia.org/wiki/Numerical_differentiation
    # https://en.wikipedia.org/wiki/Finite_difference_coefficient
    approx_g1 = optimize.approx_fprime(
        coef,
        lambda coef: loss.loss(
            coef - eps,
            X,
            y,
            sample_weight=sample_weight,
            l2_reg_strength=l2_reg_strength,
        ),
        2 * eps,
    )  # (f(x + eps) - f(x - eps)) / (2*eps)
    approx_g2 = optimize.approx_fprime(
        coef,
        lambda coef: loss.loss(
            coef - 2 * eps,
            X,
            y,
            sample_weight=sample_weight,
            l2_reg_strength=l2_reg_strength,
        ),
        4 * eps,
    )  # (f(x + 2*eps) - f(x - 2*eps)) / (4*eps)
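    # Richardson extrapolation: a central difference D(h) has error c * h**2 +
    # O(h**4), so (4 * D(eps) - D(2 * eps)) / 3 cancels the h**2 term and leaves
    # an O(eps**4) accurate gradient estimate.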
    approx_g = 4 / 3 * approx_g1 - 1 / 3 * approx_g2
    assert_allclose(g, approx_g, rtol=1e-2, atol=1e-8)

    # 2. Check hessp numerically along the second coordinate direction
    vector = np.zeros_like(g)
    vector[1] = 1
    hess_col = hessp(vector)
    # Computation of the Hessian is particularly fragile to numerical errors when
    # doing simple finite differences. Here we compute the gradient along a path in
    # the direction of the vector and then use a least-squares regression to
    # estimate the slope.
    eps = 1e-3
    d_x = np.linspace(-eps, eps, 30)
    d_grad = np.array(
        [
            loss.gradient(
                coef + t * vector,
                X,
                y,
                sample_weight=sample_weight,
                l2_reg_strength=l2_reg_strength,
            )
            for t in d_x
        ]
    )
    d_grad -= d_grad.mean(axis=0)
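    # Centering d_grad removes the constant term, so a least-squares fit of
    # d_grad against d_x through the origin estimates the slope, i.e. the
    # Hessian-vector product H @ vector.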
    approx_hess_col = linalg.lstsq(d_x[:, np.newaxis], d_grad)[0].ravel()
    assert_allclose(approx_hess_col, hess_col, rtol=1e-3)
@pytest.mark.parametrize("fit_intercept", [False, True])
272+
def test_multinomial_coef_shape(fit_intercept):
273+
"""Test that multinomial LinearModelLoss respects shape of coef."""
274+
loss = LinearModelLoss(loss=HalfMultinomialLoss(), fit_intercept=fit_intercept)
275+
n_samples, n_features = 10, 5
276+
X, y, coef = random_X_y_coef(
277+
linear_model_loss=loss, n_samples=n_samples, n_features=n_features, seed=42
278+
)
279+
s = np.random.RandomState(42).randn(*coef.shape)
280+
281+
l, g = loss.loss_gradient(coef, X, y)
282+
g1 = loss.gradient(coef, X, y)
283+
g2, hessp = loss.gradient_hessp(coef, X, y)
284+
h = hessp(s)
285+
assert g.shape == coef.shape
286+
assert h.shape == coef.shape
287+
assert_allclose(g, g1)
288+
assert_allclose(g, g2)
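
    # The same computations must also accept coef raveled in F-order (class by
    # class, column-wise) and then return gradients with the matching 1-d shape.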
    coef_r = coef.ravel(order="F")
    s_r = s.ravel(order="F")
    l_r, g_r = loss.loss_gradient(coef_r, X, y)
    g1_r = loss.gradient(coef_r, X, y)
    g2_r, hessp_r = loss.gradient_hessp(coef_r, X, y)
    h_r = hessp_r(s_r)
    assert g_r.shape == coef_r.shape
    assert h_r.shape == coef_r.shape
    assert_allclose(g_r, g1_r)
    assert_allclose(g_r, g2_r)

    assert_allclose(g, g_r.reshape(loss._loss.n_classes, -1, order="F"))
    assert_allclose(h, h_r.reshape(loss._loss.n_classes, -1, order="F"))
