Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit b92aaeb

Browse files
jjerphan, glemaitre, and jeremiedbb
authored and committed
TST use global_dtype in sklearn/manifold/tests/test_isomap.py (#22673)
Co-authored-by: Guillaume Lemaitre <[email protected]> Co-authored-by: Jérémie du Boisberranger <[email protected]>
1 parent de14bd4 commit b92aaeb

File tree

1 file changed

+117
-93
lines changed

1 file changed

+117
-93
lines changed

sklearn/manifold/tests/test_isomap.py

Lines changed: 117 additions & 93 deletions
Original file line number | Diff line number | Diff line change
@@ -1,79 +1,88 @@
11
from itertools import product
22
import numpy as np
33
import math
4-
from numpy.testing import (
5-
assert_almost_equal,
6-
assert_array_almost_equal,
7-
assert_array_equal,
8-
)
94
import pytest
105

11-
from sklearn import datasets
6+
from sklearn import datasets, clone
127
from sklearn import manifold
138
from sklearn import neighbors
149
from sklearn import pipeline
1510
from sklearn import preprocessing
1611
from sklearn.datasets import make_blobs
1712
from sklearn.metrics.pairwise import pairwise_distances
18-
from sklearn.utils._testing import assert_allclose, assert_allclose_dense_sparse
19-
13+
from sklearn.utils._testing import (
14+
assert_allclose,
15+
assert_allclose_dense_sparse,
16+
assert_array_equal,
17+
)
2018
from scipy.sparse import rand as sparse_rand
2119

2220
eigen_solvers = ["auto", "dense", "arpack"]
2321
path_methods = ["auto", "FW", "D"]
2422

2523

26-
def create_sample_data(n_pts=25, add_noise=False):
24+
def create_sample_data(dtype, n_pts=25, add_noise=False):
2725
# grid of equidistant points in 2D, n_components = n_dim
2826
n_per_side = int(math.sqrt(n_pts))
29-
X = np.array(list(product(range(n_per_side), repeat=2)))
27+
X = np.array(list(product(range(n_per_side), repeat=2))).astype(dtype, copy=False)
3028
if add_noise:
3129
# add noise in a third dimension
3230
rng = np.random.RandomState(0)
33-
noise = 0.1 * rng.randn(n_pts, 1)
31+
noise = 0.1 * rng.randn(n_pts, 1).astype(dtype, copy=False)
3432
X = np.concatenate((X, noise), 1)
3533
return X
3634

3735

3836
@pytest.mark.parametrize("n_neighbors, radius", [(24, None), (None, np.inf)])
39-
def test_isomap_simple_grid(n_neighbors, radius):
37+
@pytest.mark.parametrize("eigen_solver", eigen_solvers)
38+
@pytest.mark.parametrize("path_method", path_methods)
39+
def test_isomap_simple_grid(
40+
global_dtype, n_neighbors, radius, eigen_solver, path_method
41+
):
4042
# Isomap should preserve distances when all neighbors are used
4143
n_pts = 25
42-
X = create_sample_data(n_pts=n_pts, add_noise=False)
44+
X = create_sample_data(global_dtype, n_pts=n_pts, add_noise=False)
4345

4446
# distances from each point to all others
4547
if n_neighbors is not None:
4648
G = neighbors.kneighbors_graph(X, n_neighbors, mode="distance")
4749
else:
4850
G = neighbors.radius_neighbors_graph(X, radius, mode="distance")
4951

50-
for eigen_solver in eigen_solvers:
51-
for path_method in path_methods:
52-
clf = manifold.Isomap(
53-
n_neighbors=n_neighbors,
54-
radius=radius,
55-
n_components=2,
56-
eigen_solver=eigen_solver,
57-
path_method=path_method,
58-
)
59-
clf.fit(X)
60-
61-
if n_neighbors is not None:
62-
G_iso = neighbors.kneighbors_graph(
63-
clf.embedding_, n_neighbors, mode="distance"
64-
)
65-
else:
66-
G_iso = neighbors.radius_neighbors_graph(
67-
clf.embedding_, radius, mode="distance"
68-
)
69-
assert_allclose_dense_sparse(G, G_iso)
52+
clf = manifold.Isomap(
53+
n_neighbors=n_neighbors,
54+
radius=radius,
55+
n_components=2,
56+
eigen_solver=eigen_solver,
57+
path_method=path_method,
58+
)
59+
clf.fit(X)
60+
61+
if n_neighbors is not None:
62+
G_iso = neighbors.kneighbors_graph(clf.embedding_, n_neighbors, mode="distance")
63+
else:
64+
G_iso = neighbors.radius_neighbors_graph(
65+
clf.embedding_, radius, mode="distance"
66+
)
67+
atol = 1e-5 if global_dtype == np.float32 else 0
68+
assert_allclose_dense_sparse(G, G_iso, atol=atol)
7069

7170

7271
@pytest.mark.parametrize("n_neighbors, radius", [(24, None), (None, np.inf)])
73-
def test_isomap_reconstruction_error(n_neighbors, radius):
72+
@pytest.mark.parametrize("eigen_solver", eigen_solvers)
73+
@pytest.mark.parametrize("path_method", path_methods)
74+
def test_isomap_reconstruction_error(
75+
global_dtype, n_neighbors, radius, eigen_solver, path_method
76+
):
77+
if global_dtype is np.float32:
78+
pytest.skip(
79+
"Skipping test due to numerical instabilities on float32 data"
80+
"from KernelCenterer used in the reconstruction_error method"
81+
)
82+
7483
# Same setup as in test_isomap_simple_grid, with an added dimension
7584
n_pts = 25
76-
X = create_sample_data(n_pts=n_pts, add_noise=True)
85+
X = create_sample_data(global_dtype, n_pts=n_pts, add_noise=True)
7786

7887
# compute input kernel
7988
if n_neighbors is not None:
@@ -83,43 +92,42 @@ def test_isomap_reconstruction_error(n_neighbors, radius):
8392
centerer = preprocessing.KernelCenterer()
8493
K = centerer.fit_transform(-0.5 * G**2)
8594

86-
for eigen_solver in eigen_solvers:
87-
for path_method in path_methods:
88-
clf = manifold.Isomap(
89-
n_neighbors=n_neighbors,
90-
radius=radius,
91-
n_components=2,
92-
eigen_solver=eigen_solver,
93-
path_method=path_method,
94-
)
95-
clf.fit(X)
96-
97-
# compute output kernel
98-
if n_neighbors is not None:
99-
G_iso = neighbors.kneighbors_graph(
100-
clf.embedding_, n_neighbors, mode="distance"
101-
)
102-
else:
103-
G_iso = neighbors.radius_neighbors_graph(
104-
clf.embedding_, radius, mode="distance"
105-
)
106-
G_iso = G_iso.toarray()
107-
K_iso = centerer.fit_transform(-0.5 * G_iso**2)
108-
109-
# make sure error agrees
110-
reconstruction_error = np.linalg.norm(K - K_iso) / n_pts
111-
assert_almost_equal(reconstruction_error, clf.reconstruction_error())
95+
clf = manifold.Isomap(
96+
n_neighbors=n_neighbors,
97+
radius=radius,
98+
n_components=2,
99+
eigen_solver=eigen_solver,
100+
path_method=path_method,
101+
)
102+
clf.fit(X)
103+
104+
# compute output kernel
105+
if n_neighbors is not None:
106+
G_iso = neighbors.kneighbors_graph(clf.embedding_, n_neighbors, mode="distance")
107+
else:
108+
G_iso = neighbors.radius_neighbors_graph(
109+
clf.embedding_, radius, mode="distance"
110+
)
111+
G_iso = G_iso.toarray()
112+
K_iso = centerer.fit_transform(-0.5 * G_iso**2)
113+
114+
# make sure error agrees
115+
reconstruction_error = np.linalg.norm(K - K_iso) / n_pts
116+
atol = 1e-5 if global_dtype == np.float32 else 0
117+
assert_allclose(reconstruction_error, clf.reconstruction_error(), atol=atol)
112118

113119

114120
@pytest.mark.parametrize("n_neighbors, radius", [(2, None), (None, 0.5)])
115-
def test_transform(n_neighbors, radius):
121+
def test_transform(global_dtype, n_neighbors, radius):
116122
n_samples = 200
117123
n_components = 10
118124
noise_scale = 0.01
119125

120126
# Create S-curve dataset
121127
X, y = datasets.make_s_curve(n_samples, random_state=0)
122128

129+
X = X.astype(global_dtype, copy=False)
130+
123131
# Compute isomap embedding
124132
iso = manifold.Isomap(
125133
n_components=n_components, n_neighbors=n_neighbors, radius=radius
@@ -136,11 +144,12 @@ def test_transform(n_neighbors, radius):
136144

137145

138146
@pytest.mark.parametrize("n_neighbors, radius", [(2, None), (None, 10.0)])
139-
def test_pipeline(n_neighbors, radius):
147+
def test_pipeline(n_neighbors, radius, global_dtype):
140148
# check that Isomap works fine as a transformer in a Pipeline
141149
# only checks that no error is raised.
142150
# TODO check that it actually does something useful
143151
X, y = datasets.make_blobs(random_state=0)
152+
X = X.astype(global_dtype, copy=False)
144153
clf = pipeline.Pipeline(
145154
[
146155
("isomap", manifold.Isomap(n_neighbors=n_neighbors, radius=radius)),
@@ -151,7 +160,7 @@ def test_pipeline(n_neighbors, radius):
151160
assert 0.9 < clf.score(X, y)
152161

153162

154-
def test_pipeline_with_nearest_neighbors_transformer():
163+
def test_pipeline_with_nearest_neighbors_transformer(global_dtype):
155164
# Test chaining NearestNeighborsTransformer and Isomap with
156165
# neighbors_algorithm='precomputed'
157166
algorithm = "auto"
@@ -160,6 +169,9 @@ def test_pipeline_with_nearest_neighbors_transformer():
160169
X, _ = datasets.make_blobs(random_state=0)
161170
X2, _ = datasets.make_blobs(random_state=1)
162171

172+
X = X.astype(global_dtype, copy=False)
173+
X2 = X2.astype(global_dtype, copy=False)
174+
163175
# compare the chained version and the compact version
164176
est_chain = pipeline.make_pipeline(
165177
neighbors.KNeighborsTransformer(
@@ -173,38 +185,37 @@ def test_pipeline_with_nearest_neighbors_transformer():
173185

174186
Xt_chain = est_chain.fit_transform(X)
175187
Xt_compact = est_compact.fit_transform(X)
176-
assert_array_almost_equal(Xt_chain, Xt_compact)
188+
assert_allclose(Xt_chain, Xt_compact)
177189

178190
Xt_chain = est_chain.transform(X2)
179191
Xt_compact = est_compact.transform(X2)
180-
assert_array_almost_equal(Xt_chain, Xt_compact)
192+
assert_allclose(Xt_chain, Xt_compact)
181193

182194

183-
def test_different_metric():
184-
# Test that the metric parameters work correctly, and default to euclidean
185-
def custom_metric(x1, x2):
186-
return np.sqrt(np.sum(x1**2 + x2**2))
187-
188-
# metric, p, is_euclidean
189-
metrics = [
195+
@pytest.mark.parametrize(
196+
"metric, p, is_euclidean",
197+
[
190198
("euclidean", 2, True),
191199
("manhattan", 1, False),
192200
("minkowski", 1, False),
193201
("minkowski", 2, True),
194-
(custom_metric, 2, False),
195-
]
196-
202+
(lambda x1, x2: np.sqrt(np.sum(x1**2 + x2**2)), 2, False),
203+
],
204+
)
205+
def test_different_metric(global_dtype, metric, p, is_euclidean):
206+
# Isomap must work correctly with various metric parameters
207+
# and must default to euclidean.
197208
X, _ = datasets.make_blobs(random_state=0)
198-
reference = manifold.Isomap().fit_transform(X)
209+
X = X.astype(global_dtype, copy=False)
199210

200-
for metric, p, is_euclidean in metrics:
201-
embedding = manifold.Isomap(metric=metric, p=p).fit_transform(X)
211+
reference = manifold.Isomap().fit_transform(X)
212+
embedding = manifold.Isomap(metric=metric, p=p).fit_transform(X)
202213

203-
if is_euclidean:
204-
assert_array_almost_equal(embedding, reference)
205-
else:
206-
with pytest.raises(AssertionError, match="not almost equal"):
207-
assert_array_almost_equal(embedding, reference)
214+
if is_euclidean:
215+
assert_allclose(embedding, reference)
216+
else:
217+
with pytest.raises(AssertionError, match="Not equal to tolerance"):
218+
assert_allclose(embedding, reference)
208219

209220

210221
def test_isomap_clone_bug():
@@ -218,26 +229,38 @@ def test_isomap_clone_bug():
218229

219230
@pytest.mark.parametrize("eigen_solver", eigen_solvers)
220231
@pytest.mark.parametrize("path_method", path_methods)
221-
def test_sparse_input(eigen_solver, path_method):
232+
def test_sparse_input(global_dtype, eigen_solver, path_method, global_random_seed):
222233
# TODO: compare results on dense and sparse data as proposed in:
223234
# https://github.com/scikit-learn/scikit-learn/pull/23585#discussion_r968388186
224-
X = sparse_rand(100, 3, density=0.1, format="csr")
235+
X = sparse_rand(
236+
100,
237+
3,
238+
density=0.1,
239+
format="csr",
240+
dtype=global_dtype,
241+
random_state=global_random_seed,
242+
)
225243

226-
clf = manifold.Isomap(
244+
iso_dense = manifold.Isomap(
227245
n_components=2,
228246
eigen_solver=eigen_solver,
229247
path_method=path_method,
230248
n_neighbors=8,
231249
)
232-
clf.fit(X)
233-
clf.transform(X)
250+
iso_sparse = clone(iso_dense)
251+
252+
X_trans_dense = iso_dense.fit_transform(X.toarray())
253+
X_trans_sparse = iso_sparse.fit_transform(X)
234254

255+
assert_allclose(X_trans_sparse, X_trans_dense, rtol=1e-4, atol=1e-4)
235256

236-
def test_isomap_fit_precomputed_radius_graph():
257+
258+
def test_isomap_fit_precomputed_radius_graph(global_dtype):
237259
# Isomap.fit_transform must yield similar result when using
238260
# a precomputed distance matrix.
239261

240262
X, y = datasets.make_s_curve(200, random_state=0)
263+
X = X.astype(global_dtype, copy=False)
241264
radius = 10
242265

243266
g = neighbors.radius_neighbors_graph(X, radius=radius, mode="distance")
@@ -247,7 +270,8 @@ def test_isomap_fit_precomputed_radius_graph():
247270

248271
isomap = manifold.Isomap(n_neighbors=None, radius=radius, metric="minkowski")
249272
result = isomap.fit_transform(X)
250-
assert_allclose(precomputed_result, result)
273+
atol = 1e-5 if global_dtype == np.float32 else 0
274+
assert_allclose(precomputed_result, result, atol=atol)
251275

252276

253277
def test_isomap_fitted_attributes_dtype(global_dtype):
@@ -294,10 +318,10 @@ def test_multiple_connected_components():
294318
manifold.Isomap(n_neighbors=2).fit(X)
295319

296320

297-
def test_multiple_connected_components_metric_precomputed():
321+
def test_multiple_connected_components_metric_precomputed(global_dtype):
298322
# Test that an error is raised when the graph has multiple components
299323
# and when X is a precomputed neighbors graph.
300-
X = np.array([0, 1, 2, 5, 6, 7])[:, None]
324+
X = np.array([0, 1, 2, 5, 6, 7])[:, None].astype(global_dtype, copy=False)
301325

302326
# works with a precomputed distance matrix (dense)
303327
X_distances = pairwise_distances(X)

0 commit comments

Comments
 (0)