1
1
from itertools import product
2
2
import numpy as np
3
3
import math
4
- from numpy .testing import (
5
- assert_almost_equal ,
6
- assert_array_almost_equal ,
7
- assert_array_equal ,
8
- )
9
4
import pytest
10
5
11
- from sklearn import datasets
6
+ from sklearn import datasets , clone
12
7
from sklearn import manifold
13
8
from sklearn import neighbors
14
9
from sklearn import pipeline
15
10
from sklearn import preprocessing
16
11
from sklearn .datasets import make_blobs
17
12
from sklearn .metrics .pairwise import pairwise_distances
18
- from sklearn .utils ._testing import assert_allclose , assert_allclose_dense_sparse
19
-
13
+ from sklearn .utils ._testing import (
14
+ assert_allclose ,
15
+ assert_allclose_dense_sparse ,
16
+ assert_array_equal ,
17
+ )
20
18
from scipy .sparse import rand as sparse_rand
21
19
22
20
# Values passed as Isomap's ``eigen_solver`` argument; used to parametrize
# the tests below.
eigen_solvers = ["auto" , "dense" , "arpack" ]
23
21
# Values passed as Isomap's ``path_method`` argument; used to parametrize
# the tests below.
path_methods = ["auto" , "FW" , "D" ]
24
22
25
23
26
def create_sample_data(dtype, n_pts=25, add_noise=False):
    """Build a square grid of equidistant 2D points, optionally with noise.

    Parameters
    ----------
    dtype : numpy dtype
        Type the grid (and the noise column, if any) is cast to.
    n_pts : int, default=25
        Requested number of points. The grid has ``int(sqrt(n_pts))`` points
        per side, so fewer than ``n_pts`` rows are returned when ``n_pts``
        is not a perfect square.
    add_noise : bool, default=False
        If True, append a third column holding Gaussian noise scaled by 0.1,
        drawn from ``np.random.RandomState(0)``.

    Returns
    -------
    X : ndarray of shape (n_per_side ** 2, 2) or (n_per_side ** 2, 3)
        The grid coordinates, with the noise column last when requested.
    """
    # grid of equidistant points in 2D, n_components = n_dim
    n_per_side = int(math.sqrt(n_pts))
    grid = list(product(range(n_per_side), repeat=2))
    # reshape keeps the array two-dimensional even when the grid is empty
    X = np.array(grid).reshape(-1, 2).astype(dtype, copy=False)
    if add_noise:
        # add noise in a third dimension; size it from the grid itself so the
        # row counts agree even when n_pts is not a perfect square
        rng = np.random.RandomState(0)
        noise = 0.1 * rng.randn(X.shape[0], 1).astype(dtype, copy=False)
        X = np.concatenate((X, noise), 1)
    return X
36
34
37
35
38
36
@pytest.mark.parametrize("n_neighbors, radius", [(24, None), (None, np.inf)])
@pytest.mark.parametrize("eigen_solver", eigen_solvers)
@pytest.mark.parametrize("path_method", path_methods)
def test_isomap_simple_grid(
    global_dtype, n_neighbors, radius, eigen_solver, path_method
):
    """Check that Isomap preserves distances when all neighbors are used."""
    n_pts = 25
    X = create_sample_data(global_dtype, n_pts=n_pts, add_noise=False)

    def distance_graph(points):
        # distances from each point to all others, built with whichever of
        # n_neighbors / radius this parametrization provides
        if n_neighbors is not None:
            return neighbors.kneighbors_graph(points, n_neighbors, mode="distance")
        return neighbors.radius_neighbors_graph(points, radius, mode="distance")

    G = distance_graph(X)

    isomap = manifold.Isomap(
        n_neighbors=n_neighbors,
        radius=radius,
        n_components=2,
        eigen_solver=eigen_solver,
        path_method=path_method,
    )
    isomap.fit(X)

    G_iso = distance_graph(isomap.embedding_)

    # float32 inputs get a small absolute tolerance
    atol = 0
    if global_dtype == np.float32:
        atol = 1e-5
    assert_allclose_dense_sparse(G, G_iso, atol=atol)
70
69
71
70
72
71
@pytest .mark .parametrize ("n_neighbors, radius" , [(24 , None ), (None , np .inf )])
73
- def test_isomap_reconstruction_error (n_neighbors , radius ):
72
+ @pytest .mark .parametrize ("eigen_solver" , eigen_solvers )
73
+ @pytest .mark .parametrize ("path_method" , path_methods )
74
+ def test_isomap_reconstruction_error (
75
+ global_dtype , n_neighbors , radius , eigen_solver , path_method
76
+ ):
77
+ if global_dtype is np .float32 :
78
+ pytest .skip (
79
+ "Skipping test due to numerical instabilities on float32 data"
80
+ "from KernelCenterer used in the reconstruction_error method"
81
+ )
82
+
74
83
# Same setup as in test_isomap_simple_grid, with an added dimension
75
84
n_pts = 25
76
- X = create_sample_data (n_pts = n_pts , add_noise = True )
85
+ X = create_sample_data (global_dtype , n_pts = n_pts , add_noise = True )
77
86
78
87
# compute input kernel
79
88
if n_neighbors is not None :
@@ -83,43 +92,42 @@ def test_isomap_reconstruction_error(n_neighbors, radius):
83
92
centerer = preprocessing .KernelCenterer ()
84
93
K = centerer .fit_transform (- 0.5 * G ** 2 )
85
94
86
- for eigen_solver in eigen_solvers :
87
- for path_method in path_methods :
88
- clf = manifold .Isomap (
89
- n_neighbors = n_neighbors ,
90
- radius = radius ,
91
- n_components = 2 ,
92
- eigen_solver = eigen_solver ,
93
- path_method = path_method ,
94
- )
95
- clf .fit (X )
96
-
97
- # compute output kernel
98
- if n_neighbors is not None :
99
- G_iso = neighbors .kneighbors_graph (
100
- clf .embedding_ , n_neighbors , mode = "distance"
101
- )
102
- else :
103
- G_iso = neighbors .radius_neighbors_graph (
104
- clf .embedding_ , radius , mode = "distance"
105
- )
106
- G_iso = G_iso .toarray ()
107
- K_iso = centerer .fit_transform (- 0.5 * G_iso ** 2 )
108
-
109
- # make sure error agrees
110
- reconstruction_error = np .linalg .norm (K - K_iso ) / n_pts
111
- assert_almost_equal (reconstruction_error , clf .reconstruction_error ())
95
+ clf = manifold .Isomap (
96
+ n_neighbors = n_neighbors ,
97
+ radius = radius ,
98
+ n_components = 2 ,
99
+ eigen_solver = eigen_solver ,
100
+ path_method = path_method ,
101
+ )
102
+ clf .fit (X )
103
+
104
+ # compute output kernel
105
+ if n_neighbors is not None :
106
+ G_iso = neighbors .kneighbors_graph (clf .embedding_ , n_neighbors , mode = "distance" )
107
+ else :
108
+ G_iso = neighbors .radius_neighbors_graph (
109
+ clf .embedding_ , radius , mode = "distance"
110
+ )
111
+ G_iso = G_iso .toarray ()
112
+ K_iso = centerer .fit_transform (- 0.5 * G_iso ** 2 )
113
+
114
+ # make sure error agrees
115
+ reconstruction_error = np .linalg .norm (K - K_iso ) / n_pts
116
+ atol = 1e-5 if global_dtype == np .float32 else 0
117
+ assert_allclose (reconstruction_error , clf .reconstruction_error (), atol = atol )
112
118
113
119
114
120
@pytest .mark .parametrize ("n_neighbors, radius" , [(2 , None ), (None , 0.5 )])
115
- def test_transform (n_neighbors , radius ):
121
+ def test_transform (global_dtype , n_neighbors , radius ):
116
122
n_samples = 200
117
123
n_components = 10
118
124
noise_scale = 0.01
119
125
120
126
# Create S-curve dataset
121
127
X , y = datasets .make_s_curve (n_samples , random_state = 0 )
122
128
129
+ X = X .astype (global_dtype , copy = False )
130
+
123
131
# Compute isomap embedding
124
132
iso = manifold .Isomap (
125
133
n_components = n_components , n_neighbors = n_neighbors , radius = radius
@@ -136,11 +144,12 @@ def test_transform(n_neighbors, radius):
136
144
137
145
138
146
@pytest .mark .parametrize ("n_neighbors, radius" , [(2 , None ), (None , 10.0 )])
139
- def test_pipeline (n_neighbors , radius ):
147
+ def test_pipeline (n_neighbors , radius , global_dtype ):
140
148
# check that Isomap works fine as a transformer in a Pipeline
141
149
# only checks that no error is raised.
142
150
# TODO check that it actually does something useful
143
151
X , y = datasets .make_blobs (random_state = 0 )
152
+ X = X .astype (global_dtype , copy = False )
144
153
clf = pipeline .Pipeline (
145
154
[
146
155
("isomap" , manifold .Isomap (n_neighbors = n_neighbors , radius = radius )),
@@ -151,7 +160,7 @@ def test_pipeline(n_neighbors, radius):
151
160
assert 0.9 < clf .score (X , y )
152
161
153
162
154
- def test_pipeline_with_nearest_neighbors_transformer ():
163
+ def test_pipeline_with_nearest_neighbors_transformer (global_dtype ):
155
164
# Test chaining NearestNeighborsTransformer and Isomap with
156
165
# neighbors_algorithm='precomputed'
157
166
algorithm = "auto"
@@ -160,6 +169,9 @@ def test_pipeline_with_nearest_neighbors_transformer():
160
169
X , _ = datasets .make_blobs (random_state = 0 )
161
170
X2 , _ = datasets .make_blobs (random_state = 1 )
162
171
172
+ X = X .astype (global_dtype , copy = False )
173
+ X2 = X2 .astype (global_dtype , copy = False )
174
+
163
175
# compare the chained version and the compact version
164
176
est_chain = pipeline .make_pipeline (
165
177
neighbors .KNeighborsTransformer (
@@ -173,38 +185,37 @@ def test_pipeline_with_nearest_neighbors_transformer():
173
185
174
186
Xt_chain = est_chain .fit_transform (X )
175
187
Xt_compact = est_compact .fit_transform (X )
176
- assert_array_almost_equal (Xt_chain , Xt_compact )
188
+ assert_allclose (Xt_chain , Xt_compact )
177
189
178
190
Xt_chain = est_chain .transform (X2 )
179
191
Xt_compact = est_compact .transform (X2 )
180
- assert_array_almost_equal (Xt_chain , Xt_compact )
192
+ assert_allclose (Xt_chain , Xt_compact )
181
193
182
194
183
@pytest.mark.parametrize(
    "metric, p, is_euclidean",
    [
        ("euclidean", 2, True),
        ("manhattan", 1, False),
        ("minkowski", 1, False),
        ("minkowski", 2, True),
        (lambda x1, x2: np.sqrt(np.sum(x1**2 + x2**2)), 2, False),
    ],
)
def test_different_metric(global_dtype, metric, p, is_euclidean):
    """Check that the metric parameters are honoured.

    The default Isomap embedding serves as the reference: metrics flagged
    as euclidean must reproduce it, the others must not.
    """
    X, _ = datasets.make_blobs(random_state=0)
    X = X.astype(global_dtype, copy=False)

    default_isomap = manifold.Isomap()
    reference = default_isomap.fit_transform(X)

    metric_isomap = manifold.Isomap(metric=metric, p=p)
    embedding = metric_isomap.fit_transform(X)

    if not is_euclidean:
        # the comparison itself is expected to fail for these metrics
        with pytest.raises(AssertionError, match="Not equal to tolerance "):
            assert_allclose(embedding, reference)
        return

    assert_allclose(embedding, reference)
208
219
209
220
210
221
def test_isomap_clone_bug ():
@@ -218,26 +229,38 @@ def test_isomap_clone_bug():
218
229
219
230
@pytest.mark.parametrize("eigen_solver", eigen_solvers)
@pytest.mark.parametrize("path_method", path_methods)
def test_sparse_input(global_dtype, eigen_solver, path_method, global_random_seed):
    """Check that Isomap gives close embeddings for CSR and dense input."""
    # Dense vs. sparse comparison, see the discussion in:
    # https://github.com/scikit-learn/scikit-learn/pull/23585#discussion_r968388186
    X_sparse = sparse_rand(
        100,
        3,
        density=0.1,
        format="csr",
        dtype=global_dtype,
        random_state=global_random_seed,
    )
    X_dense = X_sparse.toarray()

    iso_dense = manifold.Isomap(
        n_components=2,
        eigen_solver=eigen_solver,
        path_method=path_method,
        n_neighbors=8,
    )
    # unfitted copy with identical parameters for the sparse run
    iso_sparse = clone(iso_dense)

    dense_embedding = iso_dense.fit_transform(X_dense)
    sparse_embedding = iso_sparse.fit_transform(X_sparse)

    assert_allclose(sparse_embedding, dense_embedding, rtol=1e-4, atol=1e-4)
235
256
236
- def test_isomap_fit_precomputed_radius_graph ():
257
+
258
+ def test_isomap_fit_precomputed_radius_graph (global_dtype ):
237
259
# Isomap.fit_transform must yield similar result when using
238
260
# a precomputed distance matrix.
239
261
240
262
X , y = datasets .make_s_curve (200 , random_state = 0 )
263
+ X = X .astype (global_dtype , copy = False )
241
264
radius = 10
242
265
243
266
g = neighbors .radius_neighbors_graph (X , radius = radius , mode = "distance" )
@@ -247,7 +270,8 @@ def test_isomap_fit_precomputed_radius_graph():
247
270
248
271
isomap = manifold .Isomap (n_neighbors = None , radius = radius , metric = "minkowski" )
249
272
result = isomap .fit_transform (X )
250
- assert_allclose (precomputed_result , result )
273
+ atol = 1e-5 if global_dtype == np .float32 else 0
274
+ assert_allclose (precomputed_result , result , atol = atol )
251
275
252
276
253
277
def test_isomap_fitted_attributes_dtype (global_dtype ):
@@ -294,10 +318,10 @@ def test_multiple_connected_components():
294
318
manifold .Isomap (n_neighbors = 2 ).fit (X )
295
319
296
320
297
- def test_multiple_connected_components_metric_precomputed ():
321
+ def test_multiple_connected_components_metric_precomputed (global_dtype ):
298
322
# Test that an error is raised when the graph has multiple components
299
323
# and when X is a precomputed neighbors graph.
300
- X = np .array ([0 , 1 , 2 , 5 , 6 , 7 ])[:, None ]
324
+ X = np .array ([0 , 1 , 2 , 5 , 6 , 7 ])[:, None ]. astype ( global_dtype , copy = False )
301
325
302
326
# works with a precomputed distance matrix (dense)
303
327
X_distances = pairwise_distances (X )
0 commit comments