diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py index d3cc2979d4af2..dc70d6a515476 100644 --- a/sklearn/cluster/_spectral.py +++ b/sklearn/cluster/_spectral.py @@ -744,7 +744,7 @@ def fit(self, X, y=None): params["coef0"] = self.coef0 self.affinity_matrix_ = pairwise_kernels( X, metric=self.affinity, filter_params=True, **params - ) + ).astype(X.dtype, copy=False) random_state = check_random_state(self.random_state) self.labels_ = spectral_clustering( @@ -787,5 +787,6 @@ def fit_predict(self, X, y=None): def _more_tags(self): return { "pairwise": self.affinity - in ["precomputed", "precomputed_nearest_neighbors"] + in ["precomputed", "precomputed_nearest_neighbors"], + "preserves_dtype": [np.float64, np.float32], } diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py index ea40bc7c79139..5fd181e9efae7 100644 --- a/sklearn/cluster/tests/test_spectral.py +++ b/sklearn/cluster/tests/test_spectral.py @@ -40,7 +40,7 @@ @pytest.mark.parametrize("eigen_solver", ("arpack", "lobpcg")) @pytest.mark.parametrize("assign_labels", ("kmeans", "discretize", "cluster_qr")) -def test_spectral_clustering(eigen_solver, assign_labels): +def test_spectral_clustering(eigen_solver, assign_labels, global_dtype): S = np.array( [ [1.0, 1.0, 1.0, 0.2, 0.0, 0.0, 0.0], @@ -50,7 +50,8 @@ def test_spectral_clustering(eigen_solver, assign_labels): [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0], - ] + ], + dtype=global_dtype, ) for mat in (S, sparse.csr_matrix(S)): @@ -74,14 +75,14 @@ def test_spectral_clustering(eigen_solver, assign_labels): @pytest.mark.parametrize("assign_labels", ("kmeans", "discretize", "cluster_qr")) -def test_spectral_clustering_sparse(assign_labels): +def test_spectral_clustering_sparse(assign_labels, global_dtype): X, y = make_blobs( n_samples=20, random_state=0, centers=[[1, 1], [-1, -1]], cluster_std=0.01 ) S = rbf_kernel(X, gamma=1) S = np.maximum(S - 1e-4, 0) - S = sparse.coo_matrix(S) + S = sparse.coo_matrix(S, dtype=global_dtype) labels = ( SpectralClustering( @@ -96,11 +97,12 @@ def test_spectral_clustering_sparse(assign_labels): assert adjusted_rand_score(y, labels) == 1 -def test_precomputed_nearest_neighbors_filtering(): +def test_precomputed_nearest_neighbors_filtering(global_dtype): # Test precomputed graph filtering when containing too many neighbors X, y = make_blobs( n_samples=200, random_state=0, centers=[[1, 1], [-1, -1]], cluster_std=0.01 ) + X = X.astype(global_dtype, copy=False) n_neighbors = 2 results = [] @@ -122,13 +124,14 @@ def test_precomputed_nearest_neighbors_filtering(): assert_array_equal(results[0], results[1]) -def test_affinities(): +def test_affinities(global_dtype): # Note: in the following, random_state has been selected to have # a dataset that yields a stable eigen decomposition both when built # on OSX and Linux X, y = make_blobs( n_samples=20, random_state=0, centers=[[1, 1], [-1, -1]], cluster_std=0.01 ) + X = X.astype(global_dtype, copy=False) # nearest neighbors affinity sp = SpectralClustering(n_clusters=2, affinity="nearest_neighbors", random_state=0) with pytest.warns(UserWarning, match="not fully connected"): @@ -140,6 +143,7 @@ def test_affinities(): assert adjusted_rand_score(y, labels) == 1 X = check_random_state(10).rand(10, 5) * 10 + X = X.astype(global_dtype, copy=False) kernels_available = kernel_metrics() for kern in kernels_available: @@ -182,13 +186,13 @@ def test_cluster_qr(): assert np.array_equal(labels_float64, labels_float32) -def test_cluster_qr_permutation_invariance(): +def test_cluster_qr_permutation_invariance(global_dtype): # cluster_qr must be invariant to sample permutation. random_state = np.random.RandomState(seed=8) n_samples, n_components = 100, 5 - data = random_state.randn(n_samples, n_components) + data = random_state.randn(n_samples, n_components).astype(global_dtype, copy=False) perm = random_state.permutation(n_samples) - assert np.array_equal( + assert_array_equal( cluster_qr(data)[perm], cluster_qr(data[perm]), ) @@ -263,12 +267,13 @@ def test_spectral_clustering_with_arpack_amg_solvers(): spectral_clustering(graph, n_clusters=2, eigen_solver="amg", random_state=0) -def test_n_components(): +def test_n_components(global_dtype): # Test that after adding n_components, result is different and # n_components = n_clusters by default X, y = make_blobs( n_samples=20, random_state=0, centers=[[1, 1], [-1, -1]], cluster_std=0.01 ) + X = X.astype(global_dtype, copy=False) sp = SpectralClustering(n_clusters=2, random_state=0) labels = sp.fit(X).labels_ # set n_components = n_cluster and test if result is the same