diff --git a/sklearn/manifold/_t_sne.py b/sklearn/manifold/_t_sne.py
index 8813ee7ae5de0..a43a43942b274 100644
--- a/sklearn/manifold/_t_sne.py
+++ b/sklearn/manifold/_t_sne.py
@@ -724,6 +724,11 @@ class TSNE(BaseEstimator):
     LocallyLinearEmbedding : Manifold learning using Locally Linear Embedding.
     SpectralEmbedding : Spectral embedding for non-linear dimensionality.

+    Notes
+    -----
+    TSNE only preserves the dtype of its input when `init` is provided as a
+    float64 numpy array. In all other cases, the output dtype is float32.
+
     References
     ----------
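For context, a minimal sketch (not part of the patch) of the dtype contract the new Notes paragraph documents, assuming a scikit-learn build that includes this change:

```python
# Sketch only: illustrates the dtype behaviour described in the Notes above.
import numpy as np
from sklearn.manifold import TSNE

rng = np.random.RandomState(0)
X = rng.randn(100, 5).astype(np.float64)

# With a string init ("random" or "pca"), the embedding is always float32,
# even for float64 input.
emb = TSNE(init="random", learning_rate="auto", random_state=0).fit_transform(X)
assert emb.dtype == np.float32

# With a float64 ndarray init, the input dtype is preserved.
init = rng.randn(100, 2).astype(np.float64)
emb = TSNE(init=init, learning_rate="auto", random_state=0).fit_transform(X)
assert emb.dtype == np.float64
```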
diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py
index 4b00c7b228969..f1461dede5200 100644
--- a/sklearn/manifold/tests/test_t_sne.py
+++ b/sklearn/manifold/tests/test_t_sne.py
@@ -1,7 +1,6 @@
 import sys
 from io import StringIO
 import numpy as np
-from numpy.testing import assert_allclose
 import scipy.sparse as sp
 import pytest

@@ -10,8 +9,8 @@
 from sklearn.exceptions import EfficiencyWarning
 from sklearn.utils._testing import ignore_warnings
 from sklearn.utils._testing import assert_almost_equal
-from sklearn.utils._testing import assert_array_equal
 from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_allclose
 from sklearn.utils._testing import skip_if_32bit
 from sklearn.utils import check_random_state
 from sklearn.manifold._t_sne import _joint_probabilities
@@ -134,6 +133,7 @@ def test_binary_search():
     # Test if the binary search finds Gaussians with desired perplexity.
     random_state = check_random_state(0)
     data = random_state.randn(50, 5)
+    # _binary_search_perplexity only supports float32 inputs
     distances = pairwise_distances(data).astype(np.float32)
     desired_perplexity = 25.0
     P = _binary_search_perplexity(distances, desired_perplexity, verbose=0)
@@ -149,6 +149,7 @@ def test_binary_search_underflow():
     # A more challenging case than the one above, producing numeric
     # underflow in float precision (see issue #19471 and PR #19472).
     random_state = check_random_state(42)
+    # _binary_search_perplexity only supports float32 inputs
     data = random_state.randn(1, 90).astype(np.float32) + 100
     desired_perplexity = 30.0
     P = _binary_search_perplexity(data, desired_perplexity, verbose=0)
@@ -163,6 +164,7 @@ def test_binary_search_neighbors():
     n_samples = 200
     desired_perplexity = 25.0
     random_state = check_random_state(0)
+    # _binary_search_perplexity only supports float32 inputs
     data = random_state.randn(n_samples, 2).astype(np.float32, copy=False)
     distances = pairwise_distances(data)
     P1 = _binary_search_perplexity(distances, desired_perplexity, verbose=0)
@@ -210,6 +212,7 @@ def test_binary_perplexity_stability():
     data = random_state.randn(n_samples, 5)
     nn = NearestNeighbors().fit(data)
     distance_graph = nn.kneighbors_graph(n_neighbors=n_neighbors, mode="distance")
+    # _binary_search_perplexity only supports float32 inputs
     distances = distance_graph.data.astype(np.float32, copy=False)
     distances = distances.reshape(n_samples, n_neighbors)
     last_P = None
@@ -236,10 +239,10 @@ def test_gradient():
     n_components = 2
     alpha = 1.0

-    distances = random_state.randn(n_samples, n_features).astype(np.float32)
+    distances = random_state.randn(n_samples, n_features)
     distances = np.abs(distances.dot(distances.T))
     np.fill_diagonal(distances, 0.0)
-    X_embedded = random_state.randn(n_samples, n_components).astype(np.float32)
+    X_embedded = random_state.randn(n_samples, n_components)

     P = _joint_probabilities(distances, desired_perplexity=25.0, verbose=0)

@@ -249,27 +252,27 @@ def fun(params):
     def grad(params):
         return _kl_divergence(params, P, alpha, n_samples, n_components)[1]

-    assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0, decimal=5)
+    assert_allclose(check_grad(fun, grad, X_embedded.ravel()), 0.0, atol=1e-5)


-def test_trustworthiness():
+def test_trustworthiness(global_dtype):
     # Test trustworthiness score.
     random_state = check_random_state(0)

     # Affine transformation
-    X = random_state.randn(100, 2)
-    assert trustworthiness(X, 5.0 + X / 10.0) == 1.0
+    X = random_state.randn(100, 2).astype(global_dtype)
+    assert trustworthiness(X, 5.0 + X / 10.0) == pytest.approx(1.0)

     # Randomly shuffled
-    X = np.arange(100).reshape(-1, 1)
+    X = np.arange(100).reshape(-1, 1).astype(global_dtype)
     X_embedded = X.copy()
     random_state.shuffle(X_embedded)
     assert trustworthiness(X, X_embedded) < 0.6

     # Completely different
-    X = np.arange(5).reshape(-1, 1)
-    X_embedded = np.array([[0], [2], [4], [1], [3]])
-    assert_almost_equal(trustworthiness(X, X_embedded, n_neighbors=1), 0.2)
+    X = np.arange(5).reshape(-1, 1).astype(global_dtype)
+    X_embedded = np.array([[0], [2], [4], [1], [3]], dtype=global_dtype)
+    assert trustworthiness(X, X_embedded, n_neighbors=1) == pytest.approx(0.2)


 def test_trustworthiness_n_neighbors_error():
@@ -290,11 +293,11 @@

 @pytest.mark.parametrize("method", ["exact", "barnes_hut"])
 @pytest.mark.parametrize("init", ("random", "pca"))
-def test_preserve_trustworthiness_approximately(method, init):
+def test_preserve_trustworthiness_approximately(method, init, global_dtype):
     # Nearest neighbors should be preserved approximately.
     random_state = check_random_state(0)
     n_components = 2
-    X = random_state.randn(50, n_components).astype(np.float32)
+    X = random_state.randn(50, n_components).astype(global_dtype)
     tsne = TSNE(
         n_components=n_components,
         init=init,
@@ -304,14 +307,17 @@ def test_preserve_trustworthiness_approximately(method, init):
         learning_rate="auto",
     )
     X_embedded = tsne.fit_transform(X)
+    # TSNE.fit_transform does not preserve dtype for the default initialization
+    assert X_embedded.dtype == np.float32
     t = trustworthiness(X, X_embedded, n_neighbors=1)
     assert t > 0.85


-def test_optimization_minimizes_kl_divergence():
+def test_optimization_minimizes_kl_divergence(global_dtype):
     """t-SNE should give a lower KL divergence with more iterations."""
     random_state = check_random_state(0)
     X, _ = make_blobs(n_features=3, random_state=random_state)
+    X = X.astype(global_dtype)
     kl_divergences = []
     for n_iter in [250, 300, 350]:
         tsne = TSNE(
@@ -329,12 +335,12 @@


 @pytest.mark.parametrize("method", ["exact", "barnes_hut"])
-def test_fit_transform_csr_matrix(method):
+def test_fit_transform_csr_matrix(method, global_dtype):
     # TODO: compare results on dense and sparse data as proposed in:
     # https://github.com/scikit-learn/scikit-learn/pull/23585#discussion_r968388186
     # X can be a sparse matrix.
     rng = check_random_state(0)
-    X = rng.randn(50, 2)
+    X = rng.randn(50, 2).astype(global_dtype)
     X[(rng.randint(0, 50, 25), rng.randint(0, 2, 25))] = 0.0
     X_csr = sp.csr_matrix(X)
     tsne = TSNE(
@@ -347,7 +353,9 @@ def test_fit_transform_csr_matrix(method):
         n_iter=750,
     )
     X_embedded = tsne.fit_transform(X_csr)
-    assert_allclose(trustworthiness(X_csr, X_embedded, n_neighbors=1), 1.0, rtol=1.1e-1)
+    assert trustworthiness(X_csr, X_embedded, n_neighbors=1) == pytest.approx(
+        1.0, rel=1.1e-1
+    )


 def test_preserve_trustworthiness_approximately_with_precomputed_distances():
@@ -372,11 +380,11 @@ def test_preserve_trustworthiness_approximately_with_precomputed_distances():
     assert t > 0.95


-def test_trustworthiness_not_euclidean_metric():
+def test_trustworthiness_not_euclidean_metric(global_dtype):
     # Test trustworthiness with a metric different from 'euclidean' and
     # 'precomputed'
     random_state = check_random_state(0)
-    X = random_state.randn(100, 2)
+    X = random_state.randn(100, 2).astype(global_dtype)
     assert trustworthiness(X, X, metric="cosine") == trustworthiness(
         pairwise_distances(X, metric="cosine"), X, metric="precomputed"
     )
@@ -432,15 +440,18 @@ def test_high_perplexity_precomputed_sparse_distances():


 @ignore_warnings(category=EfficiencyWarning)
-def test_sparse_precomputed_distance():
+def test_sparse_precomputed_distance(global_dtype):
     """Make sure that TSNE works identically for sparse and dense matrix"""
+    atol = 1e-5 if global_dtype == np.float32 else 1e-7
     random_state = check_random_state(0)
     X = random_state.randn(100, 2)
-    D_sparse = kneighbors_graph(X, n_neighbors=100, mode="distance", include_self=True)
-    D = pairwise_distances(X)
+    D_sparse = kneighbors_graph(
+        X, n_neighbors=100, mode="distance", include_self=True
+    ).astype(global_dtype)
+    D = pairwise_distances(X).astype(global_dtype)
     assert sp.issparse(D_sparse)
-    assert_almost_equal(D_sparse.A, D)
+    assert_allclose(D_sparse.A, D, atol=atol)

     tsne = TSNE(
         metric="precomputed", random_state=0, init="random", learning_rate="auto"
     )
@@ -449,7 +460,7 @@ def test_sparse_precomputed_distance():

     for fmt in ["csr", "lil"]:
         Xt_sparse = tsne.fit_transform(D_sparse.asformat(fmt))
-        assert_almost_equal(Xt_dense, Xt_sparse)
+        assert_allclose(Xt_dense, Xt_sparse, atol=atol)


 def test_non_positive_computed_distances():
@@ -464,11 +475,14 @@ def metric(x, y):
         tsne.fit_transform(X)


-def test_init_ndarray():
+def test_init_ndarray(global_dtype):
     # Initialize TSNE with ndarray and test fit
-    tsne = TSNE(init=np.zeros((100, 2)), learning_rate="auto")
-    X_embedded = tsne.fit_transform(np.ones((100, 5)))
-    assert_array_equal(np.zeros((100, 2)), X_embedded)
+    tsne = TSNE(init=np.zeros((100, 2), dtype=global_dtype), learning_rate="auto")
+    X_embedded = tsne.fit_transform(np.ones((100, 5), dtype=global_dtype))
+
+    # TSNE.fit_transform _does_ preserve dtype when initialized with a custom array.
+    assert X_embedded.dtype == global_dtype
+    assert_allclose(np.zeros((100, 2), dtype=global_dtype), X_embedded)


 def test_init_ndarray_precomputed():
@@ -506,60 +520,58 @@ def test_n_components_range():
         tsne.fit_transform(np.array([[0.0], [1.0]]))


-def test_early_exaggeration_used():
+@pytest.mark.parametrize("method", ["exact", "barnes_hut"])
+def test_early_exaggeration_used(method, global_dtype):
     # check that the ``early_exaggeration`` parameter has an effect
     random_state = check_random_state(0)
     n_components = 2
-    methods = ["exact", "barnes_hut"]
-    X = random_state.randn(25, n_components).astype(np.float32)
-    for method in methods:
-        tsne = TSNE(
-            n_components=n_components,
-            perplexity=1,
-            learning_rate=100.0,
-            init="pca",
-            random_state=0,
-            method=method,
-            early_exaggeration=1.0,
-            n_iter=250,
-        )
-        X_embedded1 = tsne.fit_transform(X)
-        tsne = TSNE(
-            n_components=n_components,
-            perplexity=1,
-            learning_rate=100.0,
-            init="pca",
-            random_state=0,
-            method=method,
-            early_exaggeration=10.0,
-            n_iter=250,
-        )
-        X_embedded2 = tsne.fit_transform(X)
+    X = random_state.randn(25, n_components).astype(global_dtype)
+    tsne = TSNE(
+        n_components=n_components,
+        perplexity=1,
+        learning_rate=100.0,
+        init="pca",
+        random_state=0,
+        method=method,
+        early_exaggeration=1.0,
+        n_iter=250,
+    )
+    X_embedded1 = tsne.fit_transform(X)
+    tsne = TSNE(
+        n_components=n_components,
+        perplexity=1,
+        learning_rate=100.0,
+        init="pca",
+        random_state=0,
+        method=method,
+        early_exaggeration=10.0,
+        n_iter=250,
+    )
+    X_embedded2 = tsne.fit_transform(X)

-        assert not np.allclose(X_embedded1, X_embedded2)
+    assert not np.allclose(X_embedded1, X_embedded2)


-def test_n_iter_used():
+@pytest.mark.parametrize("method", ["exact", "barnes_hut"])
+@pytest.mark.parametrize("n_iter", [251, 500])
+def test_n_iter_used(method, n_iter, global_dtype):
     # check that the ``n_iter`` parameter has an effect
     random_state = check_random_state(0)
     n_components = 2
-    methods = ["exact", "barnes_hut"]
-    X = random_state.randn(25, n_components).astype(np.float32)
-    for method in methods:
-        for n_iter in [251, 500]:
-            tsne = TSNE(
-                n_components=n_components,
-                perplexity=1,
-                learning_rate=0.5,
-                init="random",
-                random_state=0,
-                method=method,
-                early_exaggeration=1.0,
-                n_iter=n_iter,
-            )
-            tsne.fit_transform(X)
-
-            assert tsne.n_iter_ == n_iter - 1
+    X = random_state.randn(25, n_components).astype(global_dtype)
+    tsne = TSNE(
+        n_components=n_components,
+        perplexity=1,
+        learning_rate=0.5,
+        init="random",
+        random_state=0,
+        method=method,
+        early_exaggeration=1.0,
+        n_iter=n_iter,
+    )
+    tsne.fit_transform(X)
+
+    assert tsne.n_iter_ == n_iter - 1


 def test_answer_gradient_two_points():
@@ -703,12 +715,10 @@ def test_reduction_to_one_component():


 @pytest.mark.parametrize("method", ["barnes_hut", "exact"])
-@pytest.mark.parametrize("dt", [np.float32, np.float64])
-def test_64bit(method, dt):
-    # Ensure 64bit arrays are handled correctly.
+def test_64bit(method):
     random_state = check_random_state(0)
-    X = random_state.randn(10, 2).astype(dt, copy=False)
+    X = random_state.randn(10, 2).astype(np.float64, copy=False)
     tsne = TSNE(
         n_components=2,
         perplexity=2,
@@ -749,7 +759,7 @@ def test_kl_divergence_not_nan(method):
     assert not np.isnan(tsne.kl_divergence_)


-def test_barnes_hut_angle():
+def test_barnes_hut_angle(global_dtype):
     # When Barnes-Hut's angle=0 this corresponds to the exact method.
     angle = 0.0
     perplexity = 10
@@ -759,9 +769,9 @@ def test_barnes_hut_angle():
     degrees_of_freedom = float(n_components - 1.0)

     random_state = check_random_state(0)
-    data = random_state.randn(n_samples, n_features)
+    data = random_state.randn(n_samples, n_features).astype(global_dtype)
     distances = pairwise_distances(data)
-    params = random_state.randn(n_samples, n_components)
+    params = random_state.randn(n_samples, n_components).astype(global_dtype)
     P = _joint_probabilities(distances, perplexity, verbose=0)
     kl_exact, grad_exact = _kl_divergence(
         params, P, degrees_of_freedom, n_samples, n_components
@@ -792,40 +802,40 @@


 @skip_if_32bit
-def test_n_iter_without_progress():
+@pytest.mark.parametrize("method", ["barnes_hut", "exact"])
+def test_n_iter_without_progress(method, global_dtype):
     # Use a dummy negative n_iter_without_progress and check output on stdout
     random_state = check_random_state(0)
-    X = random_state.randn(100, 10)
-    for method in ["barnes_hut", "exact"]:
-        tsne = TSNE(
-            n_iter_without_progress=-1,
-            verbose=2,
-            learning_rate=1e8,
-            random_state=0,
-            method=method,
-            n_iter=351,
-            init="random",
-        )
-        tsne._N_ITER_CHECK = 1
-        tsne._EXPLORATION_N_ITER = 0
+    X = random_state.randn(100, 10).astype(global_dtype)
+    tsne = TSNE(
+        n_iter_without_progress=-1,
+        verbose=2,
+        learning_rate=1e8,
+        random_state=0,
+        method=method,
+        n_iter=351,
+        init="random",
+    )
+    tsne._N_ITER_CHECK = 1
+    tsne._EXPLORATION_N_ITER = 0

-        old_stdout = sys.stdout
-        sys.stdout = StringIO()
-        try:
-            tsne.fit_transform(X)
-        finally:
-            out = sys.stdout.getvalue()
-            sys.stdout.close()
-            sys.stdout = old_stdout
+    old_stdout = sys.stdout
+    sys.stdout = StringIO()
+    try:
+        tsne.fit_transform(X)
+    finally:
+        out = sys.stdout.getvalue()
+        sys.stdout.close()
+        sys.stdout = old_stdout

-        # The output needs to contain the value of n_iter_without_progress
-        assert "did not make any progress during the last -1 episodes. Finished." in out
+    # The output needs to contain the value of n_iter_without_progress
+    assert "did not make any progress during the last -1 episodes. Finished." in out


-def test_min_grad_norm():
+def test_min_grad_norm(global_dtype):
     # Make sure that the parameter min_grad_norm is used correctly
     random_state = check_random_state(0)
-    X = random_state.randn(100, 2)
+    X = random_state.randn(100, 2).astype(global_dtype)
     min_grad_norm = 0.002
     tsne = TSNE(min_grad_norm=min_grad_norm, verbose=2, random_state=0, method="exact")
@@ -865,10 +875,10 @@ def test_min_grad_norm():
     assert n_smaller_gradient_norms <= 1


-def test_accessible_kl_divergence():
+def test_accessible_kl_divergence(global_dtype):
     # Ensures that the accessible kl_divergence matches the computed value
     random_state = check_random_state(0)
-    X = random_state.randn(50, 2)
+    X = random_state.randn(50, 2).astype(global_dtype)
     tsne = TSNE(
         n_iter_without_progress=2, verbose=2, random_state=0, method="exact", n_iter=500
     )
@@ -948,12 +958,12 @@ def assert_uniform_grid(Y, try_name=None):
     assert largest_to_mean < 2, try_name


-def test_bh_match_exact():
+def test_bh_match_exact(global_dtype):
     # check that the ``barnes_hut`` method match the exact one when
     # ``angle = 0`` and ``perplexity > n_samples / 3``
     random_state = check_random_state(0)
     n_features = 10
-    X = random_state.randn(30, n_features).astype(np.float32)
+    X = random_state.randn(30, n_features).astype(global_dtype)
     X_embeddeds = {}
     n_iter = {}
     for method in ["exact", "barnes_hut"]:
@@ -976,7 +986,7 @@ def test_bh_match_exact():
     assert_allclose(X_embeddeds["exact"], X_embeddeds["barnes_hut"], rtol=1e-4)


-def test_gradient_bh_multithread_match_sequential():
+def test_gradient_bh_multithread_match_sequential(global_dtype):
     # check that the bh gradient with different num_threads gives the same
     # results
@@ -989,8 +999,8 @@ def test_gradient_bh_multithread_match_sequential():
     perplexity = 5

     random_state = check_random_state(0)
-    data = random_state.randn(n_samples, n_features).astype(np.float32)
-    params = random_state.randn(n_samples, n_components)
+    data = random_state.randn(n_samples, n_features).astype(global_dtype)
+    params = random_state.randn(n_samples, n_components).astype(global_dtype)

     n_neighbors = n_samples - 1
     distances_csr = (
@@ -1032,7 +1042,7 @@ def test_gradient_bh_multithread_match_sequential():
     [("manhattan", manhattan_distances), ("cosine", cosine_distances)],
 )
 @pytest.mark.parametrize("method", ["barnes_hut", "exact"])
-def test_tsne_with_different_distance_metrics(metric, dist_func, method):
+def test_tsne_with_different_distance_metrics(metric, dist_func, method, global_dtype):
     """Make sure that TSNE works for different distance metrics"""

     if method == "barnes_hut" and metric == "manhattan":
@@ -1055,7 +1065,7 @@ def test_tsne_with_different_distance_metrics(metric, dist_func, method):
     random_state = check_random_state(0)
     n_components_original = 3
     n_components_embedding = 2
-    X = random_state.randn(50, n_components_original).astype(np.float32)
+    X = random_state.randn(50, n_components_original).astype(global_dtype)
     X_transformed_tsne = TSNE(
         metric=metric,
         method=method,
@@ -1074,7 +1084,12 @@ def test_tsne_with_different_distance_metrics(metric, dist_func, method):
         init="random",
         learning_rate="auto",
     ).fit_transform(dist_func(X))
-    assert_array_equal(X_transformed_tsne, X_transformed_tsne_precomputed)
+
+    # TSNE does not preserve dtype for random initialization (see docstring)
+    assert (
+        X_transformed_tsne.dtype == X_transformed_tsne_precomputed.dtype == np.float32
+    )
+    assert_allclose(X_transformed_tsne, X_transformed_tsne_precomputed)


 @pytest.mark.parametrize("method", ["exact", "barnes_hut"])
@@ -1138,6 +1153,7 @@ def test_tsne_with_mahalanobis_distance():


 # FIXME: remove in 1.3 after deprecation of `square_distances`
+@pytest.mark.filterwarnings("ignore:The PCA initialization in TSNE will change")
 def test_tsne_deprecation_square_distances():
     """Check that we raise a warning regarding the removal of
     `square_distances`.
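The updated tests all request a `global_dtype` fixture, which parametrizes each test over float64 and float32 inputs. A rough, hypothetical sketch of what such a fixture looks like (scikit-learn's real one lives in `sklearn/conftest.py` and gates the float32 run behind an opt-in environment variable):

```python
# conftest.py -- hypothetical minimal version of a dtype-parametrizing fixture;
# this is an illustration, not the actual scikit-learn implementation.
import numpy as np
import pytest


@pytest.fixture(params=[np.float64, np.float32], ids=["float64", "float32"])
def global_dtype(request):
    # Each test requesting this fixture runs once per dtype.
    yield request.param
```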