From 890481c91269c73b88ecc73c5763f46cabb0909d Mon Sep 17 00:00:00 2001
From: Julien Jerphanion
Date: Thu, 3 Mar 2022 17:01:11 +0100
Subject: [PATCH 1/9] TST Adapt test_t_sne.py to test implementations on 32bit datasets

---
 sklearn/manifold/tests/test_t_sne.py | 127 ++++++++++++++++-----------
 1 file changed, 78 insertions(+), 49 deletions(-)

diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py
index c601d956e6f29..8f1e04190a835 100644
--- a/sklearn/manifold/tests/test_t_sne.py
+++ b/sklearn/manifold/tests/test_t_sne.py
@@ -47,6 +47,8 @@
     "ignore:The PCA initialization in TSNE will change to have the standard deviation",
 )
 
+DTYPES = (np.float64, np.float32)
+
 
 def test_gradient_descent_stops():
     # Test stopping conditions of gradient descent.
@@ -138,6 +140,7 @@ def test_binary_search():
     # Test if the binary search finds Gaussians with desired perplexity.
     random_state = check_random_state(0)
     data = random_state.randn(50, 5)
+    # _binary_search_perplexity only supports float32 inputs
     distances = pairwise_distances(data).astype(np.float32)
     desired_perplexity = 25.0
     P = _binary_search_perplexity(distances, desired_perplexity, verbose=0)
@@ -153,6 +156,7 @@ def test_binary_search_underflow():
     # A more challenging case than the one above, producing numeric
     # underflow in float precision (see issue #19471 and PR #19472).
     random_state = check_random_state(42)
+    # _binary_search_perplexity only supports float32 inputs
     data = random_state.randn(1, 90).astype(np.float32) + 100
     desired_perplexity = 30.0
     P = _binary_search_perplexity(data, desired_perplexity, verbose=0)
@@ -167,6 +171,7 @@ def test_binary_search_neighbors():
     n_samples = 200
     desired_perplexity = 25.0
     random_state = check_random_state(0)
+    # _binary_search_perplexity only supports float32 inputs
     data = random_state.randn(n_samples, 2).astype(np.float32, copy=False)
     distances = pairwise_distances(data)
     P1 = _binary_search_perplexity(distances, desired_perplexity, verbose=0)
@@ -214,6 +219,7 @@ def test_binary_perplexity_stability():
     data = random_state.randn(n_samples, 5)
     nn = NearestNeighbors().fit(data)
     distance_graph = nn.kneighbors_graph(n_neighbors=n_neighbors, mode="distance")
+    # _binary_search_perplexity only supports float32 inputs
     distances = distance_graph.data.astype(np.float32, copy=False)
     distances = distances.reshape(n_samples, n_neighbors)
     last_P = None
@@ -231,7 +237,8 @@ def test_binary_perplexity_stability():
     assert_array_almost_equal(P1, last_P1, decimal=4)
 
 
-def test_gradient():
+@pytest.mark.parametrize("dtype", DTYPES)
+def test_gradient(dtype):
     # Test gradient of Kullback-Leibler divergence.
     random_state = check_random_state(0)
 
@@ -240,10 +247,10 @@
     n_components = 2
     alpha = 1.0
 
-    distances = random_state.randn(n_samples, n_features).astype(np.float32)
+    distances = random_state.randn(n_samples, n_features).astype(dtype)
     distances = np.abs(distances.dot(distances.T))
     np.fill_diagonal(distances, 0.0)
-    X_embedded = random_state.randn(n_samples, n_components).astype(np.float32)
+    X_embedded = random_state.randn(n_samples, n_components).astype(dtype)
 
     P = _joint_probabilities(distances, desired_perplexity=25.0, verbose=0)
 
@@ -256,12 +263,13 @@ def grad(params):
     assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0, decimal=5)
 
 
-def test_trustworthiness():
+@pytest.mark.parametrize("dtype", DTYPES)
+def test_trustworthiness(dtype):
     # Test trustworthiness score.
random_state = check_random_state(0) # Affine transformation - X = random_state.randn(100, 2) + X = random_state.randn(100, 2).astype(dtype) assert trustworthiness(X, 5.0 + X / 10.0) == 1.0 # Randomly shuffled @@ -278,11 +286,12 @@ def test_trustworthiness(): @pytest.mark.parametrize("method", ["exact", "barnes_hut"]) @pytest.mark.parametrize("init", ("random", "pca")) -def test_preserve_trustworthiness_approximately(method, init): +@pytest.mark.parametrize("dtype", DTYPES) +def test_preserve_trustworthiness_approximately(method, init, dtype): # Nearest neighbors should be preserved approximately. random_state = check_random_state(0) n_components = 2 - X = random_state.randn(50, n_components).astype(np.float32) + X = random_state.randn(50, n_components).astype(dtype) tsne = TSNE( n_components=n_components, init=init, @@ -296,10 +305,12 @@ def test_preserve_trustworthiness_approximately(method, init): assert t > 0.85 -def test_optimization_minimizes_kl_divergence(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_optimization_minimizes_kl_divergence(dtype): """t-SNE should give a lower KL divergence with more iterations.""" random_state = check_random_state(0) X, _ = make_blobs(n_features=3, random_state=random_state) + X = X.astype(dtype) kl_divergences = [] for n_iter in [250, 300, 350]: tsne = TSNE( @@ -317,10 +328,11 @@ def test_optimization_minimizes_kl_divergence(): @pytest.mark.parametrize("method", ["exact", "barnes_hut"]) -def test_fit_csr_matrix(method): +@pytest.mark.parametrize("dtype", DTYPES) +def test_fit_csr_matrix(method, dtype): # X can be a sparse matrix. rng = check_random_state(0) - X = rng.randn(50, 2) + X = rng.randn(50, 2).astype(dtype) X[(rng.randint(0, 50, 25), rng.randint(0, 2, 25))] = 0.0 X_csr = sp.csr_matrix(X) tsne = TSNE( @@ -358,11 +370,12 @@ def test_preserve_trustworthiness_approximately_with_precomputed_distances(): assert t > 0.95 -def test_trustworthiness_not_euclidean_metric(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_trustworthiness_not_euclidean_metric(dtype): # Test trustworthiness with a metric different from 'euclidean' and # 'precomputed' random_state = check_random_state(0) - X = random_state.randn(100, 2) + X = random_state.randn(100, 2).astype(dtype) assert trustworthiness(X, X, metric="cosine") == trustworthiness( pairwise_distances(X, metric="cosine"), X, metric="precomputed" ) @@ -437,13 +450,16 @@ def test_high_perplexity_precomputed_sparse_distances(): @ignore_warnings(category=EfficiencyWarning) -def test_sparse_precomputed_distance(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_sparse_precomputed_distance(dtype): """Make sure that TSNE works identically for sparse and dense matrix""" random_state = check_random_state(0) X = random_state.randn(100, 2) - D_sparse = kneighbors_graph(X, n_neighbors=100, mode="distance", include_self=True) - D = pairwise_distances(X) + D_sparse = kneighbors_graph( + X, n_neighbors=100, mode="distance", include_self=True + ).astype(dtype) + D = pairwise_distances(X).astype(dtype) assert sp.issparse(D_sparse) assert_almost_equal(D_sparse.A, D) @@ -480,11 +496,12 @@ def test_init_not_available(): tsne.fit_transform(np.array([[0.0], [1.0]])) -def test_init_ndarray(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_init_ndarray(dtype): # Initialize TSNE with ndarray and test fit - tsne = TSNE(init=np.zeros((100, 2)), learning_rate="auto") - X_embedded = tsne.fit_transform(np.ones((100, 5))) - assert_array_equal(np.zeros((100, 2)), X_embedded) + tsne = TSNE(init=np.zeros((100, 2), 
dtype=dtype), learning_rate="auto") + X_embedded = tsne.fit_transform(np.ones((100, 5), dtype=dtype)) + assert_array_equal(np.zeros((100, 2), dtype=dtype), X_embedded) def test_init_ndarray_precomputed(): @@ -557,12 +574,13 @@ def test_n_components_range(): tsne.fit_transform(np.array([[0.0], [1.0]])) -def test_early_exaggeration_used(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_early_exaggeration_used(dtype): # check that the ``early_exaggeration`` parameter has an effect random_state = check_random_state(0) n_components = 2 methods = ["exact", "barnes_hut"] - X = random_state.randn(25, n_components).astype(np.float32) + X = random_state.randn(25, n_components).astype(dtype) for method in methods: tsne = TSNE( n_components=n_components, @@ -590,12 +608,13 @@ def test_early_exaggeration_used(): assert not np.allclose(X_embedded1, X_embedded2) -def test_n_iter_used(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_n_iter_used(dtype): # check that the ``n_iter`` parameter has an effect random_state = check_random_state(0) n_components = 2 methods = ["exact", "barnes_hut"] - X = random_state.randn(25, n_components).astype(np.float32) + X = random_state.randn(25, n_components).astype(dtype) for method in methods: for n_iter in [251, 500]: tsne = TSNE( @@ -760,12 +779,12 @@ def test_reduction_to_one_component(): @pytest.mark.parametrize("method", ["barnes_hut", "exact"]) -@pytest.mark.parametrize("dt", [np.float32, np.float64]) -def test_64bit(method, dt): +@pytest.mark.parametrize("dtype", DTYPES) +def test_64bit(method, dtype): # Ensure 64bit arrays are handled correctly. random_state = check_random_state(0) - X = random_state.randn(10, 2).astype(dt, copy=False) + X = random_state.randn(10, 2).astype(dtype, copy=False) tsne = TSNE( n_components=2, perplexity=2, @@ -806,7 +825,8 @@ def test_kl_divergence_not_nan(method): assert not np.isnan(tsne.kl_divergence_) -def test_barnes_hut_angle(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_barnes_hut_angle(dtype): # When Barnes-Hut's angle=0 this corresponds to the exact method. 
angle = 0.0 perplexity = 10 @@ -816,9 +836,9 @@ def test_barnes_hut_angle(): degrees_of_freedom = float(n_components - 1.0) random_state = check_random_state(0) - data = random_state.randn(n_samples, n_features) + data = random_state.randn(n_samples, n_features).astype(dtype) distances = pairwise_distances(data) - params = random_state.randn(n_samples, n_components) + params = random_state.randn(n_samples, n_components).astype(dtype) P = _joint_probabilities(distances, perplexity, verbose=0) kl_exact, grad_exact = _kl_divergence( params, P, degrees_of_freedom, n_samples, n_components @@ -849,10 +869,11 @@ def test_barnes_hut_angle(): @skip_if_32bit -def test_n_iter_without_progress(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_n_iter_without_progress(dtype): # Use a dummy negative n_iter_without_progress and check output on stdout random_state = check_random_state(0) - X = random_state.randn(100, 10) + X = random_state.randn(100, 10).astype(dtype) for method in ["barnes_hut", "exact"]: tsne = TSNE( n_iter_without_progress=-1, @@ -881,10 +902,11 @@ def test_n_iter_without_progress(): @pytest.mark.filterwarnings("ignore:The default learning rate in TSNE") @pytest.mark.filterwarnings("ignore:The default initialization in TSNE") -def test_min_grad_norm(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_min_grad_norm(dtype): # Make sure that the parameter min_grad_norm is used correctly random_state = check_random_state(0) - X = random_state.randn(100, 2) + X = random_state.randn(100, 2).astype(dtype) min_grad_norm = 0.002 tsne = TSNE(min_grad_norm=min_grad_norm, verbose=2, random_state=0, method="exact") @@ -926,10 +948,11 @@ def test_min_grad_norm(): @pytest.mark.filterwarnings("ignore:The default learning rate in TSNE") @pytest.mark.filterwarnings("ignore:The default initialization in TSNE") -def test_accessible_kl_divergence(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_accessible_kl_divergence(dtype): # Ensures that the accessible kl_divergence matches the computed value random_state = check_random_state(0) - X = random_state.randn(50, 2) + X = random_state.randn(50, 2).astype(dtype) tsne = TSNE( n_iter_without_progress=2, verbose=2, random_state=0, method="exact", n_iter=500 ) @@ -1009,12 +1032,13 @@ def assert_uniform_grid(Y, try_name=None): assert largest_to_mean < 2, try_name -def test_bh_match_exact(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_bh_match_exact(dtype): # check that the ``barnes_hut`` method match the exact one when # ``angle = 0`` and ``perplexity > n_samples / 3`` random_state = check_random_state(0) n_features = 10 - X = random_state.randn(30, n_features).astype(np.float32) + X = random_state.randn(30, n_features).astype(dtype) X_embeddeds = {} n_iter = {} for method in ["exact", "barnes_hut"]: @@ -1037,7 +1061,8 @@ def test_bh_match_exact(): assert_allclose(X_embeddeds["exact"], X_embeddeds["barnes_hut"], rtol=1e-4) -def test_gradient_bh_multithread_match_sequential(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_gradient_bh_multithread_match_sequential(dtype): # check that the bh gradient with different num_threads gives the same # results @@ -1050,8 +1075,8 @@ def test_gradient_bh_multithread_match_sequential(): perplexity = 5 random_state = check_random_state(0) - data = random_state.randn(n_samples, n_features).astype(np.float32) - params = random_state.randn(n_samples, n_components) + data = random_state.randn(n_samples, n_features).astype(dtype) + params = random_state.randn(n_samples, n_components).astype(dtype) 
n_neighbors = n_samples - 1 distances_csr = ( @@ -1088,12 +1113,13 @@ def test_gradient_bh_multithread_match_sequential(): assert_allclose(grad_multithread, grad_multithread) -def test_tsne_with_different_distance_metrics(): +@pytest.mark.parametrize("dtype", DTYPES) +def test_tsne_with_different_distance_metrics(dtype): """Make sure that TSNE works for different distance metrics""" random_state = check_random_state(0) n_components_original = 3 n_components_embedding = 2 - X = random_state.randn(50, n_components_original).astype(np.float32) + X = random_state.randn(50, n_components_original).astype(dtype) metrics = ["manhattan", "cosine"] dist_funcs = [manhattan_distances, cosine_distances] for metric, dist_func in zip(metrics, dist_funcs): @@ -1118,12 +1144,13 @@ def test_tsne_with_different_distance_metrics(): # TODO: Remove in 1.2 @pytest.mark.parametrize("init", [None, "random", "pca"]) -def test_tsne_init_futurewarning(init): +@pytest.mark.parametrize("dtype", DTYPES) +def test_tsne_init_futurewarning(init, dtype): """Make sure that a FutureWarning is only raised when the init is not specified or is 'pca'.""" random_state = check_random_state(0) - X = random_state.randn(5, 2) + X = random_state.randn(5, 2).astype(dtype) kwargs = dict(learning_rate=200.0, init=init) tsne = TSNE(**{k: v for k, v in kwargs.items() if v is not None}) @@ -1141,12 +1168,13 @@ def test_tsne_init_futurewarning(init): # TODO: Remove in 1.2 @pytest.mark.parametrize("learning_rate", [None, 200.0]) -def test_tsne_learning_rate_futurewarning(learning_rate): +@pytest.mark.parametrize("dtype", DTYPES) +def test_tsne_learning_rate_futurewarning(learning_rate, dtype): """Make sure that a FutureWarning is only raised when the learning rate is not specified""" random_state = check_random_state(0) - X = random_state.randn(5, 2) + X = random_state.randn(5, 2).astype(dtype) kwargs = dict(learning_rate=learning_rate, init="random") tsne = TSNE(**{k: v for k, v in kwargs.items() if v is not None}) @@ -1198,16 +1226,17 @@ def test_tsne_n_jobs(method): assert_allclose(X_tr_ref, X_tr) -@pytest.mark.filterwarnings("ignore:The PCA initialization in TSNE will change") # FIXME: remove in 1.3 after deprecation of `square_distances` -def test_tsne_deprecation_square_distances(): +@pytest.mark.filterwarnings("ignore:The PCA initialization in TSNE will change") +@pytest.mark.parametrize("dtype", DTYPES) +def test_tsne_deprecation_square_distances(dtype): """Check that we raise a warning regarding the removal of `square_distances`. Also check the parameters do not have any effect. """ random_state = check_random_state(0) - X = random_state.randn(30, 10) + X = random_state.randn(30, 10).astype(dtype) tsne = TSNE( n_components=2, init="pca", From 85409ceebb62dca9348e159d22d72a81b9252527 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 17 Mar 2022 17:26:13 +0100 Subject: [PATCH 2/9] TST Use global_dtype --- sklearn/manifold/tests/test_t_sne.py | 120 +++++++++++---------------- 1 file changed, 48 insertions(+), 72 deletions(-) diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index de3e598517d20..fa14e1e18b661 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -47,8 +47,6 @@ "ignore:The PCA initialization in TSNE will change to have the standard deviation", ) -DTYPES = (np.float64, np.float32) - def test_gradient_descent_stops(): # Test stopping conditions of gradient descent. 
@@ -237,8 +235,7 @@ def test_binary_perplexity_stability(): assert_array_almost_equal(P1, last_P1, decimal=4) -@pytest.mark.parametrize("dtype", DTYPES) -def test_gradient(dtype): +def test_gradient(global_dtype): # Test gradient of Kullback-Leibler divergence. random_state = check_random_state(0) @@ -247,10 +244,10 @@ def test_gradient(dtype): n_components = 2 alpha = 1.0 - distances = random_state.randn(n_samples, n_features).astype(dtype) + distances = random_state.randn(n_samples, n_features).astype(global_dtype) distances = np.abs(distances.dot(distances.T)) np.fill_diagonal(distances, 0.0) - X_embedded = random_state.randn(n_samples, n_components).astype(dtype) + X_embedded = random_state.randn(n_samples, n_components).astype(global_dtype) P = _joint_probabilities(distances, desired_perplexity=25.0, verbose=0) @@ -263,13 +260,12 @@ def grad(params): assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0, decimal=5) -@pytest.mark.parametrize("dtype", DTYPES) -def test_trustworthiness(dtype): +def test_trustworthiness(global_dtype): # Test trustworthiness score. random_state = check_random_state(0) # Affine transformation - X = random_state.randn(100, 2).astype(dtype) + X = random_state.randn(100, 2).astype(global_dtype) assert trustworthiness(X, 5.0 + X / 10.0) == 1.0 # Randomly shuffled @@ -286,12 +282,11 @@ def test_trustworthiness(dtype): @pytest.mark.parametrize("method", ["exact", "barnes_hut"]) @pytest.mark.parametrize("init", ("random", "pca")) -@pytest.mark.parametrize("dtype", DTYPES) -def test_preserve_trustworthiness_approximately(method, init, dtype): +def test_preserve_trustworthiness_approximately(method, init, global_dtype): # Nearest neighbors should be preserved approximately. random_state = check_random_state(0) n_components = 2 - X = random_state.randn(50, n_components).astype(dtype) + X = random_state.randn(50, n_components).astype(global_dtype) tsne = TSNE( n_components=n_components, init=init, @@ -305,12 +300,11 @@ def test_preserve_trustworthiness_approximately(method, init, dtype): assert t > 0.85 -@pytest.mark.parametrize("dtype", DTYPES) -def test_optimization_minimizes_kl_divergence(dtype): +def test_optimization_minimizes_kl_divergence(global_dtype): """t-SNE should give a lower KL divergence with more iterations.""" random_state = check_random_state(0) X, _ = make_blobs(n_features=3, random_state=random_state) - X = X.astype(dtype) + X = X.astype(global_dtype) kl_divergences = [] for n_iter in [250, 300, 350]: tsne = TSNE( @@ -328,11 +322,10 @@ def test_optimization_minimizes_kl_divergence(dtype): @pytest.mark.parametrize("method", ["exact", "barnes_hut"]) -@pytest.mark.parametrize("dtype", DTYPES) -def test_fit_csr_matrix(method, dtype): +def test_fit_csr_matrix(method, global_dtype): # X can be a sparse matrix. 
rng = check_random_state(0) - X = rng.randn(50, 2).astype(dtype) + X = rng.randn(50, 2).astype(global_dtype) X[(rng.randint(0, 50, 25), rng.randint(0, 2, 25))] = 0.0 X_csr = sp.csr_matrix(X) tsne = TSNE( @@ -370,12 +363,11 @@ def test_preserve_trustworthiness_approximately_with_precomputed_distances(): assert t > 0.95 -@pytest.mark.parametrize("dtype", DTYPES) -def test_trustworthiness_not_euclidean_metric(dtype): +def test_trustworthiness_not_euclidean_metric(global_dtype): # Test trustworthiness with a metric different from 'euclidean' and # 'precomputed' random_state = check_random_state(0) - X = random_state.randn(100, 2).astype(dtype) + X = random_state.randn(100, 2).astype(global_dtype) assert trustworthiness(X, X, metric="cosine") == trustworthiness( pairwise_distances(X, metric="cosine"), X, metric="precomputed" ) @@ -450,16 +442,15 @@ def test_high_perplexity_precomputed_sparse_distances(): @ignore_warnings(category=EfficiencyWarning) -@pytest.mark.parametrize("dtype", DTYPES) -def test_sparse_precomputed_distance(dtype): +def test_sparse_precomputed_distance(global_dtype): """Make sure that TSNE works identically for sparse and dense matrix""" random_state = check_random_state(0) X = random_state.randn(100, 2) D_sparse = kneighbors_graph( X, n_neighbors=100, mode="distance", include_self=True - ).astype(dtype) - D = pairwise_distances(X).astype(dtype) + ).astype(global_dtype) + D = pairwise_distances(X).astype(global_dtype) assert sp.issparse(D_sparse) assert_almost_equal(D_sparse.A, D) @@ -496,12 +487,11 @@ def test_init_not_available(): tsne.fit_transform(np.array([[0.0], [1.0]])) -@pytest.mark.parametrize("dtype", DTYPES) -def test_init_ndarray(dtype): +def test_init_ndarray(global_dtype): # Initialize TSNE with ndarray and test fit - tsne = TSNE(init=np.zeros((100, 2), dtype=dtype), learning_rate="auto") - X_embedded = tsne.fit_transform(np.ones((100, 5), dtype=dtype)) - assert_array_equal(np.zeros((100, 2), dtype=dtype), X_embedded) + tsne = TSNE(init=np.zeros((100, 2), dtype=global_dtype), learning_rate="auto") + X_embedded = tsne.fit_transform(np.ones((100, 5), dtype=global_dtype)) + assert_array_equal(np.zeros((100, 2), dtype=global_dtype), X_embedded) def test_init_ndarray_precomputed(): @@ -574,13 +564,12 @@ def test_n_components_range(): tsne.fit_transform(np.array([[0.0], [1.0]])) -@pytest.mark.parametrize("dtype", DTYPES) -def test_early_exaggeration_used(dtype): +def test_early_exaggeration_used(global_dtype): # check that the ``early_exaggeration`` parameter has an effect random_state = check_random_state(0) n_components = 2 methods = ["exact", "barnes_hut"] - X = random_state.randn(25, n_components).astype(dtype) + X = random_state.randn(25, n_components).astype(global_dtype) for method in methods: tsne = TSNE( n_components=n_components, @@ -608,13 +597,12 @@ def test_early_exaggeration_used(dtype): assert not np.allclose(X_embedded1, X_embedded2) -@pytest.mark.parametrize("dtype", DTYPES) -def test_n_iter_used(dtype): +def test_n_iter_used(global_dtype): # check that the ``n_iter`` parameter has an effect random_state = check_random_state(0) n_components = 2 methods = ["exact", "barnes_hut"] - X = random_state.randn(25, n_components).astype(dtype) + X = random_state.randn(25, n_components).astype(global_dtype) for method in methods: for n_iter in [251, 500]: tsne = TSNE( @@ -779,12 +767,10 @@ def test_reduction_to_one_component(): @pytest.mark.parametrize("method", ["barnes_hut", "exact"]) -@pytest.mark.parametrize("dtype", DTYPES) -def test_64bit(method, dtype): 
- # Ensure 64bit arrays are handled correctly. +def test_64bit(method): random_state = check_random_state(0) - X = random_state.randn(10, 2).astype(dtype, copy=False) + X = random_state.randn(10, 2).astype(np.float64, copy=False) tsne = TSNE( n_components=2, perplexity=2, @@ -825,8 +811,7 @@ def test_kl_divergence_not_nan(method): assert not np.isnan(tsne.kl_divergence_) -@pytest.mark.parametrize("dtype", DTYPES) -def test_barnes_hut_angle(dtype): +def test_barnes_hut_angle(global_dtype): # When Barnes-Hut's angle=0 this corresponds to the exact method. angle = 0.0 perplexity = 10 @@ -836,9 +821,9 @@ def test_barnes_hut_angle(dtype): degrees_of_freedom = float(n_components - 1.0) random_state = check_random_state(0) - data = random_state.randn(n_samples, n_features).astype(dtype) + data = random_state.randn(n_samples, n_features).astype(global_dtype) distances = pairwise_distances(data) - params = random_state.randn(n_samples, n_components).astype(dtype) + params = random_state.randn(n_samples, n_components).astype(global_dtype) P = _joint_probabilities(distances, perplexity, verbose=0) kl_exact, grad_exact = _kl_divergence( params, P, degrees_of_freedom, n_samples, n_components @@ -869,11 +854,10 @@ def test_barnes_hut_angle(dtype): @skip_if_32bit -@pytest.mark.parametrize("dtype", DTYPES) -def test_n_iter_without_progress(dtype): +def test_n_iter_without_progress(global_dtype): # Use a dummy negative n_iter_without_progress and check output on stdout random_state = check_random_state(0) - X = random_state.randn(100, 10).astype(dtype) + X = random_state.randn(100, 10).astype(global_dtype) for method in ["barnes_hut", "exact"]: tsne = TSNE( n_iter_without_progress=-1, @@ -902,11 +886,10 @@ def test_n_iter_without_progress(dtype): @pytest.mark.filterwarnings("ignore:The default learning rate in TSNE") @pytest.mark.filterwarnings("ignore:The default initialization in TSNE") -@pytest.mark.parametrize("dtype", DTYPES) -def test_min_grad_norm(dtype): +def test_min_grad_norm(global_dtype): # Make sure that the parameter min_grad_norm is used correctly random_state = check_random_state(0) - X = random_state.randn(100, 2).astype(dtype) + X = random_state.randn(100, 2).astype(global_dtype) min_grad_norm = 0.002 tsne = TSNE(min_grad_norm=min_grad_norm, verbose=2, random_state=0, method="exact") @@ -948,11 +931,10 @@ def test_min_grad_norm(dtype): @pytest.mark.filterwarnings("ignore:The default learning rate in TSNE") @pytest.mark.filterwarnings("ignore:The default initialization in TSNE") -@pytest.mark.parametrize("dtype", DTYPES) -def test_accessible_kl_divergence(dtype): +def test_accessible_kl_divergence(global_dtype): # Ensures that the accessible kl_divergence matches the computed value random_state = check_random_state(0) - X = random_state.randn(50, 2).astype(dtype) + X = random_state.randn(50, 2).astype(global_dtype) tsne = TSNE( n_iter_without_progress=2, verbose=2, random_state=0, method="exact", n_iter=500 ) @@ -1032,13 +1014,12 @@ def assert_uniform_grid(Y, try_name=None): assert largest_to_mean < 2, try_name -@pytest.mark.parametrize("dtype", DTYPES) -def test_bh_match_exact(dtype): +def test_bh_match_exact(global_dtype): # check that the ``barnes_hut`` method match the exact one when # ``angle = 0`` and ``perplexity > n_samples / 3`` random_state = check_random_state(0) n_features = 10 - X = random_state.randn(30, n_features).astype(dtype) + X = random_state.randn(30, n_features).astype(global_dtype) X_embeddeds = {} n_iter = {} for method in ["exact", "barnes_hut"]: @@ -1061,8 +1042,7 
@@ def test_bh_match_exact(dtype): assert_allclose(X_embeddeds["exact"], X_embeddeds["barnes_hut"], rtol=1e-4) -@pytest.mark.parametrize("dtype", DTYPES) -def test_gradient_bh_multithread_match_sequential(dtype): +def test_gradient_bh_multithread_match_sequential(global_dtype): # check that the bh gradient with different num_threads gives the same # results @@ -1075,8 +1055,8 @@ def test_gradient_bh_multithread_match_sequential(dtype): perplexity = 5 random_state = check_random_state(0) - data = random_state.randn(n_samples, n_features).astype(dtype) - params = random_state.randn(n_samples, n_components).astype(dtype) + data = random_state.randn(n_samples, n_features).astype(global_dtype) + params = random_state.randn(n_samples, n_components).astype(global_dtype) n_neighbors = n_samples - 1 distances_csr = ( @@ -1113,13 +1093,12 @@ def test_gradient_bh_multithread_match_sequential(dtype): assert_allclose(grad_multithread, grad_multithread) -@pytest.mark.parametrize("dtype", DTYPES) -def test_tsne_with_different_distance_metrics(dtype): +def test_tsne_with_different_distance_metrics(global_dtype): """Make sure that TSNE works for different distance metrics""" random_state = check_random_state(0) n_components_original = 3 n_components_embedding = 2 - X = random_state.randn(50, n_components_original).astype(dtype) + X = random_state.randn(50, n_components_original).astype(global_dtype) metrics = ["manhattan", "cosine"] dist_funcs = [manhattan_distances, cosine_distances] for metric, dist_func in zip(metrics, dist_funcs): @@ -1144,13 +1123,12 @@ def test_tsne_with_different_distance_metrics(dtype): # TODO: Remove in 1.2 @pytest.mark.parametrize("init", [None, "random", "pca"]) -@pytest.mark.parametrize("dtype", DTYPES) -def test_tsne_init_futurewarning(init, dtype): +def test_tsne_init_futurewarning(init, global_dtype): """Make sure that a FutureWarning is only raised when the init is not specified or is 'pca'.""" random_state = check_random_state(0) - X = random_state.randn(5, 2).astype(dtype) + X = random_state.randn(5, 2).astype(global_dtype) kwargs = dict(learning_rate=200.0, init=init) tsne = TSNE(**{k: v for k, v in kwargs.items() if v is not None}) @@ -1168,13 +1146,12 @@ def test_tsne_init_futurewarning(init, dtype): # TODO: Remove in 1.2 @pytest.mark.parametrize("learning_rate", [None, 200.0]) -@pytest.mark.parametrize("dtype", DTYPES) -def test_tsne_learning_rate_futurewarning(learning_rate, dtype): +def test_tsne_learning_rate_futurewarning(learning_rate, global_dtype): """Make sure that a FutureWarning is only raised when the learning rate is not specified""" random_state = check_random_state(0) - X = random_state.randn(5, 2).astype(dtype) + X = random_state.randn(5, 2).astype(global_dtype) kwargs = dict(learning_rate=learning_rate, init="random") tsne = TSNE(**{k: v for k, v in kwargs.items() if v is not None}) @@ -1259,15 +1236,14 @@ def test_tsne_with_mahalanobis_distance(): # FIXME: remove in 1.3 after deprecation of `square_distances` @pytest.mark.filterwarnings("ignore:The PCA initialization in TSNE will change") -@pytest.mark.parametrize("dtype", DTYPES) -def test_tsne_deprecation_square_distances(dtype): +def test_tsne_deprecation_square_distances(global_dtype): """Check that we raise a warning regarding the removal of `square_distances`. Also check the parameters do not have any effect. 
""" random_state = check_random_state(0) - X = random_state.randn(30, 10).astype(dtype) + X = random_state.randn(30, 10).astype(global_dtype) tsne = TSNE( n_components=2, init="pca", From 69a7e7da88b8ed20b83fe18412ab094a48f64fb8 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 17 Mar 2022 18:02:28 +0100 Subject: [PATCH 3/9] Address review comments Co-authored-by: Olivier Grisel --- sklearn/manifold/tests/test_t_sne.py | 119 ++++++++++++++------------- 1 file changed, 64 insertions(+), 55 deletions(-) diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index fa14e1e18b661..9cff14838a359 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -1,7 +1,6 @@ import sys from io import StringIO import numpy as np -from numpy.testing import assert_allclose import scipy.sparse as sp import pytest @@ -12,6 +11,7 @@ from sklearn.utils._testing import assert_almost_equal from sklearn.utils._testing import assert_array_equal from sklearn.utils._testing import assert_array_almost_equal +from sklearn.utils._testing import assert_allclose from sklearn.utils._testing import skip_if_32bit from sklearn.utils import check_random_state from sklearn.manifold._t_sne import _joint_probabilities @@ -266,7 +266,7 @@ def test_trustworthiness(global_dtype): # Affine transformation X = random_state.randn(100, 2).astype(global_dtype) - assert trustworthiness(X, 5.0 + X / 10.0) == 1.0 + assert trustworthiness(X, 5.0 + X / 10.0) == pytest.approx(1.0) # Randomly shuffled X = np.arange(100).reshape(-1, 1) @@ -296,6 +296,8 @@ def test_preserve_trustworthiness_approximately(method, init, global_dtype): learning_rate="auto", ) X_embedded = tsne.fit_transform(X) + # TNSE.fit_transform does not preserves dtype in this case + assert X_embedded.dtype == np.float32 t = trustworthiness(X, X_embedded, n_neighbors=1) assert t > 0.85 @@ -491,6 +493,10 @@ def test_init_ndarray(global_dtype): # Initialize TSNE with ndarray and test fit tsne = TSNE(init=np.zeros((100, 2), dtype=global_dtype), learning_rate="auto") X_embedded = tsne.fit_transform(np.ones((100, 5), dtype=global_dtype)) + + # TNSE.fit_transform _does_ preserves dtype in this case + # (initialisation with a custom array) + assert X_embedded.dtype == global_dtype assert_array_equal(np.zeros((100, 2), dtype=global_dtype), X_embedded) @@ -564,37 +570,36 @@ def test_n_components_range(): tsne.fit_transform(np.array([[0.0], [1.0]])) -def test_early_exaggeration_used(global_dtype): +@pytest.mark.parametrize("method", ["exact", "barnes_hut"]) +def test_early_exaggeration_used(method, global_dtype): # check that the ``early_exaggeration`` parameter has an effect random_state = check_random_state(0) n_components = 2 - methods = ["exact", "barnes_hut"] X = random_state.randn(25, n_components).astype(global_dtype) - for method in methods: - tsne = TSNE( - n_components=n_components, - perplexity=1, - learning_rate=100.0, - init="pca", - random_state=0, - method=method, - early_exaggeration=1.0, - n_iter=250, - ) - X_embedded1 = tsne.fit_transform(X) - tsne = TSNE( - n_components=n_components, - perplexity=1, - learning_rate=100.0, - init="pca", - random_state=0, - method=method, - early_exaggeration=10.0, - n_iter=250, - ) - X_embedded2 = tsne.fit_transform(X) + tsne = TSNE( + n_components=n_components, + perplexity=1, + learning_rate=100.0, + init="pca", + random_state=0, + method=method, + early_exaggeration=1.0, + n_iter=250, + ) + X_embedded1 = tsne.fit_transform(X) + tsne = TSNE( + 
n_components=n_components, + perplexity=1, + learning_rate=100.0, + init="pca", + random_state=0, + method=method, + early_exaggeration=10.0, + n_iter=250, + ) + X_embedded2 = tsne.fit_transform(X) - assert not np.allclose(X_embedded1, X_embedded2) + assert not np.allclose(X_embedded1, X_embedded2) def test_n_iter_used(global_dtype): @@ -1093,42 +1098,46 @@ def test_gradient_bh_multithread_match_sequential(global_dtype): assert_allclose(grad_multithread, grad_multithread) -def test_tsne_with_different_distance_metrics(global_dtype): +@pytest.mark.parametrize("metric", ["manhattan", "cosine"]) +@pytest.mark.parametrize("dist_func", [manhattan_distances, cosine_distances]) +def test_tsne_with_different_distance_metrics(global_dtype, metric, dist_func): """Make sure that TSNE works for different distance metrics""" random_state = check_random_state(0) n_components_original = 3 n_components_embedding = 2 X = random_state.randn(50, n_components_original).astype(global_dtype) - metrics = ["manhattan", "cosine"] - dist_funcs = [manhattan_distances, cosine_distances] - for metric, dist_func in zip(metrics, dist_funcs): - X_transformed_tsne = TSNE( - metric=metric, - n_components=n_components_embedding, - random_state=0, - n_iter=300, - init="random", - learning_rate="auto", - ).fit_transform(X) - X_transformed_tsne_precomputed = TSNE( - metric="precomputed", - n_components=n_components_embedding, - random_state=0, - n_iter=300, - init="random", - learning_rate="auto", - ).fit_transform(dist_func(X)) - assert_array_equal(X_transformed_tsne, X_transformed_tsne_precomputed) + X_transformed_tsne = TSNE( + metric=metric, + n_components=n_components_embedding, + random_state=0, + n_iter=300, + init="random", + learning_rate="auto", + ).fit_transform(X) + X_transformed_tsne_precomputed = TSNE( + metric="precomputed", + n_components=n_components_embedding, + random_state=0, + n_iter=300, + init="random", + learning_rate="auto", + ).fit_transform(dist_func(X)) + + # TSNE does not preserve dtype in those cases + assert ( + X_transformed_tsne.dtype == X_transformed_tsne_precomputed.dtype == np.float32 + ) + assert_array_equal(X_transformed_tsne, X_transformed_tsne_precomputed) # TODO: Remove in 1.2 @pytest.mark.parametrize("init", [None, "random", "pca"]) -def test_tsne_init_futurewarning(init, global_dtype): +def test_tsne_init_futurewarning(init): """Make sure that a FutureWarning is only raised when the init is not specified or is 'pca'.""" random_state = check_random_state(0) - X = random_state.randn(5, 2).astype(global_dtype) + X = random_state.randn(5, 2) kwargs = dict(learning_rate=200.0, init=init) tsne = TSNE(**{k: v for k, v in kwargs.items() if v is not None}) @@ -1146,12 +1155,12 @@ def test_tsne_init_futurewarning(init, global_dtype): # TODO: Remove in 1.2 @pytest.mark.parametrize("learning_rate", [None, 200.0]) -def test_tsne_learning_rate_futurewarning(learning_rate, global_dtype): +def test_tsne_learning_rate_futurewarning(learning_rate): """Make sure that a FutureWarning is only raised when the learning rate is not specified""" random_state = check_random_state(0) - X = random_state.randn(5, 2).astype(global_dtype) + X = random_state.randn(5, 2) kwargs = dict(learning_rate=learning_rate, init="random") tsne = TSNE(**{k: v for k, v in kwargs.items() if v is not None}) @@ -1236,14 +1245,14 @@ def test_tsne_with_mahalanobis_distance(): # FIXME: remove in 1.3 after deprecation of `square_distances` @pytest.mark.filterwarnings("ignore:The PCA initialization in TSNE will change") -def 
test_tsne_deprecation_square_distances(global_dtype): +def test_tsne_deprecation_square_distances(): """Check that we raise a warning regarding the removal of `square_distances`. Also check the parameters do not have any effect. """ random_state = check_random_state(0) - X = random_state.randn(30, 10).astype(global_dtype) + X = random_state.randn(30, 10) tsne = TSNE( n_components=2, init="pca", From 8e046aae61b78519be08c90ac7765be7ec22a2fe Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 17 Mar 2022 18:12:35 +0100 Subject: [PATCH 4/9] TST Fix parametrisation --- sklearn/manifold/tests/test_t_sne.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index 9cff14838a359..2f393c23a8dee 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -1098,8 +1098,10 @@ def test_gradient_bh_multithread_match_sequential(global_dtype): assert_allclose(grad_multithread, grad_multithread) -@pytest.mark.parametrize("metric", ["manhattan", "cosine"]) -@pytest.mark.parametrize("dist_func", [manhattan_distances, cosine_distances]) +@pytest.mark.parametrize( + "metric, dist_func", + [("manhattan", manhattan_distances), ("cosine", cosine_distances)], +) def test_tsne_with_different_distance_metrics(global_dtype, metric, dist_func): """Make sure that TSNE works for different distance metrics""" random_state = check_random_state(0) @@ -1110,7 +1112,7 @@ def test_tsne_with_different_distance_metrics(global_dtype, metric, dist_func): metric=metric, n_components=n_components_embedding, random_state=0, - n_iter=300, + n_iter=1000, init="random", learning_rate="auto", ).fit_transform(X) @@ -1118,7 +1120,7 @@ def test_tsne_with_different_distance_metrics(global_dtype, metric, dist_func): metric="precomputed", n_components=n_components_embedding, random_state=0, - n_iter=300, + n_iter=1000, init="random", learning_rate="auto", ).fit_transform(dist_func(X)) From 1d816bebb77e2446bca41323998e561151955138 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Fri, 10 Jun 2022 13:54:56 +0200 Subject: [PATCH 5/9] Review comments Co-authored-by: Guillaume Lemaitre --- sklearn/manifold/_t_sne.py | 5 ++ sklearn/manifold/tests/test_t_sne.py | 110 +++++++++++++-------------- 2 files changed, 60 insertions(+), 55 deletions(-) diff --git a/sklearn/manifold/_t_sne.py b/sklearn/manifold/_t_sne.py index 54ebc1e66d4ae..0c5697d5a2f4f 100644 --- a/sklearn/manifold/_t_sne.py +++ b/sklearn/manifold/_t_sne.py @@ -700,6 +700,11 @@ class TSNE(BaseEstimator): LocallyLinearEmbedding : Manifold learning using Locally Linear Embedding. SpectralEmbedding : Spectral embedding for non-linear dimensionality. + Notes + ----- + TSNE only preserves inputs' dtypes when `init` is provided as a float64 numpy array. + In all the other cases, the outputs' dtype is float32. 
+
     References
     ----------

diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py
index 2f393c23a8dee..15cdd606789ca 100644
--- a/sklearn/manifold/tests/test_t_sne.py
+++ b/sklearn/manifold/tests/test_t_sne.py
@@ -9,7 +9,6 @@
 from sklearn.exceptions import EfficiencyWarning
 from sklearn.utils._testing import ignore_warnings
 from sklearn.utils._testing import assert_almost_equal
-from sklearn.utils._testing import assert_array_equal
 from sklearn.utils._testing import assert_array_almost_equal
 from sklearn.utils._testing import assert_allclose
 from sklearn.utils._testing import skip_if_32bit
@@ -235,7 +234,7 @@ def test_binary_perplexity_stability():
     assert_array_almost_equal(P1, last_P1, decimal=4)
 
 
-def test_gradient(global_dtype):
+def test_gradient():
     # Test gradient of Kullback-Leibler divergence.
     random_state = check_random_state(0)
@@ -244,10 +243,10 @@ def test_gradient():
     n_components = 2
     alpha = 1.0
 
-    distances = random_state.randn(n_samples, n_features).astype(global_dtype)
+    distances = random_state.randn(n_samples, n_features)
     distances = np.abs(distances.dot(distances.T))
     np.fill_diagonal(distances, 0.0)
-    X_embedded = random_state.randn(n_samples, n_components).astype(global_dtype)
+    X_embedded = random_state.randn(n_samples, n_components)
 
     P = _joint_probabilities(distances, desired_perplexity=25.0, verbose=0)
@@ -257,7 +256,7 @@ def fun(params):
     def grad(params):
         return _kl_divergence(params, P, alpha, n_samples, n_components)[1]
 
-    assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0, decimal=5)
+    assert_allclose(check_grad(fun, grad, X_embedded.ravel()), 0.0, decimal=5)
 
 
 def test_trustworthiness(global_dtype):
@@ -269,15 +268,15 @@ def test_trustworthiness(global_dtype):
     assert trustworthiness(X, 5.0 + X / 10.0) == pytest.approx(1.0)
 
     # Randomly shuffled
-    X = np.arange(100).reshape(-1, 1)
+    X = np.arange(100).reshape(-1, 1).astype(global_dtype)
     X_embedded = X.copy()
     random_state.shuffle(X_embedded)
     assert trustworthiness(X, X_embedded) < 0.6
 
     # Completely different
-    X = np.arange(5).reshape(-1, 1)
-    X_embedded = np.array([[0], [2], [4], [1], [3]])
-    assert_almost_equal(trustworthiness(X, X_embedded, n_neighbors=1), 0.2)
+    X = np.arange(5).reshape(-1, 1).astype(global_dtype)
+    X_embedded = np.array([[0], [2], [4], [1], [3]], dtype=global_dtype)
+    assert trustworthiness(X, X_embedded, n_neighbors=1) == pytest.approx(0.2)
 
 
 @pytest.mark.parametrize("method", ["exact", "barnes_hut"])
@@ -296,7 +295,7 @@ def test_preserve_trustworthiness_approximately(method, init, global_dtype):
         learning_rate="auto",
     )
     X_embedded = tsne.fit_transform(X)
-    # TSNE.fit_transform does not preserve dtype in this case
+    # TSNE.fit_transform does not preserve dtype for the default initialisation
     assert X_embedded.dtype == np.float32
     t = trustworthiness(X, X_embedded, n_neighbors=1)
     assert t > 0.85
@@ -340,7 +339,9 @@ def test_fit_csr_matrix(method, global_dtype):
         n_iter=750,
     )
     X_embedded = tsne.fit_transform(X_csr)
-    assert_allclose(trustworthiness(X_csr, X_embedded, n_neighbors=1), 1.0, rtol=1.1e-1)
+    assert trustworthiness(X_csr, X_embedded, n_neighbors=1) == pytest.approx(
+        1.0, rel=1.1e-1
+    )
 
 
 def test_preserve_trustworthiness_approximately_with_precomputed_distances():
@@ -454,7 +455,7 @@ def test_sparse_precomputed_distance(global_dtype):
     ).astype(global_dtype)
     D = pairwise_distances(X).astype(global_dtype)
     assert sp.issparse(D_sparse)
-    assert_almost_equal(D_sparse.A, D)
+    assert_allclose(D_sparse.A, D)
 
     tsne = TSNE(
         metric="precomputed", random_state=0, init="random", learning_rate="auto"
@@ -463,7 +464,7 @@ def test_sparse_precomputed_distance(global_dtype):
 
     for fmt in ["csr", "lil"]:
         Xt_sparse = tsne.fit_transform(D_sparse.asformat(fmt))
-        assert_almost_equal(Xt_dense, Xt_sparse)
+        assert_allclose(Xt_dense, Xt_sparse)
 
 
 @pytest.mark.filterwarnings("ignore:The default learning rate in TSNE")
@@ -497,7 +498,7 @@ def test_init_ndarray(global_dtype):
 
     # TSNE.fit_transform _does_ preserve dtype in this case
     # (initialisation with a custom array)
     assert X_embedded.dtype == global_dtype
-    assert_array_equal(np.zeros((100, 2), dtype=global_dtype), X_embedded)
+    assert_allclose(np.zeros((100, 2), dtype=global_dtype), X_embedded)
 
 
 def test_init_ndarray_precomputed():
@@ -602,27 +603,26 @@ def test_early_exaggeration_used(method, global_dtype):
     assert not np.allclose(X_embedded1, X_embedded2)
 
 
-def test_n_iter_used(global_dtype):
+@pytest.mark.parametrize("method", ["exact", "barnes_hut"])
+@pytest.mark.parametrize("n_iter", [251, 500])
+def test_n_iter_used(method, n_iter, global_dtype):
     # check that the ``n_iter`` parameter has an effect
     random_state = check_random_state(0)
     n_components = 2
-    methods = ["exact", "barnes_hut"]
     X = random_state.randn(25, n_components).astype(global_dtype)
-    for method in methods:
-        for n_iter in [251, 500]:
-            tsne = TSNE(
-                n_components=n_components,
-                perplexity=1,
-                learning_rate=0.5,
-                init="random",
-                random_state=0,
-                method=method,
-                early_exaggeration=1.0,
-                n_iter=n_iter,
-            )
-            tsne.fit_transform(X)
+    tsne = TSNE(
+        n_components=n_components,
+        perplexity=1,
+        learning_rate=0.5,
+        init="random",
+        random_state=0,
+        method=method,
+        early_exaggeration=1.0,
+        n_iter=n_iter,
+    )
+    tsne.fit_transform(X)
 
-            assert tsne.n_iter_ == n_iter - 1
+    assert tsne.n_iter_ == n_iter - 1
 
 
 def test_answer_gradient_two_points():
@@ -859,34 +859,34 @@ def test_barnes_hut_angle(global_dtype):
 
 
 @skip_if_32bit
-def test_n_iter_without_progress(global_dtype):
+@pytest.mark.parametrize("method", ["barnes_hut", "exact"])
+def test_n_iter_without_progress(method, global_dtype):
     # Use a dummy negative n_iter_without_progress and check output on stdout
     random_state = check_random_state(0)
     X = random_state.randn(100, 10).astype(global_dtype)
-    for method in ["barnes_hut", "exact"]:
-        tsne = TSNE(
-            n_iter_without_progress=-1,
-            verbose=2,
-            learning_rate=1e8,
-            random_state=0,
-            method=method,
-            n_iter=351,
-            init="random",
-        )
-        tsne._N_ITER_CHECK = 1
-        tsne._EXPLORATION_N_ITER = 0
+    tsne = TSNE(
+        n_iter_without_progress=-1,
+        verbose=2,
+        learning_rate=1e8,
+        random_state=0,
+        method=method,
+        n_iter=351,
+        init="random",
+    )
+    tsne._N_ITER_CHECK = 1
+    tsne._EXPLORATION_N_ITER = 0
 
-        old_stdout = sys.stdout
-        sys.stdout = StringIO()
-        try:
-            tsne.fit_transform(X)
-        finally:
-            out = sys.stdout.getvalue()
-            sys.stdout.close()
-            sys.stdout = old_stdout
+    old_stdout = sys.stdout
+    sys.stdout = StringIO()
+    try:
+        tsne.fit_transform(X)
+    finally:
+        out = sys.stdout.getvalue()
+        sys.stdout.close()
+        sys.stdout = old_stdout
 
-        # The output needs to contain the value of n_iter_without_progress
-        assert "did not make any progress during the last -1 episodes. Finished." in out
+    # The output needs to contain the value of n_iter_without_progress
+    assert "did not make any progress during the last -1 episodes. Finished." 
in out @pytest.mark.filterwarnings("ignore:The default learning rate in TSNE") @@ -1125,11 +1125,11 @@ def test_tsne_with_different_distance_metrics(global_dtype, metric, dist_func): learning_rate="auto", ).fit_transform(dist_func(X)) - # TSNE does not preserve dtype in those cases + # TSNE does not preserve dtype for random initialisation (see docstring) assert ( X_transformed_tsne.dtype == X_transformed_tsne_precomputed.dtype == np.float32 ) - assert_array_equal(X_transformed_tsne, X_transformed_tsne_precomputed) + assert_allclose(X_transformed_tsne, X_transformed_tsne_precomputed) # TODO: Remove in 1.2 From 5fb1aeae62a0c397be5733402b13bcacf913a9d0 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Fri, 10 Jun 2022 13:57:03 +0200 Subject: [PATCH 6/9] fixup! Review comments --- sklearn/manifold/tests/test_t_sne.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index 15cdd606789ca..ad0929168316e 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -256,7 +256,7 @@ def fun(params): def grad(params): return _kl_divergence(params, P, alpha, n_samples, n_components)[1] - assert_allclose(check_grad(fun, grad, X_embedded.ravel()), 0.0, decimal=5) + assert_allclose(check_grad(fun, grad, X_embedded.ravel()), 0.0, atol=1e-5) def test_trustworthiness(global_dtype): From 8850014d37e65cce64486fea99567a3d9c1ab755 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Tue, 25 Oct 2022 14:26:23 +0200 Subject: [PATCH 7/9] DOC Adapt tolerance for test_sparse_precomputed_distance --- sklearn/manifold/tests/test_t_sne.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index 339b4317385a8..c33dc176f1781 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -442,6 +442,7 @@ def test_high_perplexity_precomputed_sparse_distances(): @ignore_warnings(category=EfficiencyWarning) def test_sparse_precomputed_distance(global_dtype): """Make sure that TSNE works identically for sparse and dense matrix""" + atol = 1e-5 if global_dtype == np.float32 else 0 random_state = check_random_state(0) X = random_state.randn(100, 2) @@ -459,7 +460,7 @@ def test_sparse_precomputed_distance(global_dtype): for fmt in ["csr", "lil"]: Xt_sparse = tsne.fit_transform(D_sparse.asformat(fmt)) - assert_allclose(Xt_dense, Xt_sparse) + assert_allclose(Xt_dense, Xt_sparse, atol=atol) def test_non_positive_computed_distances(): From b9ec8ebff6665675c30344d9ee3545fed072f184 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Fri, 2 Dec 2022 16:07:58 +0100 Subject: [PATCH 8/9] TST Remove test_init_not_available The parameter validation now tests this. 
--- sklearn/manifold/tests/test_t_sne.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index 09bebf3c05e33..6a0a31f1ca28d 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -451,7 +451,7 @@ def test_sparse_precomputed_distance(global_dtype): ).astype(global_dtype) D = pairwise_distances(X).astype(global_dtype) assert sp.issparse(D_sparse) - assert_allclose(D_sparse.A, D) + assert_allclose(D_sparse.A, D, atol=atol) tsne = TSNE( metric="precomputed", random_state=0, init="random", learning_rate="auto" @@ -475,17 +475,6 @@ def metric(x, y): tsne.fit_transform(X) -def test_init_not_available(): - # 'init' must be 'pca', 'random', or numpy array. - tsne = TSNE(init="not available", perplexity=1) - m = ( - "The 'init' parameter of TSNE must be a str among {'random', 'pca'}" - " or an instance of 'numpy.ndarray'" - ) - with pytest.raises(ValueError, match=m): - tsne.fit_transform(np.array([[0.0], [1.0]])) - - def test_init_ndarray(global_dtype): # Initialize TSNE with ndarray and test fit tsne = TSNE(init=np.zeros((100, 2), dtype=global_dtype), learning_rate="auto") From 0f6f2b8a60d10c82a9b2b20a8e0cecba90f184d3 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Fri, 2 Dec 2022 17:03:43 +0100 Subject: [PATCH 9/9] TST Adapt atol for `test_sparse_precomputed_distance` on float64 --- sklearn/manifold/tests/test_t_sne.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index 6a0a31f1ca28d..f1461dede5200 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -442,7 +442,7 @@ def test_high_perplexity_precomputed_sparse_distances(): @ignore_warnings(category=EfficiencyWarning) def test_sparse_precomputed_distance(global_dtype): """Make sure that TSNE works identically for sparse and dense matrix""" - atol = 1e-5 if global_dtype == np.float32 else 0 + atol = 1e-5 if global_dtype == np.float32 else 1e-7 random_state = check_random_state(0) X = random_state.randn(100, 2)
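
As a closing summary of the dtype contract this series pins down, here is a
minimal sketch. It is not taken from the diffs themselves: it assumes a
scikit-learn version contemporary with the PR (one that accepts
`learning_rate="auto"`), and it stands in for the `global_dtype` fixture,
scikit-learn's session-wide test fixture over float64 and float32, with an
explicit loop:

    import numpy as np
    from sklearn.manifold import TSNE

    rng = np.random.RandomState(0)
    for dtype in (np.float32, np.float64):
        X = rng.randn(50, 5).astype(dtype)

        # String init ("random" or "pca"): the embedding is computed and
        # returned as float32, whatever the input dtype (the behaviour
        # asserted in test_preserve_trustworthiness_approximately).
        embedding = TSNE(
            init="random", learning_rate="auto", random_state=0
        ).fit_transform(X)
        assert embedding.dtype == np.float32

        # ndarray init: the dtype of the user-provided init array is
        # preserved in the output (the case exercised by test_init_ndarray).
        init = rng.randn(50, 2).astype(dtype)
        embedding = TSNE(
            init=init, learning_rate="auto", random_state=0
        ).fit_transform(X)
        assert embedding.dtype == dtype

This is why several tests above assert `X_embedded.dtype == np.float32` with a
comment rather than expecting `global_dtype` to round-trip.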