diff --git a/sklearn/manifold/_t_sne.py b/sklearn/manifold/_t_sne.py
index 8813ee7ae5de0..a43a43942b274 100644
--- a/sklearn/manifold/_t_sne.py
+++ b/sklearn/manifold/_t_sne.py
@@ -724,6 +724,11 @@ class TSNE(BaseEstimator):
     LocallyLinearEmbedding : Manifold learning using Locally Linear Embedding.
     SpectralEmbedding : Spectral embedding for non-linear dimensionality.

+    Notes
+    -----
+    TSNE only preserves the dtype of its input when `init` is provided as a
+    float64 numpy array. In all other cases, the output dtype is float32.
+
     References
     ----------
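For context, a minimal sketch (not part of the patch) of the dtype contract the new Notes paragraph documents, assuming a scikit-learn build that includes this change:

```python
# Sketch only: illustrates the dtype behaviour described in the Notes above.
import numpy as np
from sklearn.manifold import TSNE

rng = np.random.RandomState(0)
X = rng.randn(100, 5).astype(np.float64)

# With a string init ("random" or "pca"), the embedding is always float32,
# even for float64 input.
emb = TSNE(init="random", learning_rate="auto", random_state=0).fit_transform(X)
assert emb.dtype == np.float32

# With a float64 ndarray init, the input dtype is preserved.
init = rng.randn(100, 2).astype(np.float64)
emb = TSNE(init=init, learning_rate="auto", random_state=0).fit_transform(X)
assert emb.dtype == np.float64
```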
diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py
index 4b00c7b228969..f1461dede5200 100644
--- a/sklearn/manifold/tests/test_t_sne.py
+++ b/sklearn/manifold/tests/test_t_sne.py
@@ -1,7 +1,6 @@
 import sys
 from io import StringIO
 import numpy as np
-from numpy.testing import assert_allclose
 import scipy.sparse as sp
 import pytest

@@ -10,8 +9,8 @@
 from sklearn.exceptions import EfficiencyWarning
 from sklearn.utils._testing import ignore_warnings
 from sklearn.utils._testing import assert_almost_equal
-from sklearn.utils._testing import assert_array_equal
 from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_allclose
 from sklearn.utils._testing import skip_if_32bit
 from sklearn.utils import check_random_state
 from sklearn.manifold._t_sne import _joint_probabilities
@@ -134,6 +133,7 @@ def test_binary_search():
     # Test if the binary search finds Gaussians with desired perplexity.
     random_state = check_random_state(0)
     data = random_state.randn(50, 5)
+    # _binary_search_perplexity only supports float32 inputs
     distances = pairwise_distances(data).astype(np.float32)
     desired_perplexity = 25.0
     P = _binary_search_perplexity(distances, desired_perplexity, verbose=0)
@@ -149,6 +149,7 @@ def test_binary_search_underflow():
     # A more challenging case than the one above, producing numeric
     # underflow in float precision (see issue #19471 and PR #19472).
     random_state = check_random_state(42)
+    # _binary_search_perplexity only supports float32 inputs
     data = random_state.randn(1, 90).astype(np.float32) + 100
     desired_perplexity = 30.0
     P = _binary_search_perplexity(data, desired_perplexity, verbose=0)
@@ -163,6 +164,7 @@ def test_binary_search_neighbors():
     n_samples = 200
     desired_perplexity = 25.0
     random_state = check_random_state(0)
+    # _binary_search_perplexity only supports float32 inputs
     data = random_state.randn(n_samples, 2).astype(np.float32, copy=False)
     distances = pairwise_distances(data)
     P1 = _binary_search_perplexity(distances, desired_perplexity, verbose=0)
@@ -210,6 +212,7 @@ def test_binary_perplexity_stability():
     data = random_state.randn(n_samples, 5)
     nn = NearestNeighbors().fit(data)
     distance_graph = nn.kneighbors_graph(n_neighbors=n_neighbors, mode="distance")
+    # _binary_search_perplexity only supports float32 inputs
     distances = distance_graph.data.astype(np.float32, copy=False)
     distances = distances.reshape(n_samples, n_neighbors)
     last_P = None
@@ -236,10 +239,10 @@ def test_gradient():
     n_components = 2
     alpha = 1.0

-    distances = random_state.randn(n_samples, n_features).astype(np.float32)
+    distances = random_state.randn(n_samples, n_features)
     distances = np.abs(distances.dot(distances.T))
     np.fill_diagonal(distances, 0.0)
-    X_embedded = random_state.randn(n_samples, n_components).astype(np.float32)
+    X_embedded = random_state.randn(n_samples, n_components)

     P = _joint_probabilities(distances, desired_perplexity=25.0, verbose=0)

@@ -249,27 +252,27 @@ def fun(params):
     def grad(params):
         return _kl_divergence(params, P, alpha, n_samples, n_components)[1]

-    assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0, decimal=5)
+    assert_allclose(check_grad(fun, grad, X_embedded.ravel()), 0.0, atol=1e-5)


-def test_trustworthiness():
+def test_trustworthiness(global_dtype):
     # Test trustworthiness score.
     random_state = check_random_state(0)

     # Affine transformation
-    X = random_state.randn(100, 2)
-    assert trustworthiness(X, 5.0 + X / 10.0) == 1.0
+    X = random_state.randn(100, 2).astype(global_dtype)
+    assert trustworthiness(X, 5.0 + X / 10.0) == pytest.approx(1.0)

     # Randomly shuffled
-    X = np.arange(100).reshape(-1, 1)
+    X = np.arange(100).reshape(-1, 1).astype(global_dtype)
     X_embedded = X.copy()
     random_state.shuffle(X_embedded)
     assert trustworthiness(X, X_embedded) < 0.6

     # Completely different
-    X = np.arange(5).reshape(-1, 1)
-    X_embedded = np.array([[0], [2], [4], [1], [3]])
-    assert_almost_equal(trustworthiness(X, X_embedded, n_neighbors=1), 0.2)
+    X = np.arange(5).reshape(-1, 1).astype(global_dtype)
+    X_embedded = np.array([[0], [2], [4], [1], [3]], dtype=global_dtype)
+    assert trustworthiness(X, X_embedded, n_neighbors=1) == pytest.approx(0.2)


 def test_trustworthiness_n_neighbors_error():
@@ -290,11 +293,11 @@

 @pytest.mark.parametrize("method", ["exact", "barnes_hut"])
 @pytest.mark.parametrize("init", ("random", "pca"))
-def test_preserve_trustworthiness_approximately(method, init):
+def test_preserve_trustworthiness_approximately(method, init, global_dtype):
     # Nearest neighbors should be preserved approximately.
     random_state = check_random_state(0)
     n_components = 2
-    X = random_state.randn(50, n_components).astype(np.float32)
+    X = random_state.randn(50, n_components).astype(global_dtype)
     tsne = TSNE(
         n_components=n_components,
         init=init,
@@ -304,14 +307,17 @@ def test_preserve_trustworthiness_approximately(method, init):
         learning_rate="auto",
     )
     X_embedded = tsne.fit_transform(X)
+    # TSNE.fit_transform does not preserve dtype for the default initialization
+    assert X_embedded.dtype == np.float32
     t = trustworthiness(X, X_embedded, n_neighbors=1)
     assert t > 0.85


-def test_optimization_minimizes_kl_divergence():
+def test_optimization_minimizes_kl_divergence(global_dtype):
     """t-SNE should give a lower KL divergence with more iterations."""
     random_state = check_random_state(0)
     X, _ = make_blobs(n_features=3, random_state=random_state)
+    X = X.astype(global_dtype)
     kl_divergences = []
     for n_iter in [250, 300, 350]:
         tsne = TSNE(
@@ -329,12 +335,12 @@


 @pytest.mark.parametrize("method", ["exact", "barnes_hut"])
-def test_fit_transform_csr_matrix(method):
+def test_fit_transform_csr_matrix(method, global_dtype):
     # TODO: compare results on dense and sparse data as proposed in:
     # https://github.com/scikit-learn/scikit-learn/pull/23585#discussion_r968388186
     # X can be a sparse matrix.
     rng = check_random_state(0)
-    X = rng.randn(50, 2)
+    X = rng.randn(50, 2).astype(global_dtype)
     X[(rng.randint(0, 50, 25), rng.randint(0, 2, 25))] = 0.0
     X_csr = sp.csr_matrix(X)
     tsne = TSNE(
@@ -347,7 +353,9 @@ def test_fit_transform_csr_matrix(method):
         n_iter=750,
     )
     X_embedded = tsne.fit_transform(X_csr)
-    assert_allclose(trustworthiness(X_csr, X_embedded, n_neighbors=1), 1.0, rtol=1.1e-1)
+    assert trustworthiness(X_csr, X_embedded, n_neighbors=1) == pytest.approx(
+        1.0, rel=1.1e-1
+    )


 def test_preserve_trustworthiness_approximately_with_precomputed_distances():
@@ -372,11 +380,11 @@ def test_preserve_trustworthiness_approximately_with_precomputed_distances():
     assert t > 0.95


-def test_trustworthiness_not_euclidean_metric():
+def test_trustworthiness_not_euclidean_metric(global_dtype):
     # Test trustworthiness with a metric different from 'euclidean' and
     # 'precomputed'
     random_state = check_random_state(0)
-    X = random_state.randn(100, 2)
+    X = random_state.randn(100, 2).astype(global_dtype)
     assert trustworthiness(X, X, metric="cosine") == trustworthiness(
         pairwise_distances(X, metric="cosine"), X, metric="precomputed"
     )
@@ -432,15 +440,18 @@ def test_high_perplexity_precomputed_sparse_distances():


 @ignore_warnings(category=EfficiencyWarning)
-def test_sparse_precomputed_distance():
+def test_sparse_precomputed_distance(global_dtype):
     """Make sure that TSNE works identically for sparse and dense matrix"""
+    atol = 1e-5 if global_dtype == np.float32 else 1e-7
     random_state = check_random_state(0)
     X = random_state.randn(100, 2)
-    D_sparse = kneighbors_graph(X, n_neighbors=100, mode="distance", include_self=True)
-    D = pairwise_distances(X)
+    D_sparse = kneighbors_graph(
+        X, n_neighbors=100, mode="distance", include_self=True
+    ).astype(global_dtype)
+    D = pairwise_distances(X).astype(global_dtype)
     assert sp.issparse(D_sparse)
-    assert_almost_equal(D_sparse.A, D)
+    assert_allclose(D_sparse.A, D, atol=atol)

     tsne = TSNE(
         metric="precomputed", random_state=0, init="random", learning_rate="auto"
     )
@@ -449,7 +460,7 @@ def test_sparse_precomputed_distance():

     for fmt in ["csr", "lil"]:
         Xt_sparse = tsne.fit_transform(D_sparse.asformat(fmt))
-        assert_almost_equal(Xt_dense, Xt_sparse)
+        assert_allclose(Xt_dense, Xt_sparse, atol=atol)


 def test_non_positive_computed_distances():
@@ -464,11 +475,14 @@ def metric(x, y):
         tsne.fit_transform(X)


-def test_init_ndarray():
+def test_init_ndarray(global_dtype):
     # Initialize TSNE with ndarray and test fit
-    tsne = TSNE(init=np.zeros((100, 2)), learning_rate="auto")
-    X_embedded = tsne.fit_transform(np.ones((100, 5)))
-    assert_array_equal(np.zeros((100, 2)), X_embedded)
+    tsne = TSNE(init=np.zeros((100, 2), dtype=global_dtype), learning_rate="auto")
+    X_embedded = tsne.fit_transform(np.ones((100, 5), dtype=global_dtype))
+
+    # TSNE.fit_transform _does_ preserve dtype when initialized with a custom array.
+    assert X_embedded.dtype == global_dtype
+    assert_allclose(np.zeros((100, 2), dtype=global_dtype), X_embedded)


 def test_init_ndarray_precomputed():
@@ -506,60 +520,58 @@ def test_n_components_range():
         tsne.fit_transform(np.array([[0.0], [1.0]]))


-def test_early_exaggeration_used():
+@pytest.mark.parametrize("method", ["exact", "barnes_hut"])
+def test_early_exaggeration_used(method, global_dtype):
     # check that the ``early_exaggeration`` parameter has an effect
     random_state = check_random_state(0)
     n_components = 2
-    methods = ["exact", "barnes_hut"]
-    X = random_state.randn(25, n_components).astype(np.float32)
-    for method in methods:
-        tsne = TSNE(
-            n_components=n_components,
-            perplexity=1,
-            learning_rate=100.0,
-            init="pca",
-            random_state=0,
-            method=method,
-            early_exaggeration=1.0,
-            n_iter=250,
-        )
-        X_embedded1 = tsne.fit_transform(X)
-        tsne = TSNE(
-            n_components=n_components,
-            perplexity=1,
-            learning_rate=100.0,
-            init="pca",
-            random_state=0,
-            method=method,
-            early_exaggeration=10.0,
-            n_iter=250,
-        )
-        X_embedded2 = tsne.fit_transform(X)
+    X = random_state.randn(25, n_components).astype(global_dtype)
+    tsne = TSNE(
+        n_components=n_components,
+        perplexity=1,
+        learning_rate=100.0,
+        init="pca",
+        random_state=0,
+        method=method,
+        early_exaggeration=1.0,
+        n_iter=250,
+    )
+    X_embedded1 = tsne.fit_transform(X)
+    tsne = TSNE(
+        n_components=n_components,
+        perplexity=1,
+        learning_rate=100.0,
+        init="pca",
+        random_state=0,
+        method=method,
+        early_exaggeration=10.0,
+        n_iter=250,
+    )
+    X_embedded2 = tsne.fit_transform(X)

-        assert not np.allclose(X_embedded1, X_embedded2)
+    assert not np.allclose(X_embedded1, X_embedded2)


-def test_n_iter_used():
+@pytest.mark.parametrize("method", ["exact", "barnes_hut"])
+@pytest.mark.parametrize("n_iter", [251, 500])
+def test_n_iter_used(method, n_iter, global_dtype):
     # check that the ``n_iter`` parameter has an effect
     random_state = check_random_state(0)
     n_components = 2
-    methods = ["exact", "barnes_hut"]
-    X = random_state.randn(25, n_components).astype(np.float32)
-    for method in methods:
-        for n_iter in [251, 500]:
-            tsne = TSNE(
-                n_components=n_components,
-                perplexity=1,
-                learning_rate=0.5,
-                init="random",
-                random_state=0,
-                method=method,
-                early_exaggeration=1.0,
-                n_iter=n_iter,
-            )
-            tsne.fit_transform(X)
-
-            assert tsne.n_iter_ == n_iter - 1
+    X = random_state.randn(25, n_components).astype(global_dtype)
+    tsne = TSNE(
+        n_components=n_components,
+        perplexity=1,
+        learning_rate=0.5,
+        init="random",
+        random_state=0,
+        method=method,
+        early_exaggeration=1.0,
+        n_iter=n_iter,
+    )
+    tsne.fit_transform(X)
+
+    assert tsne.n_iter_ == n_iter - 1


 def test_answer_gradient_two_points():
@@ -703,12 +715,10 @@ def test_reduction_to_one_component():


 @pytest.mark.parametrize("method", ["barnes_hut", "exact"])
-@pytest.mark.parametrize("dt", [np.float32, np.float64])
-def test_64bit(method, dt):
-    # Ensure 64bit arrays are handled correctly.
+def test_64bit(method):
     random_state = check_random_state(0)
-    X = random_state.randn(10, 2).astype(dt, copy=False)
+    X = random_state.randn(10, 2).astype(np.float64, copy=False)
     tsne = TSNE(
         n_components=2,
         perplexity=2,
@@ -749,7 +759,7 @@ def test_kl_divergence_not_nan(method):
     assert not np.isnan(tsne.kl_divergence_)


-def test_barnes_hut_angle():
+def test_barnes_hut_angle(global_dtype):
     # When Barnes-Hut's angle=0 this corresponds to the exact method.
     angle = 0.0
     perplexity = 10
@@ -759,9 +769,9 @@ def test_barnes_hut_angle():
     degrees_of_freedom = float(n_components - 1.0)

     random_state = check_random_state(0)
-    data = random_state.randn(n_samples, n_features)
+    data = random_state.randn(n_samples, n_features).astype(global_dtype)
     distances = pairwise_distances(data)
-    params = random_state.randn(n_samples, n_components)
+    params = random_state.randn(n_samples, n_components).astype(global_dtype)
     P = _joint_probabilities(distances, perplexity, verbose=0)
     kl_exact, grad_exact = _kl_divergence(
         params, P, degrees_of_freedom, n_samples, n_components
@@ -792,40 +802,40 @@


 @skip_if_32bit
-def test_n_iter_without_progress():
+@pytest.mark.parametrize("method", ["barnes_hut", "exact"])
+def test_n_iter_without_progress(method, global_dtype):
     # Use a dummy negative n_iter_without_progress and check output on stdout
     random_state = check_random_state(0)
-    X = random_state.randn(100, 10)
-    for method in ["barnes_hut", "exact"]:
-        tsne = TSNE(
-            n_iter_without_progress=-1,
-            verbose=2,
-            learning_rate=1e8,
-            random_state=0,
-            method=method,
-            n_iter=351,
-            init="random",
-        )
-        tsne._N_ITER_CHECK = 1
-        tsne._EXPLORATION_N_ITER = 0
+    X = random_state.randn(100, 10).astype(global_dtype)
+    tsne = TSNE(
+        n_iter_without_progress=-1,
+        verbose=2,
+        learning_rate=1e8,
+        random_state=0,
+        method=method,
+        n_iter=351,
+        init="random",
+    )
+    tsne._N_ITER_CHECK = 1
+    tsne._EXPLORATION_N_ITER = 0

-        old_stdout = sys.stdout
-        sys.stdout = StringIO()
-        try:
-            tsne.fit_transform(X)
-        finally:
-            out = sys.stdout.getvalue()
-            sys.stdout.close()
-            sys.stdout = old_stdout
+    old_stdout = sys.stdout
+    sys.stdout = StringIO()
+    try:
+        tsne.fit_transform(X)
+    finally:
+        out = sys.stdout.getvalue()
+        sys.stdout.close()
+        sys.stdout = old_stdout

-        # The output needs to contain the value of n_iter_without_progress
-        assert "did not make any progress during the last -1 episodes. Finished." in out
+    # The output needs to contain the value of n_iter_without_progress
+    assert "did not make any progress during the last -1 episodes. Finished." in out


-def test_min_grad_norm():
+def test_min_grad_norm(global_dtype):
     # Make sure that the parameter min_grad_norm is used correctly
     random_state = check_random_state(0)
-    X = random_state.randn(100, 2)
+    X = random_state.randn(100, 2).astype(global_dtype)
     min_grad_norm = 0.002
     tsne = TSNE(min_grad_norm=min_grad_norm, verbose=2, random_state=0, method="exact")
@@ -865,10 +875,10 @@ def test_min_grad_norm():
     assert n_smaller_gradient_norms <= 1


-def test_accessible_kl_divergence():
+def test_accessible_kl_divergence(global_dtype):
     # Ensures that the accessible kl_divergence matches the computed value
     random_state = check_random_state(0)
-    X = random_state.randn(50, 2)
+    X = random_state.randn(50, 2).astype(global_dtype)
     tsne = TSNE(
         n_iter_without_progress=2, verbose=2, random_state=0, method="exact", n_iter=500
     )
@@ -948,12 +958,12 @@ def assert_uniform_grid(Y, try_name=None):
     assert largest_to_mean < 2, try_name


-def test_bh_match_exact():
+def test_bh_match_exact(global_dtype):
     # check that the ``barnes_hut`` method match the exact one when
     # ``angle = 0`` and ``perplexity > n_samples / 3``
     random_state = check_random_state(0)
     n_features = 10
-    X = random_state.randn(30, n_features).astype(np.float32)
+    X = random_state.randn(30, n_features).astype(global_dtype)
     X_embeddeds = {}
     n_iter = {}
     for method in ["exact", "barnes_hut"]:
@@ -976,7 +986,7 @@ def test_bh_match_exact():
     assert_allclose(X_embeddeds["exact"], X_embeddeds["barnes_hut"], rtol=1e-4)


-def test_gradient_bh_multithread_match_sequential():
+def test_gradient_bh_multithread_match_sequential(global_dtype):
     # check that the bh gradient with different num_threads gives the same
     # results
@@ -989,8 +999,8 @@ def test_gradient_bh_multithread_match_sequential():
     perplexity = 5

     random_state = check_random_state(0)
-    data = random_state.randn(n_samples, n_features).astype(np.float32)
-    params = random_state.randn(n_samples, n_components)
+    data = random_state.randn(n_samples, n_features).astype(global_dtype)
+    params = random_state.randn(n_samples, n_components).astype(global_dtype)

     n_neighbors = n_samples - 1
     distances_csr = (
@@ -1032,7 +1042,7 @@ def test_gradient_bh_multithread_match_sequential():
     [("manhattan", manhattan_distances), ("cosine", cosine_distances)],
 )
 @pytest.mark.parametrize("method", ["barnes_hut", "exact"])
-def test_tsne_with_different_distance_metrics(metric, dist_func, method):
+def test_tsne_with_different_distance_metrics(metric, dist_func, method, global_dtype):
     """Make sure that TSNE works for different distance metrics"""

     if method == "barnes_hut" and metric == "manhattan":
@@ -1055,7 +1065,7 @@ def test_tsne_with_different_distance_metrics(metric, dist_func, method):
     random_state = check_random_state(0)
     n_components_original = 3
     n_components_embedding = 2
-    X = random_state.randn(50, n_components_original).astype(np.float32)
+    X = random_state.randn(50, n_components_original).astype(global_dtype)
     X_transformed_tsne = TSNE(
         metric=metric,
         method=method,
@@ -1074,7 +1084,12 @@ def test_tsne_with_different_distance_metrics(metric, dist_func, method):
         init="random",
         learning_rate="auto",
     ).fit_transform(dist_func(X))
-    assert_array_equal(X_transformed_tsne, X_transformed_tsne_precomputed)
+
+    # TSNE does not preserve dtype for random initialization (see docstring)
+    assert (
+        X_transformed_tsne.dtype == X_transformed_tsne_precomputed.dtype == np.float32
+    )
+    assert_allclose(X_transformed_tsne, X_transformed_tsne_precomputed)


 @pytest.mark.parametrize("method", ["exact", "barnes_hut"])
@@ -1138,6 +1153,7 @@ def test_tsne_with_mahalanobis_distance():


 # FIXME: remove in 1.3 after deprecation of `square_distances`
+@pytest.mark.filterwarnings("ignore:The PCA initialization in TSNE will change")
 def test_tsne_deprecation_square_distances():
     """Check that we raise a warning regarding the removal of
     `square_distances`.
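The updated tests all request a `global_dtype` fixture, which parametrizes each test over float64 and float32 inputs. A rough, hypothetical sketch of what such a fixture looks like (scikit-learn's real one lives in `sklearn/conftest.py` and gates the float32 run behind an opt-in environment variable):

```python
# conftest.py -- hypothetical minimal version of a dtype-parametrizing fixture;
# this is an illustration, not the actual scikit-learn implementation.
import numpy as np
import pytest


@pytest.fixture(params=[np.float64, np.float32], ids=["float64", "float32"])
def global_dtype(request):
    # Each test requesting this fixture runs once per dtype.
    yield request.param
```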