diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 101cda43c8747..f7ea25e3a8e8a 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -354,6 +354,12 @@ Changelog :mod:`sklearn.feature_selection` ................................ +- |Fix| Fix a bug in :func:`feature_selection.mutual_info_regression` and + :func:`feature_selection.mutual_info_classif`, where the continuous features + in `X` should be scaled to a unit variance independently of whether the target `y` is + continuous or discrete. + :pr:`24747` by :user:`Guillaume Lemaitre <glemaitre>` + :mod:`sklearn.gaussian_process` ............................... diff --git a/sklearn/feature_selection/_mutual_info.py b/sklearn/feature_selection/_mutual_info.py index 3d036d8ee7e0b..2a03eb7dfd2fe 100644 --- a/sklearn/feature_selection/_mutual_info.py +++ b/sklearn/feature_selection/_mutual_info.py @@ -280,10 +280,9 @@ def _estimate_mi( if copy: X = X.copy() - if not discrete_target: - X[:, continuous_mask] = scale( - X[:, continuous_mask], with_mean=False, copy=False - ) + X[:, continuous_mask] = scale( + X[:, continuous_mask], with_mean=False, copy=False + ) # Add small noise to continuous features as advised in Kraskov et. al. X = X.astype(np.float64, copy=False) diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py index af2b733efd62d..f39e4a5738b21 100644 --- a/sklearn/feature_selection/tests/test_mutual_info.py +++ b/sklearn/feature_selection/tests/test_mutual_info.py @@ -207,3 +207,32 @@ def test_mutual_info_options(global_dtype): assert_allclose(mi_5, mi_6) assert not np.allclose(mi_1, mi_3) + + +@pytest.mark.parametrize("correlated", [True, False]) +def test_mutual_information_symmetry_classif_regression(correlated, global_random_seed): +    """Check that `mutual_info_classif` and `mutual_info_regression` are +    symmetric by switching the target `y` as `feature` in `X` and vice +    versa. 
+ + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/23720 + """ + rng = np.random.RandomState(global_random_seed) + n = 100 + d = rng.randint(10, size=n) + + if correlated: + c = d.astype(np.float64) + else: + c = rng.normal(0, 1, size=n) + + mi_classif = mutual_info_classif( + c[:, None], d, discrete_features=[False], random_state=global_random_seed + ) + + mi_regression = mutual_info_regression( + d[:, None], c, discrete_features=[True], random_state=global_random_seed + ) + + assert mi_classif == pytest.approx(mi_regression)