ENH Add eigh solver to FastICA (scikit-learn#22527)

Micky774 · pierreablin · thomasjpfan · ogrisel · commit 35f1db0dec0e · 2022-07-11T15:15:23.000+02:00
Co-authored-by: Pierre Ablin <pierreablin@gmail.com>
Co-authored-by: Thomas J. Fan <thomasjpfan@gmail.com>
diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst
@@ -105,6 +105,20 @@ Changelog
 - |Efficiency| Improve runtime performance of :class:`ensemble.IsolationForest`
   by avoiding data copies. :pr:`23252` by :user:`Zhehao Liu <MaxwellLZH>`.
 
+:mod:`sklearn.decomposition`
+............................
+
+- |Enhancement| :class:`decomposition.FastICA` now allows the user to select
+  how whitening is performed through the new `whiten_solver` parameter, which
+  supports `svd` and `eigh`. `whiten_solver` defaults to `svd` although `eigh`
+  may be faster and more memory efficient in cases where
+  `n_samples >= n_features`. An additional `sign_flip` parameter is added.
+  When `sign_flip=True`, then the output of both solvers will be reconciled
+  during `fit` so that their outputs match. This may change the output of the
+  default solver, and hence may not be backwards compatible.
+  :pr:`11860` by :user:`Pierre Ablin <pierreablin>`,
+  :pr:`22527` by :user:`Meekail Zain <micky774>` and `Thomas Fan`_.
+
 :mod:`sklearn.impute`
 .....................
 
diff --git a/sklearn/decomposition/_fastica.py b/sklearn/decomposition/_fastica.py
@@ -13,7 +13,6 @@
 
 import numpy as np
 from scipy import linalg
-
 from ..base import BaseEstimator, TransformerMixin, _ClassNamePrefixFeaturesOutMixin
 from ..exceptions import ConvergenceWarning
 
@@ -162,10 +161,12 @@ def fastica(
     max_iter=200,
     tol=1e-04,
     w_init=None,
+    whiten_solver="svd",
     random_state=None,
     return_X_mean=False,
     compute_sources=True,
     return_n_iter=False,
+    sign_flip=False,
 ):
     """Perform Fast Independent Component Analysis.
 
@@ -228,6 +229,18 @@ def my_g(x):
         Initial un-mixing array. If `w_init=None`, then an array of values
         drawn from a normal distribution is used.
 
+    whiten_solver : {"eigh", "svd"}, default="svd"
+        The solver to use for whitening.
+
+        - "svd" is more stable numerically if the problem is degenerate, and
+          often faster when `n_samples <= n_features`.
+
+        - "eigh" is generally more memory efficient when
+          `n_samples >= n_features`, and can be faster when
+          `n_samples >= 50 * n_features`.
+
+        .. versionadded:: 1.2
+
     random_state : int, RandomState instance or None, default=None
         Used to initialize ``w_init`` when not specified, with a
         normal distribution. Pass an int, for reproducible results
@@ -244,6 +257,21 @@ def my_g(x):
     return_n_iter : bool, default=False
         Whether or not to return the number of iterations.
 
+    sign_flip : bool, default=False
+        Used to determine whether to enable sign flipping during whitening for
+        consistency in output between solvers.
+
+        - If `sign_flip=False` then the output of different choices for
+          `whiten_solver` may not be equal. Both outputs will still be correct,
+          but may differ numerically.
+
+        - If `sign_flip=True` then the output of both solvers will be
+          reconciled during fit so that their outputs match. This may produce
+          a different output for each solver when compared to
+          `sign_flip=False`.
+
+        .. versionadded:: 1.2
+
     Returns
     -------
     K : ndarray of shape (n_components, n_features) or None
@@ -300,7 +328,9 @@ def my_g(x):
         max_iter=max_iter,
         tol=tol,
         w_init=w_init,
+        whiten_solver=whiten_solver,
         random_state=random_state,
+        sign_flip=sign_flip,
     )
     S = est._fit(X, compute_sources=compute_sources)
 
@@ -378,12 +408,39 @@ def my_g(x):
         Initial un-mixing array. If `w_init=None`, then an array of values
         drawn from a normal distribution is used.
 
+    whiten_solver : {"eigh", "svd"}, default="svd"
+        The solver to use for whitening.
+
+        - "svd" is more stable numerically if the problem is degenerate, and
+          often faster when `n_samples <= n_features`.
+
+        - "eigh" is generally more memory efficient when
+          `n_samples >= n_features`, and can be faster when
+          `n_samples >= 50 * n_features`.
+
+        .. versionadded:: 1.2
+
     random_state : int, RandomState instance or None, default=None
         Used to initialize ``w_init`` when not specified, with a
         normal distribution. Pass an int, for reproducible results
         across multiple function calls.
         See :term:`Glossary <random_state>`.
 
+    sign_flip : bool, default=False
+        Used to determine whether to enable sign flipping during whitening for
+        consistency in output between solvers.
+
+        - If `sign_flip=False` then the output of different choices for
+          `whiten_solver` may not be equal. Both outputs will still be correct,
+          but may differ numerically.
+
+        - If `sign_flip=True` then the output of both solvers will be
+          reconciled during fit so that their outputs match. This may produce
+          a different output for each solver when compared to
+          `sign_flip=False`.
+
+        .. versionadded:: 1.2
+
     Attributes
     ----------
     components_ : ndarray of shape (n_components, n_features)
@@ -457,7 +514,9 @@ def __init__(
         max_iter=200,
         tol=1e-4,
         w_init=None,
+        whiten_solver="svd",
         random_state=None,
+        sign_flip=False,
     ):
         super().__init__()
         self.n_components = n_components
@@ -468,7 +527,9 @@ def __init__(
         self.max_iter = max_iter
         self.tol = tol
         self.w_init = w_init
+        self.whiten_solver = whiten_solver
         self.random_state = random_state
+        self.sign_flip = sign_flip
 
     def _fit(self, X, compute_sources=False):
         """Fit the model.
@@ -557,9 +618,33 @@ def g(x, fun_args):
             XT -= X_mean[:, np.newaxis]
 
             # Whitening and preprocessing by PCA
-            u, d, _ = linalg.svd(XT, full_matrices=False, check_finite=False)
+            if self.whiten_solver == "eigh":
+                # Faster when n_samples >> n_features
+                d, u = linalg.eigh(XT.dot(X))
+                sort_indices = np.argsort(d)[::-1]
+                eps = np.finfo(d.dtype).eps
+                degenerate_idx = d < eps
+                if np.any(degenerate_idx):
+                    warnings.warn(
+                        "There are some small singular values, using "
+                        "whiten_solver = 'svd' might lead to more "
+                        "accurate results."
+                    )
+                d[degenerate_idx] = eps  # For numerical issues
+                np.sqrt(d, out=d)
+                d, u = d[sort_indices], u[:, sort_indices]
+            elif self.whiten_solver == "svd":
+                u, d = linalg.svd(XT, full_matrices=False, check_finite=False)[:2]
+            else:
+                raise ValueError(
+                    "`whiten_solver` must be 'eigh' or 'svd' but got"
+                    f" {self.whiten_solver} instead"
+                )
+
+            # Give consistent eigenvectors for both whitening solvers
+            if self.sign_flip:
+                u *= np.sign(u[0])
 
-            del _
             K = (u / d).T[:n_components]  # see (6.33) p.140
             del u, d
             X1 = np.dot(K, XT)
diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py
@@ -7,6 +7,7 @@
 
 import numpy as np
 from scipy import stats
+from sklearn.datasets import make_low_rank_matrix
 
 from sklearn.utils._testing import assert_array_equal
 from sklearn.utils._testing import assert_allclose
@@ -422,7 +423,10 @@ def test_fastica_whiten_backwards_compatibility():
 
     # No warning must be raised in this case.
     av_ica = FastICA(
-        n_components=n_components, whiten="arbitrary-variance", random_state=0
+        n_components=n_components,
+        whiten="arbitrary-variance",
+        random_state=0,
+        whiten_solver="svd",
     )
     with warnings.catch_warnings():
         warnings.simplefilter("error", FutureWarning)
@@ -457,3 +461,60 @@ def test_fastica_output_shape(whiten, return_X_mean, return_n_iter):
     assert len(out) == expected_len
     if not whiten:
         assert out[0] is None
+
+
+@pytest.mark.parametrize("add_noise", [True, False])
+def test_fastica_simple_different_solvers(add_noise, global_random_seed):
+    """Test FastICA is consistent between whiten_solvers when `sign_flip=True`."""
+    rng = np.random.RandomState(global_random_seed)
+    n_samples = 1000
+    # Generate two sources:
+    s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1
+    s2 = stats.t.rvs(1, size=n_samples, random_state=rng)
+    s = np.c_[s1, s2].T
+    center_and_norm(s)
+    s1, s2 = s
+
+    # Mixing angle
+    phi = rng.rand() * 2 * np.pi
+    mixing = np.array([[np.cos(phi), np.sin(phi)], [np.sin(phi), -np.cos(phi)]])
+    m = np.dot(mixing, s)
+
+    if add_noise:
+        m += 0.1 * rng.randn(2, 1000)
+
+    center_and_norm(m)
+
+    outs = {}
+    for solver in ("svd", "eigh"):
+        ica = FastICA(
+            random_state=0, whiten="unit-variance", whiten_solver=solver, sign_flip=True
+        )
+        sources = ica.fit_transform(m.T)
+        outs[solver] = sources
+        assert ica.components_.shape == (2, 2)
+        assert sources.shape == (1000, 2)
+
+    assert_allclose(outs["eigh"], outs["svd"])
+
+
+def test_fastica_eigh_low_rank_warning(global_random_seed):
+    """Test FastICA eigh solver raises warning for low-rank data."""
+    rng = np.random.RandomState(global_random_seed)
+    X = make_low_rank_matrix(
+        n_samples=10, n_features=10, random_state=rng, effective_rank=2
+    )
+    ica = FastICA(random_state=0, whiten="unit-variance", whiten_solver="eigh")
+    msg = "There are some small singular values"
+    with pytest.warns(UserWarning, match=msg):
+        ica.fit(X)
+
+
+@pytest.mark.parametrize("whiten_solver", ["this_should_fail", "test", 1, None])
+def test_fastica_whiten_solver_validation(whiten_solver):
+    rng = np.random.RandomState(0)
+    X = rng.random_sample((10, 2))
+    ica = FastICA(random_state=rng, whiten_solver=whiten_solver, whiten="unit-variance")
+    msg = f"`whiten_solver` must be 'eigh' or 'svd' but got {whiten_solver} instead"
+    with pytest.raises(ValueError, match=msg):
+        ica.fit_transform(X)