From 66dd7070a5d872db24c4f311fadb46a7fdb8258a Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Wed, 9 Mar 2022 13:20:02 -0500 Subject: [PATCH 1/7] TST Check solvers --- sklearn/decomposition/_fastica.py | 7 +-- sklearn/decomposition/tests/test_fastica.py | 51 +++++++++++---------- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/sklearn/decomposition/_fastica.py b/sklearn/decomposition/_fastica.py index d03c99d4a4405..bb4a255eafbc8 100644 --- a/sklearn/decomposition/_fastica.py +++ b/sklearn/decomposition/_fastica.py @@ -574,12 +574,13 @@ def g(x, fun_args): ) d[degenerate_idx] = eps # For numerical issues d = np.sqrt(d, d) - d, u = d[sort_indices], u[sort_indices] - # Resize and reorder to match svd - u = u[::-1, : min(X.shape) : -1] + d, u = d[sort_indices], u[:, sort_indices] else: u, d = linalg.svd(XT, full_matrices=False, check_finite=False)[:2] + signs = np.sign(u[0]) + u *= signs + K = (u / d).T[:n_components] # see (6.33) p.140 del u, d X1 = np.dot(K, XT) diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index 6b93fd03a5422..bb97d37cc7878 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -118,36 +118,39 @@ def g_test(x): assert_almost_equal(np.dot(s1_, s1) / n_samples, 1, decimal=1) assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=1) - # Test FastICA class + +@pytest.mark.parametrize("add_noise", [True, False]) +@pytest.mark.parametrize("seed", range(1)) +def test_fastica_simple_different_solvers(add_noise, seed): + """Test FastICA is consistent between svd_solvers.""" + rng = np.random.RandomState(seed) + # scipy.stats uses the global RNG: + n_samples = 1000 + # Generate two sources: + s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1 + s2 = stats.t.rvs(1, size=n_samples) + s = np.c_[s1, s2].T + center_and_norm(s) + s1, s2 = s + + # Mixing angle + phi = 0.6 + mixing = np.array([[np.cos(phi), np.sin(phi)], 
[np.sin(phi), -np.cos(phi)]]) + m = np.dot(mixing, s) + + if add_noise: + m += 0.1 * rng.randn(2, 1000) + + center_and_norm(m) + outs = {} - for solver in ("eigh", "svd"): - _, _, sources_fun = fastica( - m.T, fun=nl, algorithm=algo, random_state=0, svd_solver=solver - ) - ica = FastICA(fun=nl, algorithm=algo, random_state=0, svd_solver=solver) + for solver in ("svd", "eigh"): + ica = FastICA(random_state=0, svd_solver=solver) sources = ica.fit_transform(m.T) outs[solver] = sources assert ica.components_.shape == (2, 2) assert sources.shape == (1000, 2) - assert_array_almost_equal(sources_fun, sources) - assert_array_almost_equal(sources, ica.transform(m.T)) - - assert ica.mixing_.shape == (2, 2) - - for fn in [np.tanh, "exp(-.5(x^2))"]: - ica = FastICA(fun=fn, algorithm=algo, svd_solver=solver) - with pytest.raises(ValueError): - ica.fit(m.T) - - with pytest.raises(TypeError): - FastICA(fun=range(10), svd_solver=solver).fit(m.T) - - # Check equality up to column parity - for A in (outs["eigh"], outs["svd"]): - for c in range(A.shape[1]): - if A[0, c] < 0: - A[:, c] *= -1 assert_array_almost_equal(outs["eigh"], outs["svd"]) From e6e960209f31662563e3a690f3a07179405075b2 Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Wed, 9 Mar 2022 13:21:09 -0500 Subject: [PATCH 2/7] TST Create new tests --- sklearn/decomposition/tests/test_fastica.py | 80 +++++++++++++-------- 1 file changed, 50 insertions(+), 30 deletions(-) diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index bb97d37cc7878..b831d9e976f2c 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -118,40 +118,25 @@ def g_test(x): assert_almost_equal(np.dot(s1_, s1) / n_samples, 1, decimal=1) assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=1) + # Test FastICA class + _, _, sources_fun = fastica(m.T, fun=nl, algorithm=algo, random_state=seed) + ica = FastICA(fun=nl, algorithm=algo, random_state=seed) + sources = ica.fit_transform(m.T) + assert ica.components_.shape == (2, 2) + assert sources.shape == (1000, 2) -@pytest.mark.parametrize("add_noise", [True, False]) -@pytest.mark.parametrize("seed", range(1)) -def test_fastica_simple_different_solvers(add_noise, seed): - """Test FastICA is consistent between svd_solvers.""" - rng = np.random.RandomState(seed) - # scipy.stats uses the global RNG: - n_samples = 1000 - # Generate two sources: - s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1 - s2 = stats.t.rvs(1, size=n_samples) - s = np.c_[s1, s2].T - center_and_norm(s) - s1, s2 = s + assert_array_almost_equal(sources_fun, sources) + assert_array_almost_equal(sources, ica.transform(m.T)) - # Mixing angle - phi = 0.6 - mixing = np.array([[np.cos(phi), np.sin(phi)], [np.sin(phi), -np.cos(phi)]]) - m = np.dot(mixing, s) + assert ica.mixing_.shape == (2, 2) - if add_noise: - m += 0.1 * rng.randn(2, 1000) - - center_and_norm(m) - - outs = {} - for solver in ("svd", "eigh"): - ica = FastICA(random_state=0, svd_solver=solver) - sources = ica.fit_transform(m.T) - outs[solver] = sources - assert ica.components_.shape == (2, 2) - assert sources.shape == (1000, 2) + for fn in [np.tanh, "exp(-.5(x^2))"]: + 
ica = FastICA(fun=fn, algorithm=algo) + with pytest.raises(ValueError): + ica.fit(m.T) - assert_array_almost_equal(outs["eigh"], outs["svd"]) + with pytest.raises(TypeError): + FastICA(fun=range(10)).fit(m.T) def test_fastica_nowhiten(): @@ -402,3 +387,38 @@ def test_fastica_output_shape(whiten, return_X_mean, return_n_iter): assert len(out) == expected_len if not whiten: assert out[0] is None + + +@pytest.mark.parametrize("add_noise", [True, False]) +@pytest.mark.parametrize("seed", range(1)) +def test_fastica_simple_different_solvers(add_noise, seed): + """Test FastICA is consistent between svd_solvers.""" + rng = np.random.RandomState(seed) + # scipy.stats uses the global RNG: + n_samples = 1000 + # Generate two sources: + s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1 + s2 = stats.t.rvs(1, size=n_samples) + s = np.c_[s1, s2].T + center_and_norm(s) + s1, s2 = s + + # Mixing angle + phi = 0.6 + mixing = np.array([[np.cos(phi), np.sin(phi)], [np.sin(phi), -np.cos(phi)]]) + m = np.dot(mixing, s) + + if add_noise: + m += 0.1 * rng.randn(2, 1000) + + center_and_norm(m) + + outs = {} + for solver in ("svd", "eigh"): + ica = FastICA(random_state=0, svd_solver=solver) + sources = ica.fit_transform(m.T) + outs[solver] = sources + assert ica.components_.shape == (2, 2) + assert sources.shape == (1000, 2) + + assert_array_almost_equal(outs["eigh"], outs["svd"]) From 9cdbef9b2919c629218b2f1f272a1cc3f8a4a1ef Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Wed, 9 Mar 2022 13:21:39 -0500 Subject: [PATCH 3/7] CLN Slightly better --- sklearn/decomposition/_fastica.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/decomposition/_fastica.py b/sklearn/decomposition/_fastica.py index bb4a255eafbc8..20e4bbc103a35 100644 --- a/sklearn/decomposition/_fastica.py +++ b/sklearn/decomposition/_fastica.py @@ -578,8 +578,7 @@ def g(x, fun_args): else: u, d = linalg.svd(XT, full_matrices=False, check_finite=False)[:2] - signs = np.sign(u[0]) - u *= signs + u *= np.sign(u[0]) K = (u / d).T[:n_components] # see (6.33) p.140 del u, d From c5ef5a967abcda93e718b9a6f590ae799319e3b0 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Wed, 9 Mar 2022 13:24:24 -0500 Subject: [PATCH 4/7] TST Adjust seed --- sklearn/decomposition/tests/test_fastica.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index b831d9e976f2c..90ed63a187660 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -390,7 +390,7 @@ def test_fastica_output_shape(whiten, return_X_mean, return_n_iter): @pytest.mark.parametrize("add_noise", [True, False]) -@pytest.mark.parametrize("seed", range(1)) +@pytest.mark.parametrize("seed", range(2)) def test_fastica_simple_different_solvers(add_noise, seed): """Test FastICA is consistent between svd_solvers.""" rng = np.random.RandomState(seed) From 77422c879e1dfc5e3ea621c5f8ffa16af5f83d19 Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Wed, 9 Mar 2022 13:27:29 -0500 Subject: [PATCH 5/7] DOC Adds comment --- sklearn/decomposition/_fastica.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/decomposition/_fastica.py b/sklearn/decomposition/_fastica.py index 20e4bbc103a35..4c5a8e265838c 100644 --- a/sklearn/decomposition/_fastica.py +++ b/sklearn/decomposition/_fastica.py @@ -578,6 +578,7 @@ def g(x, fun_args): else: u, d = linalg.svd(XT, full_matrices=False, check_finite=False)[:2] + # Give consistent eigenvectors for both svd solvers u *= np.sign(u[0]) K = (u / d).T[:n_components] # see (6.33) p.140 From deaab6ebfb0a5e07c6051e4b705ae10fe750e135 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Wed, 9 Mar 2022 13:48:32 -0500 Subject: [PATCH 6/7] FIX Give a random state --- sklearn/decomposition/tests/test_fastica.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index 90ed63a187660..a1129b355be18 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -398,7 +398,7 @@ def test_fastica_simple_different_solvers(add_noise, seed): n_samples = 1000 # Generate two sources: s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1 - s2 = stats.t.rvs(1, size=n_samples) + s2 = stats.t.rvs(1, size=n_samples, random_state=rng) s = np.c_[s1, s2].T center_and_norm(s) s1, s2 = s From c077bfe4098603bc77174308bfb443fdbece8baf Mon Sep 17 00:00:00 2001 From: "Thomas J. 
Fan" Date: Wed, 9 Mar 2022 13:48:44 -0500 Subject: [PATCH 7/7] CLN Remove stale comment about scipy.stats global RNG --- sklearn/decomposition/tests/test_fastica.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index a1129b355be18..39e9d849ac970 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -394,7 +394,6 @@ def test_fastica_output_shape(whiten, return_X_mean, return_n_iter): def test_fastica_simple_different_solvers(add_noise, seed): """Test FastICA is consistent between svd_solvers.""" rng = np.random.RandomState(seed) - # scipy.stats uses the global RNG: n_samples = 1000 # Generate two sources: s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1