Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions sklearn/decomposition/_fastica.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,12 +574,13 @@ def g(x, fun_args):
)
d[degenerate_idx] = eps # For numerical issues
d = np.sqrt(d, d)
d, u = d[sort_indices], u[sort_indices]
# Resize and reorder to match svd
u = u[::-1, : min(X.shape) : -1]
d, u = d[sort_indices], u[:, sort_indices]
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on the `eigh` docs, the eigenvector array has shape (M, N), where N is the number of eigenvalues and each column `u[:, i]` is the eigenvector for eigenvalue `d[i]`. Thus, I think the sorting should be done over axis=1.

else:
u, d = linalg.svd(XT, full_matrices=False, check_finite=False)[:2]

# Give consistent eigenvectors for both svd solvers
u *= np.sign(u[0])
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we want to be strictly the same as `svd_flip`, the alternative solution is to flip each column so that its largest-magnitude entry is positive:

    max_abs_cols = np.argmax(np.abs(u), axis=0)
    signs = np.sign(u[max_abs_cols, range(u.shape[1])])
    u *= signs


K = (u / d).T[:n_components] # see (6.33) p.140
del u, d
X1 = np.dot(K, XT)
Expand Down
74 changes: 48 additions & 26 deletions sklearn/decomposition/tests/test_fastica.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,36 +119,24 @@ def g_test(x):
assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=1)

# Test FastICA class
outs = {}
for solver in ("eigh", "svd"):
_, _, sources_fun = fastica(
m.T, fun=nl, algorithm=algo, random_state=0, svd_solver=solver
)
ica = FastICA(fun=nl, algorithm=algo, random_state=0, svd_solver=solver)
sources = ica.fit_transform(m.T)
outs[solver] = sources
assert ica.components_.shape == (2, 2)
assert sources.shape == (1000, 2)
_, _, sources_fun = fastica(m.T, fun=nl, algorithm=algo, random_state=seed)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This reverts the tests to what they were on upstream/main.

ica = FastICA(fun=nl, algorithm=algo, random_state=seed)
sources = ica.fit_transform(m.T)
assert ica.components_.shape == (2, 2)
assert sources.shape == (1000, 2)

assert_array_almost_equal(sources_fun, sources)
assert_array_almost_equal(sources, ica.transform(m.T))
assert_array_almost_equal(sources_fun, sources)
assert_array_almost_equal(sources, ica.transform(m.T))

assert ica.mixing_.shape == (2, 2)
assert ica.mixing_.shape == (2, 2)

for fn in [np.tanh, "exp(-.5(x^2))"]:
ica = FastICA(fun=fn, algorithm=algo, svd_solver=solver)
with pytest.raises(ValueError):
ica.fit(m.T)
for fn in [np.tanh, "exp(-.5(x^2))"]:
ica = FastICA(fun=fn, algorithm=algo)
with pytest.raises(ValueError):
ica.fit(m.T)

with pytest.raises(TypeError):
FastICA(fun=range(10), svd_solver=solver).fit(m.T)

# Check equality up to column parity
for A in (outs["eigh"], outs["svd"]):
for c in range(A.shape[1]):
if A[0, c] < 0:
A[:, c] *= -1
assert_array_almost_equal(outs["eigh"], outs["svd"])
with pytest.raises(TypeError):
FastICA(fun=range(10)).fit(m.T)


def test_fastica_nowhiten():
Expand Down Expand Up @@ -399,3 +387,37 @@ def test_fastica_output_shape(whiten, return_X_mean, return_n_iter):
assert len(out) == expected_len
if not whiten:
assert out[0] is None


@pytest.mark.parametrize("add_noise", [True, False])
@pytest.mark.parametrize("seed", range(2))
def test_fastica_simple_different_solvers(add_noise, seed):
Copy link
Author

@thomasjpfan thomasjpfan Mar 9, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a new test, based on what you had, that checks the "svd" and "eigh" solvers give consistent results.

"""Test FastICA is consistent between svd_solvers."""
rng = np.random.RandomState(seed)
n_samples = 1000
# Generate two sources:
s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1
s2 = stats.t.rvs(1, size=n_samples, random_state=rng)
s = np.c_[s1, s2].T
center_and_norm(s)
s1, s2 = s

# Mixing angle
phi = 0.6
mixing = np.array([[np.cos(phi), np.sin(phi)], [np.sin(phi), -np.cos(phi)]])
m = np.dot(mixing, s)

if add_noise:
m += 0.1 * rng.randn(2, 1000)

center_and_norm(m)

outs = {}
for solver in ("svd", "eigh"):
ica = FastICA(random_state=0, svd_solver=solver)
sources = ica.fit_transform(m.T)
outs[solver] = sources
assert ica.components_.shape == (2, 2)
assert sources.shape == (1000, 2)

assert_array_almost_equal(outs["eigh"], outs["svd"])