diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst index 6d441004f8ae6..7e8e79d9d8bdd 100644 --- a/doc/modules/decomposition.rst +++ b/doc/modules/decomposition.rst @@ -759,9 +759,9 @@ and the regularized objective function is: + \frac{\alpha(1-\rho)}{2} ||W||_{\mathrm{Fro}} ^ 2 + \frac{\alpha(1-\rho)}{2} ||H||_{\mathrm{Fro}} ^ 2 -:class:`NMF` regularizes both W and H. The public function -:func:`non_negative_factorization` allows a finer control through the -:attr:`regularization` attribute, and may regularize only W, only H, or both. +:class:`NMF` regularizes both W and H by default. The :attr:`regularization` +parameter allows for finer control: only W, only H, both, or neither +can be regularized. NMF with a beta-divergence -------------------------- diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index 217e0ed61cfde..c46bb51793ad1 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -114,6 +114,12 @@ Changelog argument `rotation`, which can take the value `None`, `'varimax'` or `'quartimax'.` :pr:`11064` by :user:`Jona Sassenhagen `. +- |Enhancement| :class:`decomposition.NMF` now supports the optional parameter + `regularization`, which can take the values `None`, `'components'`, + `'transformation'` or `'both'`, in accordance with + :func:`decomposition.non_negative_factorization`. + :pr:`17414` by :user:`Bharat Raghunathan `. + :mod:`sklearn.ensemble` ....................... diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py index 24993102bb424..ebc905a7fbcb3 100644 --- a/sklearn/decomposition/_nmf.py +++ b/sklearn/decomposition/_nmf.py @@ -1081,7 +1081,7 @@ def non_negative_factorization(X, W=None, H=None, n_components=None, *, class NMF(TransformerMixin, BaseEstimator): - r"""Non-Negative Matrix Factorization (NMF) + """Non-Negative Matrix Factorization (NMF) Find two non-negative matrices (W, H) whose product approximates the non- negative matrix X. 
This factorization can be used for example for @@ -1097,8 +1097,8 @@ class NMF(TransformerMixin, BaseEstimator): Where:: - ||A||_Fro^2 = \sum_{i,j} A_{ij}^2 (Frobenius norm) - ||vec(A)||_1 = \sum_{i,j} abs(A_{ij}) (Elementwise L1 norm) + ||A||_Fro^2 = \\sum_{i,j} A_{ij}^2 (Frobenius norm) + ||vec(A)||_1 = \\sum_{i,j} abs(A_{ij}) (Elementwise L1 norm) For multiplicative-update ('mu') solver, the Frobenius norm (0.5 * ||X - WH||_Fro^2) can be changed into another beta-divergence loss, @@ -1198,6 +1198,13 @@ class NMF(TransformerMixin, BaseEstimator): .. versionadded:: 0.17 *shuffle* parameter used in the Coordinate Descent solver. + regularization : {'both', 'components', 'transformation', None}, \ + default='both' + Select whether the regularization affects the components (H), the + transformation (W), both or none of them. + + .. versionadded:: 0.24 + Attributes ---------- components_ : array, [n_components, n_features] @@ -1239,7 +1246,7 @@ class NMF(TransformerMixin, BaseEstimator): def __init__(self, n_components=None, *, init=None, solver='cd', beta_loss='frobenius', tol=1e-4, max_iter=200, random_state=None, alpha=0., l1_ratio=0., verbose=0, - shuffle=False): + shuffle=False, regularization='both'): self.n_components = n_components self.init = init self.solver = solver @@ -1251,6 +1258,7 @@ def __init__(self, n_components=None, *, init=None, solver='cd', self.l1_ratio = l1_ratio self.verbose = verbose self.shuffle = shuffle + self.regularization = regularization def _more_tags(self): return {'requires_positive_X': True} @@ -1285,7 +1293,7 @@ def fit_transform(self, X, y=None, W=None, H=None): X=X, W=W, H=H, n_components=self.n_components, init=self.init, update_H=True, solver=self.solver, beta_loss=self.beta_loss, tol=self.tol, max_iter=self.max_iter, alpha=self.alpha, - l1_ratio=self.l1_ratio, regularization='both', + l1_ratio=self.l1_ratio, regularization=self.regularization, random_state=self.random_state, verbose=self.verbose, shuffle=self.shuffle) @@ 
-1334,9 +1342,10 @@ def transform(self, X): X=X, W=None, H=self.components_, n_components=self.n_components_, init=self.init, update_H=False, solver=self.solver, beta_loss=self.beta_loss, tol=self.tol, max_iter=self.max_iter, - alpha=self.alpha, l1_ratio=self.l1_ratio, regularization='both', - random_state=self.random_state, verbose=self.verbose, - shuffle=self.shuffle) + alpha=self.alpha, l1_ratio=self.l1_ratio, + regularization=self.regularization, + random_state=self.random_state, + verbose=self.verbose, shuffle=self.shuffle) return W diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py index 439ad0697031f..ea48be660a734 100644 --- a/sklearn/decomposition/tests/test_nmf.py +++ b/sklearn/decomposition/tests/test_nmf.py @@ -20,12 +20,14 @@ @pytest.mark.parametrize('solver', ['cd', 'mu']) -def test_convergence_warning(solver): +@pytest.mark.parametrize('regularization', + [None, 'both', 'components', 'transformation']) +def test_convergence_warning(solver, regularization): convergence_warning = ("Maximum number of iterations 1 reached. 
" "Increase it to improve convergence.") A = np.ones((2, 2)) with pytest.warns(ConvergenceWarning, match=convergence_warning): - NMF(solver=solver, max_iter=1).fit(A) + NMF(solver=solver, regularization=regularization, max_iter=1).fit(A) def test_initialize_nn_output(): @@ -44,6 +46,8 @@ def test_parameter_checking(): assert_raise_message(ValueError, msg, NMF(solver=name).fit, A) msg = "Invalid init parameter: got 'spam' instead of one of" assert_raise_message(ValueError, msg, NMF(init=name).fit, A) + msg = "Invalid regularization parameter: got 'spam' instead of one of" + assert_raise_message(ValueError, msg, NMF(regularization=name).fit, A) msg = "Invalid beta_loss parameter: got 'spam' instead of one" assert_raise_message(ValueError, msg, NMF(solver='mu', beta_loss=name).fit, A) @@ -97,36 +101,43 @@ def test_initialize_variants(): # ignore UserWarning raised when both solver='mu' and init='nndsvd' @ignore_warnings(category=UserWarning) -def test_nmf_fit_nn_output(): +@pytest.mark.parametrize('solver', ('cd', 'mu')) +@pytest.mark.parametrize('init', + (None, 'nndsvd', 'nndsvda', 'nndsvdar', 'random')) +@pytest.mark.parametrize('regularization', + (None, 'both', 'components', 'transformation')) +def test_nmf_fit_nn_output(solver, init, regularization): # Test that the decomposition does not contain negative values A = np.c_[5. - np.arange(1, 6), 5. 
+ np.arange(1, 6)] - for solver in ('cd', 'mu'): - for init in (None, 'nndsvd', 'nndsvda', 'nndsvdar', 'random'): - model = NMF(n_components=2, solver=solver, init=init, - random_state=0) - transf = model.fit_transform(A) - assert not((model.components_ < 0).any() or - (transf < 0).any()) + model = NMF(n_components=2, solver=solver, init=init, + regularization=regularization, random_state=0) + transf = model.fit_transform(A) + assert not((model.components_ < 0).any() or + (transf < 0).any()) @pytest.mark.parametrize('solver', ('cd', 'mu')) -def test_nmf_fit_close(solver): +@pytest.mark.parametrize('regularization', + (None, 'both', 'components', 'transformation')) +def test_nmf_fit_close(solver, regularization): rng = np.random.mtrand.RandomState(42) # Test that the fit is not too far away pnmf = NMF(5, solver=solver, init='nndsvdar', random_state=0, - max_iter=600) + regularization=regularization, max_iter=600) X = np.abs(rng.randn(6, 5)) assert pnmf.fit(X).reconstruction_err_ < 0.1 @pytest.mark.parametrize('solver', ('cd', 'mu')) -def test_nmf_transform(solver): +@pytest.mark.parametrize('regularization', + (None, 'both', 'components', 'transformation')) +def test_nmf_transform(solver, regularization): # Test that NMF.transform returns close values rng = np.random.mtrand.RandomState(42) A = np.abs(rng.randn(6, 5)) m = NMF(solver=solver, n_components=3, init='random', - random_state=0, tol=1e-5) + regularization=regularization, random_state=0, tol=1e-5) ft = m.fit_transform(A) t = m.transform(A) assert_array_almost_equal(ft, t, decimal=2) @@ -148,12 +159,14 @@ def test_nmf_transform_custom_init(): @pytest.mark.parametrize('solver', ('cd', 'mu')) -def test_nmf_inverse_transform(solver): +@pytest.mark.parametrize('regularization', + (None, 'both', 'components', 'transformation')) +def test_nmf_inverse_transform(solver, regularization): # Test that NMF.inverse_transform returns close values random_state = np.random.RandomState(0) A = np.abs(random_state.randn(6, 4)) 
m = NMF(solver=solver, n_components=4, init='random', random_state=0, - max_iter=1000) + regularization=regularization, max_iter=1000) ft = m.fit_transform(A) A_new = m.inverse_transform(ft) assert_array_almost_equal(A, A_new, decimal=2) @@ -167,7 +180,9 @@ def test_n_components_greater_n_features(): @pytest.mark.parametrize('solver', ['cd', 'mu']) -def test_nmf_sparse_input(solver): +@pytest.mark.parametrize('regularization', + [None, 'both', 'components', 'transformation']) +def test_nmf_sparse_input(solver, regularization): # Test that sparse matrices are accepted as input from scipy.sparse import csc_matrix @@ -177,7 +192,8 @@ def test_nmf_sparse_input(solver): A_sparse = csc_matrix(A) est1 = NMF(solver=solver, n_components=5, init='random', - random_state=0, tol=1e-2) + regularization=regularization, random_state=0, + tol=1e-2) est2 = clone(est1) W1 = est1.fit_transform(A) @@ -204,28 +220,32 @@ def test_nmf_sparse_transform(): assert_array_almost_equal(A_fit_tr, A_tr, decimal=1) -def test_non_negative_factorization_consistency(): +@pytest.mark.parametrize('init', ['random', 'nndsvd']) +@pytest.mark.parametrize('solver', ('cd', 'mu')) +@pytest.mark.parametrize('regularization', + (None, 'both', 'components', 'transformation')) +def test_non_negative_factorization_consistency(init, solver, regularization): # Test that the function is called in the same way, either directly # or through the NMF class rng = np.random.mtrand.RandomState(42) A = np.abs(rng.randn(10, 10)) A[:, 2 * np.arange(5)] = 0 - for init in ['random', 'nndsvd']: - for solver in ('cd', 'mu'): - W_nmf, H, _ = non_negative_factorization( - A, init=init, solver=solver, random_state=1, tol=1e-2) - W_nmf_2, _, _ = non_negative_factorization( - A, H=H, update_H=False, init=init, solver=solver, - random_state=1, tol=1e-2) + W_nmf, H, _ = non_negative_factorization( + A, init=init, solver=solver, + regularization=regularization, random_state=1, tol=1e-2) + W_nmf_2, _, _ = non_negative_factorization( + A, 
H=H, update_H=False, init=init, solver=solver, + regularization=regularization, random_state=1, tol=1e-2) - model_class = NMF(init=init, solver=solver, random_state=1, - tol=1e-2) - W_cls = model_class.fit_transform(A) - W_cls_2 = model_class.transform(A) + model_class = NMF(init=init, solver=solver, + regularization=regularization, + random_state=1, tol=1e-2) + W_cls = model_class.fit_transform(A) + W_cls_2 = model_class.transform(A) - assert_array_almost_equal(W_nmf, W_cls, decimal=10) - assert_array_almost_equal(W_nmf_2, W_cls_2, decimal=10) + assert_array_almost_equal(W_nmf, W_cls, decimal=10) + assert_array_almost_equal(W_nmf_2, W_cls_2, decimal=10) def test_non_negative_factorization_checking(): @@ -515,11 +535,13 @@ def test_nmf_underflow(): (np.int32, np.float64), (np.int64, np.float64)]) @pytest.mark.parametrize("solver", ["cd", "mu"]) -def test_nmf_dtype_match(dtype_in, dtype_out, solver): +@pytest.mark.parametrize("regularization", + (None, "both", "components", "transformation")) +def test_nmf_dtype_match(dtype_in, dtype_out, solver, regularization): # Check that NMF preserves dtype (float32 and float64) X = np.random.RandomState(0).randn(20, 15).astype(dtype_in, copy=False) np.abs(X, out=X) - nmf = NMF(solver=solver) + nmf = NMF(solver=solver, regularization=regularization) assert nmf.fit(X).transform(X).dtype == dtype_out assert nmf.fit_transform(X).dtype == dtype_out @@ -527,13 +549,15 @@ def test_nmf_dtype_match(dtype_in, dtype_out, solver): @pytest.mark.parametrize("solver", ["cd", "mu"]) -def test_nmf_float32_float64_consistency(solver): +@pytest.mark.parametrize("regularization", + (None, "both", "components", "transformation")) +def test_nmf_float32_float64_consistency(solver, regularization): # Check that the result of NMF is the same between float32 and float64 X = np.random.RandomState(0).randn(50, 7) np.abs(X, out=X) - nmf32 = NMF(solver=solver, random_state=0) + nmf32 = NMF(solver=solver, regularization=regularization, random_state=0) W32 = 
nmf32.fit_transform(X.astype(np.float32)) - nmf64 = NMF(solver=solver, random_state=0) + nmf64 = NMF(solver=solver, regularization=regularization, random_state=0) W64 = nmf64.fit_transform(X) assert_allclose(W32, W64, rtol=1e-6, atol=1e-5)