From ff3349b964d563383ac5196b08e185617b3e2579 Mon Sep 17 00:00:00 2001
From: Bharat123rox
Date: Tue, 2 Jun 2020 01:05:35 +0530
Subject: [PATCH 01/10] ENH: Add regularization to the main NMF class

---
 doc/modules/decomposition.rst           |  6 +-
 sklearn/decomposition/_nmf.py           | 20 ++++--
 sklearn/decomposition/tests/test_nmf.py | 84 ++++++++++++++++---------
 3 files changed, 70 insertions(+), 40 deletions(-)

diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst
index def755f42b796..330dc59c759ed 100644
--- a/doc/modules/decomposition.rst
+++ b/doc/modules/decomposition.rst
@@ -752,9 +752,9 @@ and the regularized objective function is:
     + \frac{\alpha(1-\rho)}{2} ||W||_{\mathrm{Fro}} ^ 2
     + \frac{\alpha(1-\rho)}{2} ||H||_{\mathrm{Fro}} ^ 2
 
-:class:`NMF` regularizes both W and H. The public function
-:func:`non_negative_factorization` allows a finer control through the
-:attr:`regularization` attribute, and may regularize only W, only H, or both.
+:class:`NMF` regularizes both W and H by default. The :attr:`regularization`
+parameter allows for finer control, with which only W, only H,
+or both can be regularized.
 
 NMF with a beta-divergence
 --------------------------
diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index f1385d21596e3..0c6846bb42b74 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1163,6 +1163,12 @@ class NMF(TransformerMixin, BaseEstimator):
     max_iter : integer, default: 200
         Maximum number of iterations before timing out.
 
+    regularization : 'both' | 'components' | 'transformation' | None
+        Select whether the regularization affects the components (H), the
+        transformation (W), both or none of them. Defaults to 'both'.
+
+        .. versionadded:: 0.24
+
     random_state : int, RandomState instance, default=None
         Used for initialisation (when ``init`` == 'nndsvdar' or 'random'),
         and in Coordinate Descent. Pass an int for reproducible
@@ -1236,14 +1242,15 @@ class NMF(TransformerMixin, BaseEstimator):
     @_deprecate_positional_args
     def __init__(self, n_components=None, *, init=None, solver='cd',
                  beta_loss='frobenius', tol=1e-4, max_iter=200,
-                 random_state=None, alpha=0., l1_ratio=0., verbose=0,
-                 shuffle=False):
+                 regularization='both', random_state=None, alpha=0.,
+                 l1_ratio=0., verbose=0, shuffle=False):
         self.n_components = n_components
         self.init = init
         self.solver = solver
         self.beta_loss = beta_loss
         self.tol = tol
         self.max_iter = max_iter
+        self.regularization = regularization
         self.random_state = random_state
         self.alpha = alpha
         self.l1_ratio = l1_ratio
@@ -1283,7 +1290,7 @@ def fit_transform(self, X, y=None, W=None, H=None):
             X=X, W=W, H=H, n_components=self.n_components, init=self.init,
             update_H=True, solver=self.solver, beta_loss=self.beta_loss,
             tol=self.tol, max_iter=self.max_iter, alpha=self.alpha,
-            l1_ratio=self.l1_ratio, regularization='both',
+            l1_ratio=self.l1_ratio, regularization=self.regularization,
             random_state=self.random_state, verbose=self.verbose,
             shuffle=self.shuffle)
 
@@ -1332,9 +1339,10 @@ def transform(self, X):
             X=X, W=None, H=self.components_, n_components=self.n_components_,
             init=self.init, update_H=False, solver=self.solver,
             beta_loss=self.beta_loss, tol=self.tol, max_iter=self.max_iter,
-            alpha=self.alpha, l1_ratio=self.l1_ratio, regularization='both',
-            random_state=self.random_state, verbose=self.verbose,
-            shuffle=self.shuffle)
+            alpha=self.alpha, l1_ratio=self.l1_ratio,
+            regularization=self.regularization,
+            random_state=self.random_state,
+            verbose=self.verbose, shuffle=self.shuffle)
 
         return W
 
diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py
index a7ef1243d8e25..50b0e180e36af 100644
--- a/sklearn/decomposition/tests/test_nmf.py
+++ b/sklearn/decomposition/tests/test_nmf.py
@@ -20,12 +20,14 @@
 
 
 @pytest.mark.parametrize('solver', ['cd', 'mu'])
-def test_convergence_warning(solver):
+@pytest.mark.parametrize('regularization',
+                         [None, 'both', 'components', 'transformation'])
+def test_convergence_warning(solver, regularization):
     convergence_warning = ("Maximum number of iterations 1 reached. "
                            "Increase it to improve convergence.")
     A = np.ones((2, 2))
     with pytest.warns(ConvergenceWarning, match=convergence_warning):
-        NMF(solver=solver, max_iter=1).fit(A)
+        NMF(solver=solver, regularization=regularization, max_iter=1).fit(A)
 
 
 def test_initialize_nn_output():
@@ -44,6 +46,8 @@ def test_parameter_checking():
         assert_raise_message(ValueError, msg, NMF(solver=name).fit, A)
         msg = "Invalid init parameter: got 'spam' instead of one of"
         assert_raise_message(ValueError, msg, NMF(init=name).fit, A)
+        msg = "Invalid regularization parameter: got 'spam' instead of one of"
+        assert_raise_message(ValueError, msg, NMF(regularization=name).fit, A)
         msg = "Invalid beta_loss parameter: got 'spam' instead of one"
         assert_raise_message(ValueError, msg, NMF(solver='mu',
                                                   beta_loss=name).fit, A)
@@ -103,30 +107,36 @@ def test_nmf_fit_nn_output():
               5. + np.arange(1, 6)]
     for solver in ('cd', 'mu'):
         for init in (None, 'nndsvd', 'nndsvda', 'nndsvdar', 'random'):
-            model = NMF(n_components=2, solver=solver, init=init,
-                        random_state=0)
-            transf = model.fit_transform(A)
-            assert not((model.components_ < 0).any() or
-                       (transf < 0).any())
+            for regularization in (None, 'both',
+                                   'components', 'transformation'):
+                model = NMF(n_components=2, solver=solver, init=init,
+                            regularization=regularization, random_state=0)
+                transf = model.fit_transform(A)
+                assert not((model.components_ < 0).any() or
+                           (transf < 0).any())
 
 
 @pytest.mark.parametrize('solver', ('cd', 'mu'))
-def test_nmf_fit_close(solver):
+@pytest.mark.parametrize('regularization',
+                         (None, 'both', 'components', 'transformation'))
+def test_nmf_fit_close(solver, regularization):
     rng = np.random.mtrand.RandomState(42)
     # Test that the fit is not too far away
     pnmf = NMF(5, solver=solver, init='nndsvdar', random_state=0,
-               max_iter=600)
+               regularization=regularization, max_iter=600)
     X = np.abs(rng.randn(6, 5))
     assert pnmf.fit(X).reconstruction_err_ < 0.1
 
 
 @pytest.mark.parametrize('solver', ('cd', 'mu'))
-def test_nmf_transform(solver):
+@pytest.mark.parametrize('regularization',
+                         (None, 'both', 'components', 'transformation'))
+def test_nmf_transform(solver, regularization):
     # Test that NMF.transform returns close values
     rng = np.random.mtrand.RandomState(42)
     A = np.abs(rng.randn(6, 5))
     m = NMF(solver=solver, n_components=3, init='random',
-            random_state=0, tol=1e-5)
+            regularization=regularization, random_state=0, tol=1e-5)
     ft = m.fit_transform(A)
     t = m.transform(A)
     assert_array_almost_equal(ft, t, decimal=2)
@@ -148,12 +158,14 @@ def test_nmf_transform_custom_init():
 
 
 @pytest.mark.parametrize('solver', ('cd', 'mu'))
-def test_nmf_inverse_transform(solver):
+@pytest.mark.parametrize('regularization',
+                         (None, 'both', 'components', 'transformation'))
+def test_nmf_inverse_transform(solver, regularization):
     # Test that NMF.inverse_transform returns close values
     random_state = np.random.RandomState(0)
     A = np.abs(random_state.randn(6, 4))
     m = NMF(solver=solver, n_components=4, init='random', random_state=0,
-            max_iter=1000)
+            regularization=regularization, max_iter=1000)
     ft = m.fit_transform(A)
     A_new = m.inverse_transform(ft)
     assert_array_almost_equal(A, A_new, decimal=2)
@@ -176,9 +188,11 @@ def test_nmf_sparse_input():
     A_sparse = csc_matrix(A)
 
     for solver in ('cd', 'mu'):
-        est1 = NMF(solver=solver, n_components=5, init='random',
-                   random_state=0, tol=1e-2)
-        est2 = clone(est1)
+        for regularization in (None, 'both', 'components', 'transformation'):
+            est1 = NMF(solver=solver, n_components=5, init='random',
+                       regularization=regularization, random_state=0,
+                       tol=1e-2)
+            est2 = clone(est1)
 
     W1 = est1.fit_transform(A)
     W2 = est2.fit_transform(A_sparse)
@@ -213,16 +227,20 @@ def test_non_negative_factorization_consistency():
 
     for init in ['random', 'nndsvd']:
         for solver in ('cd', 'mu'):
-            W_nmf, H, _ = non_negative_factorization(
-                A, init=init, solver=solver, random_state=1, tol=1e-2)
-            W_nmf_2, _, _ = non_negative_factorization(
-                A, H=H, update_H=False, init=init, solver=solver,
-                random_state=1, tol=1e-2)
-
-            model_class = NMF(init=init, solver=solver, random_state=1,
-                              tol=1e-2)
-            W_cls = model_class.fit_transform(A)
-            W_cls_2 = model_class.transform(A)
+            for regularization in (None, 'both',
+                                   'components', 'transformation'):
+                W_nmf, H, _ = non_negative_factorization(
+                    A, init=init, solver=solver,
+                    regularization=regularization, random_state=1, tol=1e-2)
+                W_nmf_2, _, _ = non_negative_factorization(
+                    A, H=H, update_H=False, init=init, solver=solver,
+                    regularization=regularization, random_state=1, tol=1e-2)
+
+                model_class = NMF(init=init, solver=solver,
+                                  regularization=regularization,
+                                  random_state=1, tol=1e-2)
+                W_cls = model_class.fit_transform(A)
+                W_cls_2 = model_class.transform(A)
 
             assert_array_almost_equal(W_nmf, W_cls, decimal=10)
             assert_array_almost_equal(W_nmf_2, W_cls_2, decimal=10)
@@ -515,11 +533,13 @@ def test_nmf_underflow():
                                     (np.int32, np.float64),
                                     (np.int64, np.float64)])
 @pytest.mark.parametrize("solver", ["cd", "mu"])
-def test_nmf_dtype_match(dtype_in, dtype_out, solver):
+@pytest.mark.parametrize("regularization",
+                         (None, "both", "components", "transformation"))
+def test_nmf_dtype_match(dtype_in, dtype_out, solver, regularization):
     # Check that NMF preserves dtype (float32 and float64)
     X = np.random.RandomState(0).randn(20, 15).astype(dtype_in, copy=False)
     np.abs(X, out=X)
-    nmf = NMF(solver=solver)
+    nmf = NMF(solver=solver, regularization=regularization)
 
     assert nmf.fit(X).transform(X).dtype == dtype_out
     assert nmf.fit_transform(X).dtype == dtype_out
@@ -527,13 +547,15 @@ def test_nmf_dtype_match(dtype_in, dtype_out, solver):
 
 
 @pytest.mark.parametrize("solver", ["cd", "mu"])
-def test_nmf_float32_float64_consistency(solver):
+@pytest.mark.parametrize("regularization",
+                         (None, "both", "components", "transformation"))
+def test_nmf_float32_float64_consistency(solver, regularization):
     # Check that the result of NMF is the same between float32 and float64
     X = np.random.RandomState(0).randn(50, 7)
     np.abs(X, out=X)
-    nmf32 = NMF(solver=solver, random_state=0)
+    nmf32 = NMF(solver=solver, regularization=regularization, random_state=0)
     W32 = nmf32.fit_transform(X.astype(np.float32))
-    nmf64 = NMF(solver=solver, random_state=0)
+    nmf64 = NMF(solver=solver, regularization=regularization, random_state=0)
     W64 = nmf64.fit_transform(X)
 
     assert_allclose(W32, W64, rtol=1e-6, atol=1e-5)

From 8033bbcced0c8db300f33900142a1245484685c0 Mon Sep 17 00:00:00 2001
From: Bharat Raghunathan
Date: Tue, 14 Jul 2020 17:30:52 +0000
Subject: [PATCH 02/10] Update _nmf with suggestions from code review

Update `_nmf.py` with suggestion from review

Co-authored-by: Adrin Jalali
---
 sklearn/decomposition/_nmf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index 0c6846bb42b74..296341145844d 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1163,7 +1163,7 @@ class NMF(TransformerMixin, BaseEstimator):
     max_iter : integer, default: 200
         Maximum number of iterations before timing out.
 
-    regularization : 'both' | 'components' | 'transformation' | None
+    regularization : {'both', 'components', 'transformation', None}, default=None
         Select whether the regularization affects the components (H), the
         transformation (W), both or none of them. Defaults to 'both'.
 
From 83c3c14037ed439c61bd16afae62f87f7bd0db36 Mon Sep 17 00:00:00 2001
From: Bharat123rox
Date: Wed, 15 Jul 2020 00:03:54 +0530
Subject: [PATCH 03/10] Refactor tests, fix linter errors

---
 sklearn/decomposition/_nmf.py           |  5 +-
 sklearn/decomposition/tests/test_nmf.py | 61 +++++++++++++------------
 2 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index 296341145844d..27c5145780926 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1163,9 +1163,10 @@ class NMF(TransformerMixin, BaseEstimator):
     max_iter : integer, default: 200
         Maximum number of iterations before timing out.
 
-    regularization : {'both', 'components', 'transformation', None}, default=None
+    regularization : {'both', 'components', 'transformation', None}
         Select whether the regularization affects the components (H), the
-        transformation (W), both or none of them. Defaults to 'both'.
+        transformation (W), both or none of them.
+        Defaults to 'both'.
 
         .. versionadded:: 0.24
 
diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py
index 50b0e180e36af..466dc9d0f03f9 100644
--- a/sklearn/decomposition/tests/test_nmf.py
+++ b/sklearn/decomposition/tests/test_nmf.py
@@ -101,19 +101,20 @@ def test_initialize_variants():
 
 # ignore UserWarning raised when both solver='mu' and init='nndsvd'
 @ignore_warnings(category=UserWarning)
-def test_nmf_fit_nn_output():
+@pytest.mark.parametrize('solver', ('cd', 'mu'))
+@pytest.mark.parametrize('init',
+                         (None, 'nndsvd', 'nndsvda', 'nndsvdar', 'random'))
+@pytest.mark.parametrize('regularization',
+                         (None, 'both', 'components', 'transformation'))
+def test_nmf_fit_nn_output(solver, init, regularization):
     # Test that the decomposition does not contain negative values
     A = np.c_[5. - np.arange(1, 6),
               5. + np.arange(1, 6)]
-    for solver in ('cd', 'mu'):
-        for init in (None, 'nndsvd', 'nndsvda', 'nndsvdar', 'random'):
-            for regularization in (None, 'both',
-                                   'components', 'transformation'):
-                model = NMF(n_components=2, solver=solver, init=init,
-                            regularization=regularization, random_state=0)
-                transf = model.fit_transform(A)
-                assert not((model.components_ < 0).any() or
-                           (transf < 0).any())
+    model = NMF(n_components=2, solver=solver, init=init,
+                regularization=regularization, random_state=0)
+    transf = model.fit_transform(A)
+    assert not((model.components_ < 0).any() or
+               (transf < 0).any())
 
 
 @pytest.mark.parametrize('solver', ('cd', 'mu'))
@@ -218,32 +219,32 @@ def test_nmf_sparse_transform():
     assert_array_almost_equal(A_fit_tr, A_tr, decimal=1)
 
 
-def test_non_negative_factorization_consistency():
+@pytest.mark.parametrize('init', ['random', 'nndsvd'])
+@pytest.mark.parametrize('solver', ('cd', 'mu'))
+@pytest.mark.parametrize('regularization',
+                         (None, 'both', 'components', 'transformation'))
+def test_non_negative_factorization_consistency(init, solver, regularization):
     # Test that the function is called in the same way, either directly
     # or through the NMF class
     rng = np.random.mtrand.RandomState(42)
     A = np.abs(rng.randn(10, 10))
     A[:, 2 * np.arange(5)] = 0
 
-    for init in ['random', 'nndsvd']:
-        for solver in ('cd', 'mu'):
-            for regularization in (None, 'both',
-                                   'components', 'transformation'):
-                W_nmf, H, _ = non_negative_factorization(
-                    A, init=init, solver=solver,
-                    regularization=regularization, random_state=1, tol=1e-2)
-                W_nmf_2, _, _ = non_negative_factorization(
-                    A, H=H, update_H=False, init=init, solver=solver,
-                    regularization=regularization, random_state=1, tol=1e-2)
-
-                model_class = NMF(init=init, solver=solver,
-                                  regularization=regularization,
-                                  random_state=1, tol=1e-2)
-                W_cls = model_class.fit_transform(A)
-                W_cls_2 = model_class.transform(A)
-
-            assert_array_almost_equal(W_nmf, W_cls, decimal=10)
-            assert_array_almost_equal(W_nmf_2, W_cls_2, decimal=10)
+    W_nmf, H, _ = non_negative_factorization(
+        A, init=init, solver=solver,
+        regularization=regularization, random_state=1, tol=1e-2)
+    W_nmf_2, _, _ = non_negative_factorization(
+        A, H=H, update_H=False, init=init, solver=solver,
+        regularization=regularization, random_state=1, tol=1e-2)
+
+    model_class = NMF(init=init, solver=solver,
+                      regularization=regularization,
+                      random_state=1, tol=1e-2)
+    W_cls = model_class.fit_transform(A)
+    W_cls_2 = model_class.transform(A)
+
+    assert_array_almost_equal(W_nmf, W_cls, decimal=10)
+    assert_array_almost_equal(W_nmf_2, W_cls_2, decimal=10)
 
 
 def test_non_negative_factorization_checking():

From d28048d3be53ffc430ede108d766a3b850a2a8ff Mon Sep 17 00:00:00 2001
From: Bharat123rox
Date: Wed, 15 Jul 2020 10:22:09 +0530
Subject: [PATCH 04/10] Change default value to None

---
 doc/modules/decomposition.rst | 2 +-
 sklearn/decomposition/_nmf.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst
index 330dc59c759ed..ed00b267812c1 100644
--- a/doc/modules/decomposition.rst
+++ b/doc/modules/decomposition.rst
@@ -752,7 +752,7 @@ and the regularized objective function is:
     + \frac{\alpha(1-\rho)}{2} ||W||_{\mathrm{Fro}} ^ 2
     + \frac{\alpha(1-\rho)}{2} ||H||_{\mathrm{Fro}} ^ 2
 
-:class:`NMF` regularizes both W and H by default. The :attr:`regularization`
+:class:`NMF` regularizes both W and H. The :attr:`regularization`
 parameter allows for finer control, with which only W, only H,
 or both can be regularized.
 
diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index 27c5145780926..69617a317aa90 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1166,7 +1166,7 @@ class NMF(TransformerMixin, BaseEstimator):
     regularization : {'both', 'components', 'transformation', None}
         Select whether the regularization affects the components (H), the
         transformation (W), both or none of them.
-        Defaults to 'both'.
+        Default: None.
 
         .. versionadded:: 0.24
 
@@ -1243,7 +1243,7 @@ class NMF(TransformerMixin, BaseEstimator):
     @_deprecate_positional_args
     def __init__(self, n_components=None, *, init=None, solver='cd',
                  beta_loss='frobenius', tol=1e-4, max_iter=200,
-                 regularization='both', random_state=None, alpha=0.,
+                 regularization=None, random_state=None, alpha=0.,
                  l1_ratio=0., verbose=0, shuffle=False):
         self.n_components = n_components
         self.init = init

From 76dc605e21213807b578d83d60af9aa0cc359606 Mon Sep 17 00:00:00 2001
From: Bharat123rox
Date: Fri, 17 Jul 2020 14:31:32 +0530
Subject: [PATCH 05/10] Revert back to default value of "both"

---
 doc/modules/decomposition.rst | 2 +-
 sklearn/decomposition/_nmf.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst
index ed00b267812c1..330dc59c759ed 100644
--- a/doc/modules/decomposition.rst
+++ b/doc/modules/decomposition.rst
@@ -752,7 +752,7 @@ and the regularized objective function is:
     + \frac{\alpha(1-\rho)}{2} ||W||_{\mathrm{Fro}} ^ 2
     + \frac{\alpha(1-\rho)}{2} ||H||_{\mathrm{Fro}} ^ 2
 
-:class:`NMF` regularizes both W and H. The :attr:`regularization`
+:class:`NMF` regularizes both W and H by default. The :attr:`regularization`
 parameter allows for finer control, with which only W, only H,
 or both can be regularized.
 
diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index 69617a317aa90..27c5145780926 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1166,7 +1166,7 @@ class NMF(TransformerMixin, BaseEstimator):
     regularization : {'both', 'components', 'transformation', None}
         Select whether the regularization affects the components (H), the
         transformation (W), both or none of them.
-        Default: None.
+        Defaults to 'both'.
 
         .. versionadded:: 0.24
 
@@ -1243,7 +1243,7 @@ class NMF(TransformerMixin, BaseEstimator):
     @_deprecate_positional_args
     def __init__(self, n_components=None, *, init=None, solver='cd',
                  beta_loss='frobenius', tol=1e-4, max_iter=200,
-                 regularization=None, random_state=None, alpha=0.,
+                 regularization='both', random_state=None, alpha=0.,
                  l1_ratio=0., verbose=0, shuffle=False):
         self.n_components = n_components
         self.init = init

From 053c2b139ae942277ef1e62ddba87738c94a60ea Mon Sep 17 00:00:00 2001
From: Bharat Raghunathan
Date: Wed, 5 Aug 2020 15:20:17 +0000
Subject: [PATCH 06/10] Update default value documentation acc to @thomasjpfan

Co-authored-by: Thomas J. Fan
---
 sklearn/decomposition/_nmf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index f5e2e1be25a0e..ba5907caed85c 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1165,10 +1165,10 @@ class NMF(TransformerMixin, BaseEstimator):
     max_iter : integer, default: 200
         Maximum number of iterations before timing out.
 
-    regularization : {'both', 'components', 'transformation', None}
+    regularization : {'both', 'components', 'transformation', None}, \
+        default='both'
         Select whether the regularization affects the components (H), the
         transformation (W), both or none of them.
-        Defaults to 'both'.
 
         .. versionadded:: 0.24
 

From a10a3e5cc2667ec91ce5d18318327bfcb68ad074 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan"
Date: Wed, 5 Aug 2020 11:37:10 -0400
Subject: [PATCH 07/10] CLN Places regularization at the end

---
 sklearn/decomposition/_nmf.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index ba5907caed85c..7420f7ade90ff 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1165,13 +1165,6 @@ class NMF(TransformerMixin, BaseEstimator):
     max_iter : integer, default: 200
         Maximum number of iterations before timing out.
 
-    regularization : {'both', 'components', 'transformation', None}, \
-        default='both'
-        Select whether the regularization affects the components (H), the
-        transformation (W), both or none of them.
-
-        .. versionadded:: 0.24
-
     random_state : int, RandomState instance, default=None
         Used for initialisation (when ``init`` == 'nndsvdar' or 'random'),
         and in Coordinate Descent. Pass an int for reproducible
@@ -1205,6 +1198,13 @@ class NMF(TransformerMixin, BaseEstimator):
         .. versionadded:: 0.17
            *shuffle* parameter used in the Coordinate Descent solver.
 
+    regularization : {'both', 'components', 'transformation', None}, \
+        default='both'
+        Select whether the regularization affects the components (H), the
+        transformation (W), both or none of them.
+
+        .. versionadded:: 0.24
+
     Attributes
     ----------
     components_ : array, [n_components, n_features]
@@ -1245,20 +1245,20 @@ class NMF(TransformerMixin, BaseEstimator):
     @_deprecate_positional_args
     def __init__(self, n_components=None, *, init=None, solver='cd',
                  beta_loss='frobenius', tol=1e-4, max_iter=200,
-                 regularization='both', random_state=None, alpha=0.,
-                 l1_ratio=0., verbose=0, shuffle=False):
+                 random_state=None, alpha=0., l1_ratio=0., verbose=0,
+                 shuffle=False, regularization='both'):
         self.n_components = n_components
         self.init = init
         self.solver = solver
         self.beta_loss = beta_loss
         self.tol = tol
         self.max_iter = max_iter
-        self.regularization = regularization
         self.random_state = random_state
         self.alpha = alpha
         self.l1_ratio = l1_ratio
         self.verbose = verbose
         self.shuffle = shuffle
+        self.regularization = regularization
 
     def _more_tags(self):
         return {'requires_positive_X': True}

From 9af99afd3ef52dbfc77f638520d4b1ea4dbd04f9 Mon Sep 17 00:00:00 2001
From: Bharat123rox
Date: Wed, 5 Aug 2020 22:22:55 +0530
Subject: [PATCH 08/10] Add whatsnew entry

---
 doc/whats_new/v0.24.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
index fb02533193444..4ea2f90b4bd58 100644
--- a/doc/whats_new/v0.24.rst
+++ b/doc/whats_new/v0.24.rst
@@ -142,6 +142,12 @@ Changelog
   for large ``batch_size`` and ``n_samples`` values.`
   :pr:`17985` by :user:`Alan Butler ` and :user:`Amanda Dsouza `.
 
+- |Enhancement| :class:`decomposition.NMF` now supports the optional parameter
+  `regularization`, which can take the values `None`, `components`,
+  `transformation` or `both`, in accordance with
+  :func:`decomposition.NMF.non_negative_factorization`.
+  :pr:`17414` by :user:`Bharat Raghunathan `.
+
 :mod:`sklearn.ensemble`
 .......................
From d56dbf74d6b57ec8e265f68af3ad87d92e7dc079 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan"
Date: Thu, 6 Aug 2020 11:43:39 -0400
Subject: [PATCH 09/10] DOC Fix

---
 sklearn/decomposition/_nmf.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index 7420f7ade90ff..8f2c521e5afba 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1198,8 +1198,7 @@ class NMF(TransformerMixin, BaseEstimator):
         .. versionadded:: 0.17
            *shuffle* parameter used in the Coordinate Descent solver.
 
-    regularization : {'both', 'components', 'transformation', None}, \
-        default='both'
+    regularization : {'both', 'components', 'transformation', None}, default='both'
         Select whether the regularization affects the components (H), the
         transformation (W), both or none of them.
 
         .. versionadded:: 0.24

From 3b299be1c322edf9256580e35d2246b8fe0d77b1 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan"
Date: Thu, 6 Aug 2020 11:48:18 -0400
Subject: [PATCH 10/10] DOC Fix

---
 doc/whats_new/v0.24.rst       | 4 ----
 sklearn/decomposition/_nmf.py | 9 +++++----
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
index 24c4b183565c9..c46bb51793ad1 100644
--- a/doc/whats_new/v0.24.rst
+++ b/doc/whats_new/v0.24.rst
@@ -114,10 +114,6 @@ Changelog
   argument `rotation`, which can take the value `None`, `'varimax'` or
   `'quartimax'.` :pr:`11064` by :user:`Jona Sassenhagen `.
 
-- |Fix| Avoid overflows on Windows in :func:`decomposition.IncrementalPCA.partial_fit`
-  for large ``batch_size`` and ``n_samples`` values.`
-  :pr:`17985` by :user:`Alan Butler ` and :user:`Amanda Dsouza `.
-
 - |Enhancement| :class:`decomposition.NMF` now supports the optional parameter
   `regularization`, which can take the values `None`, `components`,
   `transformation` or `both`, in accordance with
   :func:`decomposition.NMF.non_negative_factorization`.
   :pr:`17414` by :user:`Bharat Raghunathan `.
diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index 8f2c521e5afba..ebc905a7fbcb3 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1081,7 +1081,7 @@ def non_negative_factorization(X, W=None, H=None, n_components=None, *,
 
 
 class NMF(TransformerMixin, BaseEstimator):
-    r"""Non-Negative Matrix Factorization (NMF)
+    """Non-Negative Matrix Factorization (NMF)
 
     Find two non-negative matrices (W, H) whose product approximates the non-
     negative matrix X. This factorization can be used for example for
@@ -1097,8 +1097,8 @@ class NMF(TransformerMixin, BaseEstimator):
 
     Where::
 
-        ||A||_Fro^2 = \sum_{i,j} A_{ij}^2 (Frobenius norm)
-        ||vec(A)||_1 = \sum_{i,j} abs(A_{ij}) (Elementwise L1 norm)
+        ||A||_Fro^2 = \\sum_{i,j} A_{ij}^2 (Frobenius norm)
+        ||vec(A)||_1 = \\sum_{i,j} abs(A_{ij}) (Elementwise L1 norm)
 
     For multiplicative-update ('mu') solver, the Frobenius norm
     (0.5 * ||X - WH||_Fro^2) can be changed into another beta-divergence loss,
@@ -1198,7 +1198,8 @@ class NMF(TransformerMixin, BaseEstimator):
         .. versionadded:: 0.17
            *shuffle* parameter used in the Coordinate Descent solver.
 
-    regularization : {'both', 'components', 'transformation', None}, default='both'
+    regularization : {'both', 'components', 'transformation', None}, \
+        default='both'
         Select whether the regularization affects the components (H), the
         transformation (W), both or none of them.
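
A minimal usage sketch of the parameter this series adds (assuming the final
state of the series, i.e. ``NMF.__init__`` accepting ``regularization`` with
default ``'both'``; the data and settings below are illustrative only, not
part of the patches)::

    import numpy as np
    from sklearn.decomposition import NMF

    # Small non-negative matrix to factorize.
    X = np.abs(np.random.RandomState(0).randn(6, 5))

    # Penalize only W (the transformation); H is left unregularized.
    # Note: alpha must be > 0 for the regularization term to have any effect.
    nmf = NMF(n_components=2, init='random', alpha=0.5, l1_ratio=0.5,
              regularization='transformation', random_state=0)
    W = nmf.fit_transform(X)
    H = nmf.components_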