From ff3349b964d563383ac5196b08e185617b3e2579 Mon Sep 17 00:00:00 2001
From: Bharat123rox
Date: Tue, 2 Jun 2020 01:05:35 +0530
Subject: [PATCH 01/10] ENH: Add regularization to the main NMF class

---
 doc/modules/decomposition.rst           |  6 +-
 sklearn/decomposition/_nmf.py           | 20 ++++--
 sklearn/decomposition/tests/test_nmf.py | 84 ++++++++++++++++---------
 3 files changed, 70 insertions(+), 40 deletions(-)

diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst
index def755f42b796..330dc59c759ed 100644
--- a/doc/modules/decomposition.rst
+++ b/doc/modules/decomposition.rst
@@ -752,9 +752,9 @@ and the regularized objective function is:
     + \frac{\alpha(1-\rho)}{2} ||W||_{\mathrm{Fro}} ^ 2
     + \frac{\alpha(1-\rho)}{2} ||H||_{\mathrm{Fro}} ^ 2
 
-:class:`NMF` regularizes both W and H. The public function
-:func:`non_negative_factorization` allows a finer control through the
-:attr:`regularization` attribute, and may regularize only W, only H, or both.
+:class:`NMF` regularizes both W and H by default. The :attr:`regularization`
+parameter allows for finer control, with which only W, only H,
+or both can be regularized.
 
 NMF with a beta-divergence
 --------------------------
diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index f1385d21596e3..0c6846bb42b74 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1163,6 +1163,12 @@ class NMF(TransformerMixin, BaseEstimator):
     max_iter : integer, default: 200
         Maximum number of iterations before timing out.
 
+    regularization : 'both' | 'components' | 'transformation' | None
+        Select whether the regularization affects the components (H), the
+        transformation (W), both or none of them. Defaults to 'both'.
+
+        .. versionadded:: 0.24
+
     random_state : int, RandomState instance, default=None
         Used for initialisation (when ``init`` == 'nndsvdar' or 'random'),
         and in Coordinate Descent. Pass an int for reproducible
@@ -1236,14 +1242,15 @@ class NMF(TransformerMixin, BaseEstimator):
     @_deprecate_positional_args
     def __init__(self, n_components=None, *, init=None, solver='cd',
                  beta_loss='frobenius', tol=1e-4, max_iter=200,
-                 random_state=None, alpha=0., l1_ratio=0., verbose=0,
-                 shuffle=False):
+                 regularization='both', random_state=None, alpha=0.,
+                 l1_ratio=0., verbose=0, shuffle=False):
         self.n_components = n_components
         self.init = init
         self.solver = solver
         self.beta_loss = beta_loss
         self.tol = tol
         self.max_iter = max_iter
+        self.regularization = regularization
         self.random_state = random_state
         self.alpha = alpha
         self.l1_ratio = l1_ratio
@@ -1283,7 +1290,7 @@ def fit_transform(self, X, y=None, W=None, H=None):
             X=X, W=W, H=H, n_components=self.n_components, init=self.init,
             update_H=True, solver=self.solver, beta_loss=self.beta_loss,
             tol=self.tol, max_iter=self.max_iter, alpha=self.alpha,
-            l1_ratio=self.l1_ratio, regularization='both',
+            l1_ratio=self.l1_ratio, regularization=self.regularization,
             random_state=self.random_state, verbose=self.verbose,
             shuffle=self.shuffle)
 
@@ -1332,9 +1339,10 @@ def transform(self, X):
             X=X, W=None, H=self.components_, n_components=self.n_components_,
             init=self.init, update_H=False, solver=self.solver,
             beta_loss=self.beta_loss, tol=self.tol, max_iter=self.max_iter,
-            alpha=self.alpha, l1_ratio=self.l1_ratio, regularization='both',
-            random_state=self.random_state, verbose=self.verbose,
-            shuffle=self.shuffle)
+            alpha=self.alpha, l1_ratio=self.l1_ratio,
+            regularization=self.regularization,
+            random_state=self.random_state,
+            verbose=self.verbose, shuffle=self.shuffle)
 
         return W
 
diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py
index a7ef1243d8e25..50b0e180e36af 100644
--- a/sklearn/decomposition/tests/test_nmf.py
+++ b/sklearn/decomposition/tests/test_nmf.py
@@ -20,12 +20,14 @@
 
 
 @pytest.mark.parametrize('solver', ['cd', 'mu'])
-def test_convergence_warning(solver):
+@pytest.mark.parametrize('regularization',
+                         [None, 'both', 'components', 'transformation'])
+def test_convergence_warning(solver, regularization):
     convergence_warning = ("Maximum number of iterations 1 reached. "
                            "Increase it to improve convergence.")
     A = np.ones((2, 2))
     with pytest.warns(ConvergenceWarning, match=convergence_warning):
-        NMF(solver=solver, max_iter=1).fit(A)
+        NMF(solver=solver, regularization=regularization, max_iter=1).fit(A)
 
 
 def test_initialize_nn_output():
@@ -44,6 +46,8 @@ def test_parameter_checking():
         assert_raise_message(ValueError, msg, NMF(solver=name).fit, A)
         msg = "Invalid init parameter: got 'spam' instead of one of"
         assert_raise_message(ValueError, msg, NMF(init=name).fit, A)
+        msg = "Invalid regularization parameter: got 'spam' instead of one of"
+        assert_raise_message(ValueError, msg, NMF(regularization=name).fit, A)
         msg = "Invalid beta_loss parameter: got 'spam' instead of one"
         assert_raise_message(ValueError, msg, NMF(solver='mu',
                                                   beta_loss=name).fit, A)
@@ -103,30 +107,36 @@ def test_nmf_fit_nn_output():
               5. + np.arange(1, 6)]
     for solver in ('cd', 'mu'):
         for init in (None, 'nndsvd', 'nndsvda', 'nndsvdar', 'random'):
-            model = NMF(n_components=2, solver=solver, init=init,
-                        random_state=0)
-            transf = model.fit_transform(A)
-            assert not((model.components_ < 0).any() or
-                       (transf < 0).any())
+            for regularization in (None, 'both',
+                                   'components', 'transformation'):
+                model = NMF(n_components=2, solver=solver, init=init,
+                            regularization=regularization, random_state=0)
+                transf = model.fit_transform(A)
+                assert not((model.components_ < 0).any() or
+                           (transf < 0).any())
 
 
 @pytest.mark.parametrize('solver', ('cd', 'mu'))
-def test_nmf_fit_close(solver):
+@pytest.mark.parametrize('regularization',
+                         (None, 'both', 'components', 'transformation'))
+def test_nmf_fit_close(solver, regularization):
     rng = np.random.mtrand.RandomState(42)
     # Test that the fit is not too far away
     pnmf = NMF(5, solver=solver, init='nndsvdar', random_state=0,
-               max_iter=600)
+               regularization=regularization, max_iter=600)
     X = np.abs(rng.randn(6, 5))
     assert pnmf.fit(X).reconstruction_err_ < 0.1
 
 
 @pytest.mark.parametrize('solver', ('cd', 'mu'))
-def test_nmf_transform(solver):
+@pytest.mark.parametrize('regularization',
+                         (None, 'both', 'components', 'transformation'))
+def test_nmf_transform(solver, regularization):
     # Test that NMF.transform returns close values
     rng = np.random.mtrand.RandomState(42)
     A = np.abs(rng.randn(6, 5))
     m = NMF(solver=solver, n_components=3, init='random',
-            random_state=0, tol=1e-5)
+            regularization=regularization, random_state=0, tol=1e-5)
     ft = m.fit_transform(A)
     t = m.transform(A)
     assert_array_almost_equal(ft, t, decimal=2)
@@ -148,12 +158,14 @@ def test_nmf_transform_custom_init():
 
 
 @pytest.mark.parametrize('solver', ('cd', 'mu'))
-def test_nmf_inverse_transform(solver):
+@pytest.mark.parametrize('regularization',
+                         (None, 'both', 'components', 'transformation'))
+def test_nmf_inverse_transform(solver, regularization):
     # Test that NMF.inverse_transform returns close values
     random_state = np.random.RandomState(0)
     A = np.abs(random_state.randn(6, 4))
     m = NMF(solver=solver, n_components=4, init='random', random_state=0,
-            max_iter=1000)
+            regularization=regularization, max_iter=1000)
     ft = m.fit_transform(A)
     A_new = m.inverse_transform(ft)
     assert_array_almost_equal(A, A_new, decimal=2)
@@ -176,9 +188,11 @@ def test_nmf_sparse_input():
     A_sparse = csc_matrix(A)
 
     for solver in ('cd', 'mu'):
-        est1 = NMF(solver=solver, n_components=5, init='random',
-                   random_state=0, tol=1e-2)
-        est2 = clone(est1)
+        for regularization in (None, 'both', 'components', 'transformation'):
+            est1 = NMF(solver=solver, n_components=5, init='random',
+                       regularization=regularization, random_state=0,
+                       tol=1e-2)
+            est2 = clone(est1)
 
     W1 = est1.fit_transform(A)
     W2 = est2.fit_transform(A_sparse)
@@ -213,16 +227,20 @@ def test_non_negative_factorization_consistency():
 
     for init in ['random', 'nndsvd']:
         for solver in ('cd', 'mu'):
-            W_nmf, H, _ = non_negative_factorization(
-                A, init=init, solver=solver, random_state=1, tol=1e-2)
-            W_nmf_2, _, _ = non_negative_factorization(
-                A, H=H, update_H=False, init=init, solver=solver,
-                random_state=1, tol=1e-2)
-
-            model_class = NMF(init=init, solver=solver, random_state=1,
-                              tol=1e-2)
-            W_cls = model_class.fit_transform(A)
-            W_cls_2 = model_class.transform(A)
+            for regularization in (None, 'both',
+                                   'components', 'transformation'):
+                W_nmf, H, _ = non_negative_factorization(
+                    A, init=init, solver=solver,
+                    regularization=regularization, random_state=1, tol=1e-2)
+                W_nmf_2, _, _ = non_negative_factorization(
+                    A, H=H, update_H=False, init=init, solver=solver,
+                    regularization=regularization, random_state=1, tol=1e-2)
+
+                model_class = NMF(init=init, solver=solver,
+                                  regularization=regularization,
+                                  random_state=1, tol=1e-2)
+                W_cls = model_class.fit_transform(A)
+                W_cls_2 = model_class.transform(A)
 
             assert_array_almost_equal(W_nmf, W_cls, decimal=10)
             assert_array_almost_equal(W_nmf_2, W_cls_2, decimal=10)
@@ -515,11 +533,13 @@ def test_nmf_underflow():
                                     (np.int32, np.float64),
                                     (np.int64, np.float64)])
 @pytest.mark.parametrize("solver", ["cd", "mu"])
-def test_nmf_dtype_match(dtype_in, dtype_out, solver):
+@pytest.mark.parametrize("regularization",
+                         (None, "both", "components", "transformation"))
+def test_nmf_dtype_match(dtype_in, dtype_out, solver, regularization):
     # Check that NMF preserves dtype (float32 and float64)
     X = np.random.RandomState(0).randn(20, 15).astype(dtype_in, copy=False)
     np.abs(X, out=X)
-    nmf = NMF(solver=solver)
+    nmf = NMF(solver=solver, regularization=regularization)
 
     assert nmf.fit(X).transform(X).dtype == dtype_out
     assert nmf.fit_transform(X).dtype == dtype_out
@@ -527,13 +547,15 @@ def test_nmf_dtype_match(dtype_in, dtype_out, solver):
 
 
 @pytest.mark.parametrize("solver", ["cd", "mu"])
-def test_nmf_float32_float64_consistency(solver):
+@pytest.mark.parametrize("regularization",
+                         (None, "both", "components", "transformation"))
+def test_nmf_float32_float64_consistency(solver, regularization):
     # Check that the result of NMF is the same between float32 and float64
     X = np.random.RandomState(0).randn(50, 7)
     np.abs(X, out=X)
-    nmf32 = NMF(solver=solver, random_state=0)
+    nmf32 = NMF(solver=solver, regularization=regularization, random_state=0)
     W32 = nmf32.fit_transform(X.astype(np.float32))
-    nmf64 = NMF(solver=solver, random_state=0)
+    nmf64 = NMF(solver=solver, regularization=regularization, random_state=0)
     W64 = nmf64.fit_transform(X)
 
     assert_allclose(W32, W64, rtol=1e-6, atol=1e-5)

From 8033bbcced0c8db300f33900142a1245484685c0 Mon Sep 17 00:00:00 2001
From: Bharat Raghunathan
Date: Tue, 14 Jul 2020 17:30:52 +0000
Subject: [PATCH 02/10] Update _nmf with suggestions from code review

Update `_nmf.py` with suggestion from review

Co-authored-by: Adrin Jalali
---
 sklearn/decomposition/_nmf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index 0c6846bb42b74..296341145844d 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1163,7 +1163,7 @@ class NMF(TransformerMixin, BaseEstimator):
     max_iter : integer, default: 200
         Maximum number of iterations before timing out.
 
-    regularization : 'both' | 'components' | 'transformation' | None
+    regularization : {'both', 'components', 'transformation', None}, default=None
         Select whether the regularization affects the components (H), the
         transformation (W), both or none of them. Defaults to 'both'.
 
From 83c3c14037ed439c61bd16afae62f87f7bd0db36 Mon Sep 17 00:00:00 2001
From: Bharat123rox
Date: Wed, 15 Jul 2020 00:03:54 +0530
Subject: [PATCH 03/10] Refactor tests, fix linter errors

---
 sklearn/decomposition/_nmf.py           |  5 +-
 sklearn/decomposition/tests/test_nmf.py | 61 +++++++++++++------------
 2 files changed, 34 insertions(+), 32 deletions(-)

diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index 296341145844d..27c5145780926 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1163,9 +1163,10 @@ class NMF(TransformerMixin, BaseEstimator):
     max_iter : integer, default: 200
         Maximum number of iterations before timing out.
 
-    regularization : {'both', 'components', 'transformation', None}, default=None
+    regularization : {'both', 'components', 'transformation', None}
         Select whether the regularization affects the components (H), the
-        transformation (W), both or none of them. Defaults to 'both'.
+        transformation (W), both or none of them.
+        Defaults to 'both'.
 
         .. versionadded:: 0.24
 
diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py
index 50b0e180e36af..466dc9d0f03f9 100644
--- a/sklearn/decomposition/tests/test_nmf.py
+++ b/sklearn/decomposition/tests/test_nmf.py
@@ -101,19 +101,20 @@ def test_initialize_variants():
 
 # ignore UserWarning raised when both solver='mu' and init='nndsvd'
 @ignore_warnings(category=UserWarning)
-def test_nmf_fit_nn_output():
+@pytest.mark.parametrize('solver', ('cd', 'mu'))
+@pytest.mark.parametrize('init',
+                         (None, 'nndsvd', 'nndsvda', 'nndsvdar', 'random'))
+@pytest.mark.parametrize('regularization',
+                         (None, 'both', 'components', 'transformation'))
+def test_nmf_fit_nn_output(solver, init, regularization):
     # Test that the decomposition does not contain negative values
     A = np.c_[5. - np.arange(1, 6),
               5. + np.arange(1, 6)]
-    for solver in ('cd', 'mu'):
-        for init in (None, 'nndsvd', 'nndsvda', 'nndsvdar', 'random'):
-            for regularization in (None, 'both',
-                                   'components', 'transformation'):
-                model = NMF(n_components=2, solver=solver, init=init,
-                            regularization=regularization, random_state=0)
-                transf = model.fit_transform(A)
-                assert not((model.components_ < 0).any() or
-                           (transf < 0).any())
+    model = NMF(n_components=2, solver=solver, init=init,
+                regularization=regularization, random_state=0)
+    transf = model.fit_transform(A)
+    assert not((model.components_ < 0).any() or
+               (transf < 0).any())
 
 
 @pytest.mark.parametrize('solver', ('cd', 'mu'))
@@ -218,32 +219,32 @@ def test_nmf_sparse_transform():
     assert_array_almost_equal(A_fit_tr, A_tr, decimal=1)
 
 
-def test_non_negative_factorization_consistency():
+@pytest.mark.parametrize('init', ['random', 'nndsvd'])
+@pytest.mark.parametrize('solver', ('cd', 'mu'))
+@pytest.mark.parametrize('regularization',
+                         (None, 'both', 'components', 'transformation'))
+def test_non_negative_factorization_consistency(init, solver, regularization):
     # Test that the function is called in the same way, either directly
     # or through the NMF class
     rng = np.random.mtrand.RandomState(42)
     A = np.abs(rng.randn(10, 10))
     A[:, 2 * np.arange(5)] = 0
 
-    for init in ['random', 'nndsvd']:
-        for solver in ('cd', 'mu'):
-            for regularization in (None, 'both',
-                                   'components', 'transformation'):
-                W_nmf, H, _ = non_negative_factorization(
-                    A, init=init, solver=solver,
-                    regularization=regularization, random_state=1, tol=1e-2)
-                W_nmf_2, _, _ = non_negative_factorization(
-                    A, H=H, update_H=False, init=init, solver=solver,
-                    regularization=regularization, random_state=1, tol=1e-2)
-
-                model_class = NMF(init=init, solver=solver,
-                                  regularization=regularization,
-                                  random_state=1, tol=1e-2)
-                W_cls = model_class.fit_transform(A)
-                W_cls_2 = model_class.transform(A)
-
-            assert_array_almost_equal(W_nmf, W_cls, decimal=10)
-            assert_array_almost_equal(W_nmf_2, W_cls_2, decimal=10)
+    W_nmf, H, _ = non_negative_factorization(
+        A, init=init, solver=solver,
+        regularization=regularization, random_state=1, tol=1e-2)
+    W_nmf_2, _, _ = non_negative_factorization(
+        A, H=H, update_H=False, init=init, solver=solver,
+        regularization=regularization, random_state=1, tol=1e-2)
+
+    model_class = NMF(init=init, solver=solver,
+                      regularization=regularization,
+                      random_state=1, tol=1e-2)
+    W_cls = model_class.fit_transform(A)
+    W_cls_2 = model_class.transform(A)
+
+    assert_array_almost_equal(W_nmf, W_cls, decimal=10)
+    assert_array_almost_equal(W_nmf_2, W_cls_2, decimal=10)
 
 
 def test_non_negative_factorization_checking():

From d28048d3be53ffc430ede108d766a3b850a2a8ff Mon Sep 17 00:00:00 2001
From: Bharat123rox
Date: Wed, 15 Jul 2020 10:22:09 +0530
Subject: [PATCH 04/10] Change default value to None

---
 doc/modules/decomposition.rst | 2 +-
 sklearn/decomposition/_nmf.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst
index 330dc59c759ed..ed00b267812c1 100644
--- a/doc/modules/decomposition.rst
+++ b/doc/modules/decomposition.rst
@@ -752,7 +752,7 @@ and the regularized objective function is:
     + \frac{\alpha(1-\rho)}{2} ||W||_{\mathrm{Fro}} ^ 2
     + \frac{\alpha(1-\rho)}{2} ||H||_{\mathrm{Fro}} ^ 2
 
-:class:`NMF` regularizes both W and H by default. The :attr:`regularization`
+:class:`NMF` regularizes both W and H. The :attr:`regularization`
 parameter allows for finer control, with which only W, only H,
 or both can be regularized.
 
diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index 27c5145780926..69617a317aa90 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1166,7 +1166,7 @@ class NMF(TransformerMixin, BaseEstimator):
     regularization : {'both', 'components', 'transformation', None}
         Select whether the regularization affects the components (H), the
         transformation (W), both or none of them.
-        Defaults to 'both'.
+        Default: None.
 
         .. versionadded:: 0.24
 
@@ -1243,7 +1243,7 @@ class NMF(TransformerMixin, BaseEstimator):
     @_deprecate_positional_args
     def __init__(self, n_components=None, *, init=None, solver='cd',
                  beta_loss='frobenius', tol=1e-4, max_iter=200,
-                 regularization='both', random_state=None, alpha=0.,
+                 regularization=None, random_state=None, alpha=0.,
                  l1_ratio=0., verbose=0, shuffle=False):
         self.n_components = n_components
         self.init = init

From 76dc605e21213807b578d83d60af9aa0cc359606 Mon Sep 17 00:00:00 2001
From: Bharat123rox
Date: Fri, 17 Jul 2020 14:31:32 +0530
Subject: [PATCH 05/10] Revert back to default value of "both"

---
 doc/modules/decomposition.rst | 2 +-
 sklearn/decomposition/_nmf.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst
index ed00b267812c1..330dc59c759ed 100644
--- a/doc/modules/decomposition.rst
+++ b/doc/modules/decomposition.rst
@@ -752,7 +752,7 @@ and the regularized objective function is:
     + \frac{\alpha(1-\rho)}{2} ||W||_{\mathrm{Fro}} ^ 2
     + \frac{\alpha(1-\rho)}{2} ||H||_{\mathrm{Fro}} ^ 2
 
-:class:`NMF` regularizes both W and H. The :attr:`regularization`
+:class:`NMF` regularizes both W and H by default. The :attr:`regularization`
 parameter allows for finer control, with which only W, only H,
 or both can be regularized.
 
diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index 69617a317aa90..27c5145780926 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1166,7 +1166,7 @@ class NMF(TransformerMixin, BaseEstimator):
     regularization : {'both', 'components', 'transformation', None}
         Select whether the regularization affects the components (H), the
         transformation (W), both or none of them.
-        Default: None.
+        Defaults to 'both'.
 
         .. versionadded:: 0.24
 
@@ -1243,7 +1243,7 @@ class NMF(TransformerMixin, BaseEstimator):
     @_deprecate_positional_args
     def __init__(self, n_components=None, *, init=None, solver='cd',
                  beta_loss='frobenius', tol=1e-4, max_iter=200,
-                 regularization=None, random_state=None, alpha=0.,
+                 regularization='both', random_state=None, alpha=0.,
                  l1_ratio=0., verbose=0, shuffle=False):
         self.n_components = n_components
         self.init = init

From 053c2b139ae942277ef1e62ddba87738c94a60ea Mon Sep 17 00:00:00 2001
From: Bharat Raghunathan
Date: Wed, 5 Aug 2020 15:20:17 +0000
Subject: [PATCH 06/10] Update default value documentation acc to @thomasjpfan

Co-authored-by: Thomas J. Fan
---
 sklearn/decomposition/_nmf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index f5e2e1be25a0e..ba5907caed85c 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1165,10 +1165,10 @@ class NMF(TransformerMixin, BaseEstimator):
     max_iter : integer, default: 200
         Maximum number of iterations before timing out.
 
-    regularization : {'both', 'components', 'transformation', None}
+    regularization : {'both', 'components', 'transformation', None}, \
+        default='both'
         Select whether the regularization affects the components (H), the
         transformation (W), both or none of them.
-        Defaults to 'both'.
 
         .. versionadded:: 0.24
 

From a10a3e5cc2667ec91ce5d18318327bfcb68ad074 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan"
Date: Wed, 5 Aug 2020 11:37:10 -0400
Subject: [PATCH 07/10] CLN Places regularization at the end

---
 sklearn/decomposition/_nmf.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index ba5907caed85c..7420f7ade90ff 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1165,13 +1165,6 @@ class NMF(TransformerMixin, BaseEstimator):
     max_iter : integer, default: 200
         Maximum number of iterations before timing out.
 
-    regularization : {'both', 'components', 'transformation', None}, \
-        default='both'
-        Select whether the regularization affects the components (H), the
-        transformation (W), both or none of them.
-
-        .. versionadded:: 0.24
-
     random_state : int, RandomState instance, default=None
         Used for initialisation (when ``init`` == 'nndsvdar' or 'random'),
         and in Coordinate Descent. Pass an int for reproducible
@@ -1205,6 +1198,13 @@ class NMF(TransformerMixin, BaseEstimator):
         .. versionadded:: 0.17
            *shuffle* parameter used in the Coordinate Descent solver.
 
+    regularization : {'both', 'components', 'transformation', None}, \
+        default='both'
+        Select whether the regularization affects the components (H), the
+        transformation (W), both or none of them.
+
+        .. versionadded:: 0.24
+
     Attributes
     ----------
     components_ : array, [n_components, n_features]
@@ -1245,20 +1245,20 @@ class NMF(TransformerMixin, BaseEstimator):
     @_deprecate_positional_args
     def __init__(self, n_components=None, *, init=None, solver='cd',
                  beta_loss='frobenius', tol=1e-4, max_iter=200,
-                 regularization='both', random_state=None, alpha=0.,
-                 l1_ratio=0., verbose=0, shuffle=False):
+                 random_state=None, alpha=0., l1_ratio=0., verbose=0,
+                 shuffle=False, regularization='both'):
         self.n_components = n_components
         self.init = init
         self.solver = solver
         self.beta_loss = beta_loss
         self.tol = tol
         self.max_iter = max_iter
-        self.regularization = regularization
         self.random_state = random_state
         self.alpha = alpha
         self.l1_ratio = l1_ratio
         self.verbose = verbose
         self.shuffle = shuffle
+        self.regularization = regularization
 
     def _more_tags(self):
         return {'requires_positive_X': True}

From 9af99afd3ef52dbfc77f638520d4b1ea4dbd04f9 Mon Sep 17 00:00:00 2001
From: Bharat123rox
Date: Wed, 5 Aug 2020 22:22:55 +0530
Subject: [PATCH 08/10] Add whatsnew entry

---
 doc/whats_new/v0.24.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
index fb02533193444..4ea2f90b4bd58 100644
--- a/doc/whats_new/v0.24.rst
+++ b/doc/whats_new/v0.24.rst
@@ -142,6 +142,12 @@ Changelog
   for large ``batch_size`` and ``n_samples`` values.`
   :pr:`17985` by :user:`Alan Butler ` and :user:`Amanda Dsouza `.
 
+- |Enhancement| :class:`decomposition.NMF` now supports the optional parameter
+  `regularization`, which can take the values `None`, `components`,
+  `transformation` or `both`, in accordance with
+  :func:`decomposition.NMF.non_negative_factorization`.
+  :pr:`17414` by :user:`Bharat Raghunathan `.
+
 :mod:`sklearn.ensemble`
 .......................
From d56dbf74d6b57ec8e265f68af3ad87d92e7dc079 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan"
Date: Thu, 6 Aug 2020 11:43:39 -0400
Subject: [PATCH 09/10] DOC Fix

---
 sklearn/decomposition/_nmf.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index 7420f7ade90ff..8f2c521e5afba 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1198,8 +1198,7 @@ class NMF(TransformerMixin, BaseEstimator):
         .. versionadded:: 0.17
            *shuffle* parameter used in the Coordinate Descent solver.
 
-    regularization : {'both', 'components', 'transformation', None}, \
-        default='both'
+    regularization : {'both', 'components', 'transformation', None}, default='both'
         Select whether the regularization affects the components (H), the
         transformation (W), both or none of them.
 
         .. versionadded:: 0.24

From 3b299be1c322edf9256580e35d2246b8fe0d77b1 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan"
Date: Thu, 6 Aug 2020 11:48:18 -0400
Subject: [PATCH 10/10] DOC Fix

---
 doc/whats_new/v0.24.rst       | 4 ----
 sklearn/decomposition/_nmf.py | 9 +++++----
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
index 24c4b183565c9..c46bb51793ad1 100644
--- a/doc/whats_new/v0.24.rst
+++ b/doc/whats_new/v0.24.rst
@@ -114,10 +114,6 @@ Changelog
   argument `rotation`, which can take the value `None`, `'varimax'` or
   `'quartimax'.` :pr:`11064` by :user:`Jona Sassenhagen `.
 
-- |Fix| Avoid overflows on Windows in :func:`decomposition.IncrementalPCA.partial_fit`
-  for large ``batch_size`` and ``n_samples`` values.`
-  :pr:`17985` by :user:`Alan Butler ` and :user:`Amanda Dsouza `.
-
 - |Enhancement| :class:`decomposition.NMF` now supports the optional parameter
   `regularization`, which can take the values `None`, `components`,
   `transformation` or `both`, in accordance with
   :func:`decomposition.NMF.non_negative_factorization`.
   :pr:`17414` by :user:`Bharat Raghunathan `.
diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index 8f2c521e5afba..ebc905a7fbcb3 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1081,7 +1081,7 @@ def non_negative_factorization(X, W=None, H=None, n_components=None, *,
 
 
 class NMF(TransformerMixin, BaseEstimator):
-    r"""Non-Negative Matrix Factorization (NMF)
+    """Non-Negative Matrix Factorization (NMF)
 
     Find two non-negative matrices (W, H) whose product approximates the non-
     negative matrix X. This factorization can be used for example for
@@ -1097,8 +1097,8 @@ class NMF(TransformerMixin, BaseEstimator):
 
     Where::
 
-        ||A||_Fro^2 = \sum_{i,j} A_{ij}^2 (Frobenius norm)
-        ||vec(A)||_1 = \sum_{i,j} abs(A_{ij}) (Elementwise L1 norm)
+        ||A||_Fro^2 = \\sum_{i,j} A_{ij}^2 (Frobenius norm)
+        ||vec(A)||_1 = \\sum_{i,j} abs(A_{ij}) (Elementwise L1 norm)
 
     For multiplicative-update ('mu') solver, the Frobenius norm
     (0.5 * ||X - WH||_Fro^2) can be changed into another beta-divergence loss,
@@ -1198,7 +1198,8 @@ class NMF(TransformerMixin, BaseEstimator):
         .. versionadded:: 0.17
            *shuffle* parameter used in the Coordinate Descent solver.
 
-    regularization : {'both', 'components', 'transformation', None}, default='both'
+    regularization : {'both', 'components', 'transformation', None}, \
+        default='both'
         Select whether the regularization affects the components (H), the
         transformation (W), both or none of them.
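
A minimal usage sketch of the parameter this series adds (assuming the final
state of the series, i.e. ``NMF.__init__`` accepting ``regularization`` with
default ``'both'``; the data and settings below are illustrative only, not
part of the patches)::

    import numpy as np
    from sklearn.decomposition import NMF

    # Small non-negative matrix to factorize.
    X = np.abs(np.random.RandomState(0).randn(6, 5))

    # Penalize only W (the transformation); H is left unregularized.
    # Note: alpha must be > 0 for the regularization term to have any effect.
    nmf = NMF(n_components=2, init='random', alpha=0.5, l1_ratio=0.5,
              regularization='transformation', random_state=0)
    W = nmf.fit_transform(X)
    H = nmf.components_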