Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit f734e11

Browse files
bharatr21, adrinjalali, and thomasjpfan
authored
ENH: Add regularization to the main NMF class (#17414)
* ENH: Add regularization to the main NMF class * Update _nmf with suggestions from code review Update `_nmf.py` with suggestion from review Co-authored-by: Adrin Jalali <[email protected]> * Refactor tests, fix linter errors * Change default value to None * Revert back to default value of "both" * Update default value documentation acc to @thomasjpfan Co-authored-by: Thomas J. Fan <[email protected]> * CLN Places regularization at the end * Add whatsnew entry * DOC Fix * DOC Fix Co-authored-by: Adrin Jalali <[email protected]> Co-authored-by: Thomas J. Fan <[email protected]>
1 parent 2b303b1 commit f734e11

File tree

4 files changed

+87
-48
lines changed

4 files changed

+87
-48
lines changed

doc/modules/decomposition.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -759,9 +759,9 @@ and the regularized objective function is:
759759
+ \frac{\alpha(1-\rho)}{2} ||W||_{\mathrm{Fro}} ^ 2
760760
+ \frac{\alpha(1-\rho)}{2} ||H||_{\mathrm{Fro}} ^ 2
761761
762-
:class:`NMF` regularizes both W and H. The public function
763-
:func:`non_negative_factorization` allows a finer control through the
764-
:attr:`regularization` attribute, and may regularize only W, only H, or both.
762+
:class:`NMF` regularizes both W and H by default. The :attr:`regularization`
763+
parameter allows for finer control, with which only W, only H,
764+
or both can be regularized.
765765

766766
NMF with a beta-divergence
767767
--------------------------

doc/whats_new/v0.24.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,12 @@ Changelog
114114
argument `rotation`, which can take the value `None`, `'varimax'` or `'quartimax'`.
115115
:pr:`11064` by :user:`Jona Sassenhagen <jona-sassenhagen>`.
116116

117+
- |Enhancement| :class:`decomposition.NMF` now supports the optional parameter
118+
`regularization`, which can take the values `None`, `'components'`,
119+
`'transformation'` or `'both'`, in accordance with
120+
:func:`decomposition.non_negative_factorization`.
121+
:pr:`17414` by :user:`Bharat Raghunathan <Bharat123rox>`.
122+
117123
:mod:`sklearn.ensemble`
118124
.......................
119125

sklearn/decomposition/_nmf.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1081,7 +1081,7 @@ def non_negative_factorization(X, W=None, H=None, n_components=None, *,
10811081

10821082

10831083
class NMF(TransformerMixin, BaseEstimator):
1084-
r"""Non-Negative Matrix Factorization (NMF)
1084+
"""Non-Negative Matrix Factorization (NMF)
10851085
10861086
Find two non-negative matrices (W, H) whose product approximates the non-
10871087
negative matrix X. This factorization can be used for example for
@@ -1097,8 +1097,8 @@ class NMF(TransformerMixin, BaseEstimator):
10971097
10981098
Where::
10991099
1100-
||A||_Fro^2 = \sum_{i,j} A_{ij}^2 (Frobenius norm)
1101-
||vec(A)||_1 = \sum_{i,j} abs(A_{ij}) (Elementwise L1 norm)
1100+
||A||_Fro^2 = \\sum_{i,j} A_{ij}^2 (Frobenius norm)
1101+
||vec(A)||_1 = \\sum_{i,j} abs(A_{ij}) (Elementwise L1 norm)
11021102
11031103
For multiplicative-update ('mu') solver, the Frobenius norm
11041104
(0.5 * ||X - WH||_Fro^2) can be changed into another beta-divergence loss,
@@ -1198,6 +1198,13 @@ class NMF(TransformerMixin, BaseEstimator):
11981198
.. versionadded:: 0.17
11991199
*shuffle* parameter used in the Coordinate Descent solver.
12001200
1201+
regularization : {'both', 'components', 'transformation', None}, \
1202+
default='both'
1203+
Select whether the regularization affects the components (H), the
1204+
transformation (W), both or none of them.
1205+
1206+
.. versionadded:: 0.24
1207+
12011208
Attributes
12021209
----------
12031210
components_ : array, [n_components, n_features]
@@ -1239,7 +1246,7 @@ class NMF(TransformerMixin, BaseEstimator):
12391246
def __init__(self, n_components=None, *, init=None, solver='cd',
12401247
beta_loss='frobenius', tol=1e-4, max_iter=200,
12411248
random_state=None, alpha=0., l1_ratio=0., verbose=0,
1242-
shuffle=False):
1249+
shuffle=False, regularization='both'):
12431250
self.n_components = n_components
12441251
self.init = init
12451252
self.solver = solver
@@ -1251,6 +1258,7 @@ def __init__(self, n_components=None, *, init=None, solver='cd',
12511258
self.l1_ratio = l1_ratio
12521259
self.verbose = verbose
12531260
self.shuffle = shuffle
1261+
self.regularization = regularization
12541262

12551263
def _more_tags(self):
12561264
return {'requires_positive_X': True}
@@ -1285,7 +1293,7 @@ def fit_transform(self, X, y=None, W=None, H=None):
12851293
X=X, W=W, H=H, n_components=self.n_components, init=self.init,
12861294
update_H=True, solver=self.solver, beta_loss=self.beta_loss,
12871295
tol=self.tol, max_iter=self.max_iter, alpha=self.alpha,
1288-
l1_ratio=self.l1_ratio, regularization='both',
1296+
l1_ratio=self.l1_ratio, regularization=self.regularization,
12891297
random_state=self.random_state, verbose=self.verbose,
12901298
shuffle=self.shuffle)
12911299

@@ -1334,9 +1342,10 @@ def transform(self, X):
13341342
X=X, W=None, H=self.components_, n_components=self.n_components_,
13351343
init=self.init, update_H=False, solver=self.solver,
13361344
beta_loss=self.beta_loss, tol=self.tol, max_iter=self.max_iter,
1337-
alpha=self.alpha, l1_ratio=self.l1_ratio, regularization='both',
1338-
random_state=self.random_state, verbose=self.verbose,
1339-
shuffle=self.shuffle)
1345+
alpha=self.alpha, l1_ratio=self.l1_ratio,
1346+
regularization=self.regularization,
1347+
random_state=self.random_state,
1348+
verbose=self.verbose, shuffle=self.shuffle)
13401349

13411350
return W
13421351

sklearn/decomposition/tests/test_nmf.py

Lines changed: 61 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,14 @@
2020

2121

2222
@pytest.mark.parametrize('solver', ['cd', 'mu'])
23-
def test_convergence_warning(solver):
23+
@pytest.mark.parametrize('regularization',
24+
[None, 'both', 'components', 'transformation'])
25+
def test_convergence_warning(solver, regularization):
2426
convergence_warning = ("Maximum number of iterations 1 reached. "
2527
"Increase it to improve convergence.")
2628
A = np.ones((2, 2))
2729
with pytest.warns(ConvergenceWarning, match=convergence_warning):
28-
NMF(solver=solver, max_iter=1).fit(A)
30+
NMF(solver=solver, regularization=regularization, max_iter=1).fit(A)
2931

3032

3133
def test_initialize_nn_output():
@@ -44,6 +46,8 @@ def test_parameter_checking():
4446
assert_raise_message(ValueError, msg, NMF(solver=name).fit, A)
4547
msg = "Invalid init parameter: got 'spam' instead of one of"
4648
assert_raise_message(ValueError, msg, NMF(init=name).fit, A)
49+
msg = "Invalid regularization parameter: got 'spam' instead of one of"
50+
assert_raise_message(ValueError, msg, NMF(regularization=name).fit, A)
4751
msg = "Invalid beta_loss parameter: got 'spam' instead of one"
4852
assert_raise_message(ValueError, msg, NMF(solver='mu',
4953
beta_loss=name).fit, A)
@@ -97,36 +101,43 @@ def test_initialize_variants():
97101

98102
# ignore UserWarning raised when both solver='mu' and init='nndsvd'
99103
@ignore_warnings(category=UserWarning)
100-
def test_nmf_fit_nn_output():
104+
@pytest.mark.parametrize('solver', ('cd', 'mu'))
105+
@pytest.mark.parametrize('init',
106+
(None, 'nndsvd', 'nndsvda', 'nndsvdar', 'random'))
107+
@pytest.mark.parametrize('regularization',
108+
(None, 'both', 'components', 'transformation'))
109+
def test_nmf_fit_nn_output(solver, init, regularization):
101110
# Test that the decomposition does not contain negative values
102111
A = np.c_[5. - np.arange(1, 6),
103112
5. + np.arange(1, 6)]
104-
for solver in ('cd', 'mu'):
105-
for init in (None, 'nndsvd', 'nndsvda', 'nndsvdar', 'random'):
106-
model = NMF(n_components=2, solver=solver, init=init,
107-
random_state=0)
108-
transf = model.fit_transform(A)
109-
assert not((model.components_ < 0).any() or
110-
(transf < 0).any())
113+
model = NMF(n_components=2, solver=solver, init=init,
114+
regularization=regularization, random_state=0)
115+
transf = model.fit_transform(A)
116+
assert not((model.components_ < 0).any() or
117+
(transf < 0).any())
111118

112119

113120
@pytest.mark.parametrize('solver', ('cd', 'mu'))
114-
def test_nmf_fit_close(solver):
121+
@pytest.mark.parametrize('regularization',
122+
(None, 'both', 'components', 'transformation'))
123+
def test_nmf_fit_close(solver, regularization):
115124
rng = np.random.mtrand.RandomState(42)
116125
# Test that the fit is not too far away
117126
pnmf = NMF(5, solver=solver, init='nndsvdar', random_state=0,
118-
max_iter=600)
127+
regularization=regularization, max_iter=600)
119128
X = np.abs(rng.randn(6, 5))
120129
assert pnmf.fit(X).reconstruction_err_ < 0.1
121130

122131

123132
@pytest.mark.parametrize('solver', ('cd', 'mu'))
124-
def test_nmf_transform(solver):
133+
@pytest.mark.parametrize('regularization',
134+
(None, 'both', 'components', 'transformation'))
135+
def test_nmf_transform(solver, regularization):
125136
# Test that NMF.transform returns close values
126137
rng = np.random.mtrand.RandomState(42)
127138
A = np.abs(rng.randn(6, 5))
128139
m = NMF(solver=solver, n_components=3, init='random',
129-
random_state=0, tol=1e-5)
140+
regularization=regularization, random_state=0, tol=1e-5)
130141
ft = m.fit_transform(A)
131142
t = m.transform(A)
132143
assert_array_almost_equal(ft, t, decimal=2)
@@ -148,12 +159,14 @@ def test_nmf_transform_custom_init():
148159

149160

150161
@pytest.mark.parametrize('solver', ('cd', 'mu'))
151-
def test_nmf_inverse_transform(solver):
162+
@pytest.mark.parametrize('regularization',
163+
(None, 'both', 'components', 'transformation'))
164+
def test_nmf_inverse_transform(solver, regularization):
152165
# Test that NMF.inverse_transform returns close values
153166
random_state = np.random.RandomState(0)
154167
A = np.abs(random_state.randn(6, 4))
155168
m = NMF(solver=solver, n_components=4, init='random', random_state=0,
156-
max_iter=1000)
169+
regularization=regularization, max_iter=1000)
157170
ft = m.fit_transform(A)
158171
A_new = m.inverse_transform(ft)
159172
assert_array_almost_equal(A, A_new, decimal=2)
@@ -167,7 +180,9 @@ def test_n_components_greater_n_features():
167180

168181

169182
@pytest.mark.parametrize('solver', ['cd', 'mu'])
170-
def test_nmf_sparse_input(solver):
183+
@pytest.mark.parametrize('regularization',
184+
[None, 'both', 'components', 'transformation'])
185+
def test_nmf_sparse_input(solver, regularization):
171186
# Test that sparse matrices are accepted as input
172187
from scipy.sparse import csc_matrix
173188

@@ -177,7 +192,8 @@ def test_nmf_sparse_input(solver):
177192
A_sparse = csc_matrix(A)
178193

179194
est1 = NMF(solver=solver, n_components=5, init='random',
180-
random_state=0, tol=1e-2)
195+
regularization=regularization, random_state=0,
196+
tol=1e-2)
181197
est2 = clone(est1)
182198

183199
W1 = est1.fit_transform(A)
@@ -204,28 +220,32 @@ def test_nmf_sparse_transform():
204220
assert_array_almost_equal(A_fit_tr, A_tr, decimal=1)
205221

206222

207-
def test_non_negative_factorization_consistency():
223+
@pytest.mark.parametrize('init', ['random', 'nndsvd'])
224+
@pytest.mark.parametrize('solver', ('cd', 'mu'))
225+
@pytest.mark.parametrize('regularization',
226+
(None, 'both', 'components', 'transformation'))
227+
def test_non_negative_factorization_consistency(init, solver, regularization):
208228
# Test that the function is called in the same way, either directly
209229
# or through the NMF class
210230
rng = np.random.mtrand.RandomState(42)
211231
A = np.abs(rng.randn(10, 10))
212232
A[:, 2 * np.arange(5)] = 0
213233

214-
for init in ['random', 'nndsvd']:
215-
for solver in ('cd', 'mu'):
216-
W_nmf, H, _ = non_negative_factorization(
217-
A, init=init, solver=solver, random_state=1, tol=1e-2)
218-
W_nmf_2, _, _ = non_negative_factorization(
219-
A, H=H, update_H=False, init=init, solver=solver,
220-
random_state=1, tol=1e-2)
234+
W_nmf, H, _ = non_negative_factorization(
235+
A, init=init, solver=solver,
236+
regularization=regularization, random_state=1, tol=1e-2)
237+
W_nmf_2, _, _ = non_negative_factorization(
238+
A, H=H, update_H=False, init=init, solver=solver,
239+
regularization=regularization, random_state=1, tol=1e-2)
221240

222-
model_class = NMF(init=init, solver=solver, random_state=1,
223-
tol=1e-2)
224-
W_cls = model_class.fit_transform(A)
225-
W_cls_2 = model_class.transform(A)
241+
model_class = NMF(init=init, solver=solver,
242+
regularization=regularization,
243+
random_state=1, tol=1e-2)
244+
W_cls = model_class.fit_transform(A)
245+
W_cls_2 = model_class.transform(A)
226246

227-
assert_array_almost_equal(W_nmf, W_cls, decimal=10)
228-
assert_array_almost_equal(W_nmf_2, W_cls_2, decimal=10)
247+
assert_array_almost_equal(W_nmf, W_cls, decimal=10)
248+
assert_array_almost_equal(W_nmf_2, W_cls_2, decimal=10)
229249

230250

231251
def test_non_negative_factorization_checking():
@@ -515,25 +535,29 @@ def test_nmf_underflow():
515535
(np.int32, np.float64),
516536
(np.int64, np.float64)])
517537
@pytest.mark.parametrize("solver", ["cd", "mu"])
518-
def test_nmf_dtype_match(dtype_in, dtype_out, solver):
538+
@pytest.mark.parametrize("regularization",
539+
(None, "both", "components", "transformation"))
540+
def test_nmf_dtype_match(dtype_in, dtype_out, solver, regularization):
519541
# Check that NMF preserves dtype (float32 and float64)
520542
X = np.random.RandomState(0).randn(20, 15).astype(dtype_in, copy=False)
521543
np.abs(X, out=X)
522-
nmf = NMF(solver=solver)
544+
nmf = NMF(solver=solver, regularization=regularization)
523545

524546
assert nmf.fit(X).transform(X).dtype == dtype_out
525547
assert nmf.fit_transform(X).dtype == dtype_out
526548
assert nmf.components_.dtype == dtype_out
527549

528550

529551
@pytest.mark.parametrize("solver", ["cd", "mu"])
530-
def test_nmf_float32_float64_consistency(solver):
552+
@pytest.mark.parametrize("regularization",
553+
(None, "both", "components", "transformation"))
554+
def test_nmf_float32_float64_consistency(solver, regularization):
531555
# Check that the result of NMF is the same between float32 and float64
532556
X = np.random.RandomState(0).randn(50, 7)
533557
np.abs(X, out=X)
534-
nmf32 = NMF(solver=solver, random_state=0)
558+
nmf32 = NMF(solver=solver, regularization=regularization, random_state=0)
535559
W32 = nmf32.fit_transform(X.astype(np.float32))
536-
nmf64 = NMF(solver=solver, random_state=0)
560+
nmf64 = NMF(solver=solver, regularization=regularization, random_state=0)
537561
W64 = nmf64.fit_transform(X)
538562

539563
assert_allclose(W32, W64, rtol=1e-6, atol=1e-5)

0 commit comments

Comments
 (0)