diff --git a/doc/whats_new/upcoming_changes/sklearn.decomposition/30443.feature.rst b/doc/whats_new/upcoming_changes/sklearn.decomposition/30443.feature.rst new file mode 100644 index 0000000000000..5678039b69065 --- /dev/null +++ b/doc/whats_new/upcoming_changes/sklearn.decomposition/30443.feature.rst @@ -0,0 +1,4 @@ +- :class:`~sklearn.decomposition.DictionaryLearning`, + :class:`~sklearn.decomposition.SparseCoder` and + :class:`~sklearn.decomposition.MiniBatchDictionaryLearning` now have an + ``inverse_transform`` method. By :user:`Rémi Flamary <rflamary>` diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py index 7410eeb4405df..282376550de24 100644 --- a/sklearn/decomposition/_dict_learning.py +++ b/sklearn/decomposition/_dict_learning.py @@ -1142,6 +1142,44 @@ def transform(self, X): check_is_fitted(self) return self._transform(X, self.components_) + def _inverse_transform(self, code, dictionary): + """Private method allowing to accommodate both DictionaryLearning and + SparseCoder.""" + code = check_array(code) + # compute number of expected features in code + expected_n_components = dictionary.shape[0] + if self.split_sign: + expected_n_components += expected_n_components + if not code.shape[1] == expected_n_components: + raise ValueError( + "The number of components in the code is different from the " + "number of components in the dictionary. " + f"Expected {expected_n_components}, got {code.shape[1]}." + ) + if self.split_sign: + n_samples, n_features = code.shape + n_features //= 2 + code = code[:, :n_features] - code[:, n_features:] + + return code @ dictionary + + def inverse_transform(self, X): + """Transform data back to its original space. + + Parameters + ---------- + X : array-like of shape (n_samples, n_components) + Data to be transformed back. Must have the same number of + components as the data used to train the model. + + Returns + ------- + X_new : ndarray of shape (n_samples, n_features) + Transformed data. 
+ """ + check_is_fitted(self) + return self._inverse_transform(X, self.components_) + class SparseCoder(_BaseSparseCoding, BaseEstimator): """Sparse coding. @@ -1329,6 +1367,22 @@ def transform(self, X, y=None): """ return super()._transform(X, self.dictionary) + def inverse_transform(self, X): + """Transform data back to its original space. + + Parameters + ---------- + X : array-like of shape (n_samples, n_components) + Data to be transformed back. Must have the same number of + components as the data used to train the model. + + Returns + ------- + X_new : ndarray of shape (n_samples, n_features) + Transformed data. + """ + return self._inverse_transform(X, self.dictionary) + def __sklearn_tags__(self): tags = super().__sklearn_tags__() tags.requires_fit = False diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py index f52c851012481..717c56d0abdbe 100644 --- a/sklearn/decomposition/tests/test_dict_learning.py +++ b/sklearn/decomposition/tests/test_dict_learning.py @@ -202,10 +202,16 @@ def test_dict_learning_reconstruction(): ) code = dico.fit(X).transform(X) assert_array_almost_equal(np.dot(code, dico.components_), X) + assert_array_almost_equal(dico.inverse_transform(code), X) dico.set_params(transform_algorithm="lasso_lars") code = dico.transform(X) assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2) + assert_array_almost_equal(dico.inverse_transform(code), X, decimal=2) + + # test error raised for wrong code size + with pytest.raises(ValueError, match="Expected 12, got 11."): + dico.inverse_transform(code[:, :-1]) # used to test lars here too, but there's no guarantee the number of # nonzero atoms is right. 
@@ -268,6 +274,8 @@ def test_dict_learning_split(): n_components, transform_algorithm="threshold", random_state=0 ) code = dico.fit(X).transform(X) + Xr = dico.inverse_transform(code) + dico.split_sign = True split_code = dico.transform(X) @@ -275,6 +283,9 @@ def test_dict_learning_split(): split_code[:, :n_components] - split_code[:, n_components:], code ) + Xr2 = dico.inverse_transform(split_code) + assert_array_almost_equal(Xr, Xr2) + def test_dict_learning_online_shapes(): rng = np.random.RandomState(0) @@ -591,9 +602,12 @@ def test_sparse_coder_estimator(): V /= np.sum(V**2, axis=1)[:, np.newaxis] coder = SparseCoder( dictionary=V, transform_algorithm="lasso_lars", transform_alpha=0.001 - ).transform(X) - assert not np.all(coder == 0) - assert np.sqrt(np.sum((np.dot(coder, V) - X) ** 2)) < 0.1 + ) + code = coder.fit_transform(X) + Xr = coder.inverse_transform(code) + assert not np.all(code == 0) + assert np.sqrt(np.sum((np.dot(code, V) - X) ** 2)) < 0.1 + np.testing.assert_allclose(Xr, np.dot(code, V)) def test_sparse_coder_estimator_clone():