From ce755c3dc43f46db9a566fea279ba15032473f3c Mon Sep 17 00:00:00 2001 From: takoika <> Date: Thu, 27 May 2021 12:27:23 +0900 Subject: [PATCH 01/13] Add test for type checking between input and transformed --- sklearn/decomposition/tests/test_sparse_pca.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index d6ddfa01a49d0..f9d5d12e58cf6 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -189,7 +189,7 @@ def test_pca_vs_spca(): assert_allclose(results_test_pca, results_test_spca) -@pytest.mark.parametrize("SPCA", [SparsePCA, MiniBatchSparsePCA]) +@pytest.mark.parametrize("SPCA", (SparsePCA, MiniBatchSparsePCA)) @pytest.mark.parametrize("n_components", [None, 3]) def test_spca_n_components_(SPCA, n_components): rng = np.random.RandomState(0) @@ -202,3 +202,16 @@ def test_spca_n_components_(SPCA, n_components): assert model.n_components_ == n_components else: assert model.n_components_ == n_features + +@pytest.mark.parametrize("SPCA", (SparsePCA, MiniBatchSparsePCA)) +@pytest.mark.parametrize("data_type, expected_type", ( + (np.float32, np.float32), + (np.float64, np.float64), + (np.int32, np.float64), + (np.int64, np.float64))) +def test_lda_dtype_match(SPCA, data_type, expected_type): + n_samples, n_features, n_components = 12, 10, 3 + rng = np.random.RandomState(0) + input_array = rng.randn(n_samples, n_features).astype(data_type) + transformed = SPCA(n_components=n_components).fit_transform(input_array) + assert transformed.dtype == expected_type From cfe31b8fe9dc8c59fcadcc84b4bd6dabc9df208e Mon Sep 17 00:00:00 2001 From: takoika <> Date: Thu, 27 May 2021 12:30:44 +0900 Subject: [PATCH 02/13] Fix test name --- sklearn/decomposition/tests/test_sparse_pca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/decomposition/tests/test_sparse_pca.py 
b/sklearn/decomposition/tests/test_sparse_pca.py index f9d5d12e58cf6..bf346b0ee8ce7 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -209,7 +209,7 @@ def test_spca_n_components_(SPCA, n_components): (np.float64, np.float64), (np.int32, np.float64), (np.int64, np.float64))) -def test_lda_dtype_match(SPCA, data_type, expected_type): +def test_sparse_pca_dtype_match(SPCA, data_type, expected_type): n_samples, n_features, n_components = 12, 10, 3 rng = np.random.RandomState(0) input_array = rng.randn(n_samples, n_features).astype(data_type) From 0e63c5e00e6075043da7182569b4722ec6b4cf82 Mon Sep 17 00:00:00 2001 From: takoika <> Date: Thu, 30 Dec 2021 21:10:42 +0900 Subject: [PATCH 03/13] Add sprase PCA unit test for numerical consitency among float32 and float64 --- .../decomposition/tests/test_sparse_pca.py | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index eb81180c58099..d6eb92502f962 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -207,6 +207,7 @@ def test_spca_n_components_(SPCA, n_components): @pytest.mark.parametrize("SPCA", (SparsePCA, MiniBatchSparsePCA)) +@pytest.mark.parametrize("method", ("lars", "cd")) @pytest.mark.parametrize( "data_type, expected_type", ( @@ -216,14 +217,33 @@ def test_spca_n_components_(SPCA, n_components): (np.int64, np.float64), ), ) -def test_sparse_pca_dtype_match(SPCA, data_type, expected_type): +def test_sparse_pca_dtype_match(SPCA, method, data_type, expected_type): n_samples, n_features, n_components = 12, 10, 3 rng = np.random.RandomState(0) input_array = rng.randn(n_samples, n_features).astype(data_type) - transformed = SPCA(n_components=n_components).fit_transform(input_array) + transformed = SPCA(n_components=n_components, method=method).fit_transform( + 
input_array + ) assert transformed.dtype == expected_type +@pytest.mark.parametrize("SPCA", (SparsePCA, MiniBatchSparsePCA)) +@pytest.mark.parametrize("method", ("lars", "cd")) +def test_sparse_pca_numerical_consistency(SPCA, method): + rtol = 1e-3 + alpha = 2 + n_samples, n_features, n_components = 12, 10, 3 + rng = np.random.RandomState(0) + input_array = rng.randn(n_samples, n_features) + transformed_32 = SPCA( + n_components=n_components, alpha=alpha, method=method, random_state=0 + ).fit_transform(input_array.astype(np.float32)) + transformed_64 = SPCA( + n_components=n_components, alpha=alpha, method=method, random_state=0 + ).fit_transform(input_array.astype(np.float64)) + assert_allclose(transformed_64, transformed_32, rtol=rtol) + + @pytest.mark.parametrize("SPCA", [SparsePCA, MiniBatchSparsePCA]) def test_spca_feature_names_out(SPCA): """Check feature names out for *SparsePCA.""" From 6ccc37ced85420678371d9c0c08374d0ad5be869 Mon Sep 17 00:00:00 2001 From: takoika <> Date: Thu, 30 Dec 2021 21:30:27 +0900 Subject: [PATCH 04/13] Update changelog --- doc/whats_new/v1.1.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index 935be46bba5af..e03086351320b 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -154,6 +154,10 @@ Changelog and :class:`decomposition.SparseCoder` preserve dtype for `numpy.float32`. :pr:`22002` by :user:`Takeshi Oura `. +- |Enhancement| :class:`decomposition.SparsePCA` and :class:`decomposition.MiniBatchSparsePCA` + preserve dtype for `numpy.flaot32`. + :pr:`` by :user:`Takeshi Oura `. 
+ - |API| Adds :term:`get_feature_names_out` to all transformers in the :mod:`~sklearn.decomposition` module: :class:`~sklearn.decomposition.DictionaryLearning`, From bb8c4a8c95ac87eac5beb85c25f4aafbcd8a4b4b Mon Sep 17 00:00:00 2001 From: takoika <> Date: Sat, 1 Jan 2022 16:57:31 +0900 Subject: [PATCH 05/13] Add perserves_dtype tag for SparsePCA and MiniBAtchSparsePCA --- sklearn/decomposition/_sparse_pca.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sklearn/decomposition/_sparse_pca.py b/sklearn/decomposition/_sparse_pca.py index 31c8d2168a3e6..6acd83da24dc8 100644 --- a/sklearn/decomposition/_sparse_pca.py +++ b/sklearn/decomposition/_sparse_pca.py @@ -241,6 +241,11 @@ def _n_features_out(self): """Number of transformed output features.""" return self.components_.shape[0] + def _more_tags(self): + return { + "preserves_dtype": [np.float64, np.float32], + } + class MiniBatchSparsePCA(SparsePCA): """Mini-batch Sparse Principal Components Analysis. @@ -434,3 +439,8 @@ def fit(self, X, y=None): self.n_components_ = len(self.components_) return self + + def _more_tags(self): + return { + "preserves_dtype": [np.float64, np.float32], + } From 944dcb1d4584e6a4f8c6d8cb2d363b6146abada1 Mon Sep 17 00:00:00 2001 From: takoika <> Date: Sat, 1 Jan 2022 17:06:37 +0900 Subject: [PATCH 06/13] Fill pr number --- doc/whats_new/v1.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index e03086351320b..a67de742be86b 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -156,7 +156,7 @@ Changelog - |Enhancement| :class:`decomposition.SparsePCA` and :class:`decomposition.MiniBatchSparsePCA` preserve dtype for `numpy.flaot32`. - :pr:`` by :user:`Takeshi Oura `. + :pr:`22111` by :user:`Takeshi Oura `. 
- |API| Adds :term:`get_feature_names_out` to all transformers in the :mod:`~sklearn.decomposition` module: From 465568b1dd2341467eb08ac7591fb2c60bfa3def Mon Sep 17 00:00:00 2001 From: takoika <> Date: Sat, 1 Jan 2022 22:09:33 +0900 Subject: [PATCH 07/13] Add comments --- sklearn/decomposition/tests/test_sparse_pca.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index d6eb92502f962..f111c28a75e1f 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -218,6 +218,7 @@ def test_spca_n_components_(SPCA, n_components): ), ) def test_sparse_pca_dtype_match(SPCA, method, data_type, expected_type): + # Verify output matrix dtype n_samples, n_features, n_components = 12, 10, 3 rng = np.random.RandomState(0) input_array = rng.randn(n_samples, n_features).astype(data_type) @@ -230,6 +231,7 @@ def test_sparse_pca_dtype_match(SPCA, method, data_type, expected_type): @pytest.mark.parametrize("SPCA", (SparsePCA, MiniBatchSparsePCA)) @pytest.mark.parametrize("method", ("lars", "cd")) def test_sparse_pca_numerical_consistency(SPCA, method): + # verify numerically consistent among np.float32 and np.float64 rtol = 1e-3 alpha = 2 n_samples, n_features, n_components = 12, 10, 3 From 4450364a1d756dc42c8dd05c5759b8c4d24c15ed Mon Sep 17 00:00:00 2001 From: takoika <> Date: Tue, 4 Jan 2022 21:54:28 +0900 Subject: [PATCH 08/13] Revert unlrelated change --- sklearn/decomposition/tests/test_sparse_pca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index f111c28a75e1f..12f639eb855e2 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -191,7 +191,7 @@ def test_pca_vs_spca(): assert_allclose(results_test_pca, results_test_spca) 
-@pytest.mark.parametrize("SPCA", (SparsePCA, MiniBatchSparsePCA)) +@pytest.mark.parametrize("SPCA", [SparsePCA, MiniBatchSparsePCA]) @pytest.mark.parametrize("n_components", [None, 3]) def test_spca_n_components_(SPCA, n_components): rng = np.random.RandomState(0) From df37345e3bfa58fce8479718185b85be06f2b42e Mon Sep 17 00:00:00 2001 From: takoika <> Date: Tue, 4 Jan 2022 22:06:47 +0900 Subject: [PATCH 09/13] Fix typo --- doc/whats_new/v1.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index a67de742be86b..542d3ef0946db 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -155,7 +155,7 @@ Changelog :pr:`22002` by :user:`Takeshi Oura `. - |Enhancement| :class:`decomposition.SparsePCA` and :class:`decomposition.MiniBatchSparsePCA` - preserve dtype for `numpy.flaot32`. + preserve dtype for `numpy.float32`. :pr:`22111` by :user:`Takeshi Oura `. - |API| Adds :term:`get_feature_names_out` to all transformers in the From 6a0b7c533f4e01d85f5e5685a3efd956c88c17d4 Mon Sep 17 00:00:00 2001 From: takoika <> Date: Tue, 4 Jan 2022 22:15:15 +0900 Subject: [PATCH 10/13] Add test for dype matching and numerical consitency for trained components --- .../decomposition/tests/test_sparse_pca.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index 12f639eb855e2..d188ef830a903 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -222,10 +222,11 @@ def test_sparse_pca_dtype_match(SPCA, method, data_type, expected_type): n_samples, n_features, n_components = 12, 10, 3 rng = np.random.RandomState(0) input_array = rng.randn(n_samples, n_features).astype(data_type) - transformed = SPCA(n_components=n_components, method=method).fit_transform( - input_array - ) + transformer = SPCA(n_components=n_components, 
method=method) + transformed = transformer.fit_transform(input_array) + assert transformed.dtype == expected_type + assert transformer.components_.dtype == expected_type @pytest.mark.parametrize("SPCA", (SparsePCA, MiniBatchSparsePCA)) @@ -237,13 +238,19 @@ def test_sparse_pca_numerical_consistency(SPCA, method): n_samples, n_features, n_components = 12, 10, 3 rng = np.random.RandomState(0) input_array = rng.randn(n_samples, n_features) - transformed_32 = SPCA( + + transformer_32 = SPCA( n_components=n_components, alpha=alpha, method=method, random_state=0 - ).fit_transform(input_array.astype(np.float32)) - transformed_64 = SPCA( + ) + transformed_32 = transformer_32.fit_transform(input_array.astype(np.float32)) + + transformer_64 = SPCA( n_components=n_components, alpha=alpha, method=method, random_state=0 - ).fit_transform(input_array.astype(np.float64)) + ) + transformed_64 = transformer_64.fit_transform(input_array.astype(np.float64)) + assert_allclose(transformed_64, transformed_32, rtol=rtol) + assert_allclose(transformer_64.components_, transformer_32.components_, rtol=rtol) @pytest.mark.parametrize("SPCA", [SparsePCA, MiniBatchSparsePCA]) From 0a2f2438aaba2ea13c33b4d4b43833d5fd73943b Mon Sep 17 00:00:00 2001 From: takoika <> Date: Tue, 4 Jan 2022 22:20:03 +0900 Subject: [PATCH 11/13] change variable name --- sklearn/decomposition/tests/test_sparse_pca.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index d188ef830a903..d21628b658666 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -222,11 +222,11 @@ def test_sparse_pca_dtype_match(SPCA, method, data_type, expected_type): n_samples, n_features, n_components = 12, 10, 3 rng = np.random.RandomState(0) input_array = rng.randn(n_samples, n_features).astype(data_type) - transformer = SPCA(n_components=n_components, 
method=method) - transformed = transformer.fit_transform(input_array) + model = SPCA(n_components=n_components, method=method) + transformed = model.fit_transform(input_array) assert transformed.dtype == expected_type - assert transformer.components_.dtype == expected_type + assert model.components_.dtype == expected_type @pytest.mark.parametrize("SPCA", (SparsePCA, MiniBatchSparsePCA)) @@ -239,18 +239,18 @@ def test_sparse_pca_numerical_consistency(SPCA, method): rng = np.random.RandomState(0) input_array = rng.randn(n_samples, n_features) - transformer_32 = SPCA( + model_32 = SPCA( n_components=n_components, alpha=alpha, method=method, random_state=0 ) - transformed_32 = transformer_32.fit_transform(input_array.astype(np.float32)) + transformed_32 = model_32.fit_transform(input_array.astype(np.float32)) - transformer_64 = SPCA( + model_64 = SPCA( n_components=n_components, alpha=alpha, method=method, random_state=0 ) - transformed_64 = transformer_64.fit_transform(input_array.astype(np.float64)) + transformed_64 = model_64.fit_transform(input_array.astype(np.float64)) assert_allclose(transformed_64, transformed_32, rtol=rtol) - assert_allclose(transformer_64.components_, transformer_32.components_, rtol=rtol) + assert_allclose(model_64.components_, model_32.components_, rtol=rtol) @pytest.mark.parametrize("SPCA", [SparsePCA, MiniBatchSparsePCA]) From 0b9123624d9d2c88e23d3d1e4f2c653c0e7f0969 Mon Sep 17 00:00:00 2001 From: takoika <> Date: Tue, 4 Jan 2022 22:21:07 +0900 Subject: [PATCH 12/13] fix typo --- sklearn/decomposition/tests/test_sparse_pca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index d21628b658666..db92ec582abdd 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -232,7 +232,7 @@ def test_sparse_pca_dtype_match(SPCA, method, data_type, expected_type): 
@pytest.mark.parametrize("SPCA", (SparsePCA, MiniBatchSparsePCA)) @pytest.mark.parametrize("method", ("lars", "cd")) def test_sparse_pca_numerical_consistency(SPCA, method): - # verify numerically consistent among np.float32 and np.float64 + # Verify numerical consistency between np.float32 and np.float64 rtol = 1e-3 alpha = 2 n_samples, n_features, n_components = 12, 10, 3 From aa742e9d4be55211e91a387c914bfd4c4147ece1 Mon Sep 17 00:00:00 2001 From: takoika <> Date: Wed, 5 Jan 2022 01:04:35 +0900 Subject: [PATCH 13/13] Remove unneccesary method --- sklearn/decomposition/_sparse_pca.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sklearn/decomposition/_sparse_pca.py b/sklearn/decomposition/_sparse_pca.py index 6acd83da24dc8..6f2f4c8b10582 100644 --- a/sklearn/decomposition/_sparse_pca.py +++ b/sklearn/decomposition/_sparse_pca.py @@ -439,8 +439,3 @@ def fit(self, X, y=None): self.n_components_ = len(self.components_) return self - - def _more_tags(self): - return { - "preserves_dtype": [np.float64, np.float32], - }