From f5501bace81248159b52e567310d1dd6e7781dba Mon Sep 17 00:00:00 2001 From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com> Date: Mon, 9 Oct 2023 20:24:54 +0200 Subject: [PATCH 1/9] made Normalizer compatible with the array api --- sklearn/preprocessing/_data.py | 10 ++++++---- sklearn/preprocessing/tests/test_data.py | 2 +- sklearn/utils/extmath.py | 6 ++++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index 4b7421d8a4c01..2601bf8738b88 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -1858,12 +1858,14 @@ def normalize(X, norm="l2", *, axis=1, copy=True, return_norm=False): else: # axis == 1: sparse_format = "csr" + xp, _ = get_namespace(X) + X = check_array( X, accept_sparse=sparse_format, copy=copy, estimator="the normalize function", - dtype=FLOAT_DTYPES, + dtype=_array_api.supported_float_dtypes(xp), ) if axis == 0: X = X.T @@ -1887,13 +1889,13 @@ def normalize(X, norm="l2", *, axis=1, copy=True, return_norm=False): X.data[mask] /= norms_elementwise[mask] else: if norm == "l1": - norms = np.abs(X).sum(axis=1) + norms = xp.sum(xp.abs(X), axis=1) elif norm == "l2": norms = row_norms(X) elif norm == "max": - norms = np.max(abs(X), axis=1) + norms = xp.max(xp.abs(X), axis=1) norms = _handle_zeros_in_scale(norms, copy=False) - X /= norms[:, np.newaxis] + X /= norms[:, None] if axis == 0: X = X.T diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 5042cf218fb26..7b87afc6f6242 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -691,7 +691,7 @@ def test_standard_check_array_of_inverse_transform(): ) @pytest.mark.parametrize( "estimator", - [MaxAbsScaler(), MinMaxScaler()], + [MaxAbsScaler(), MinMaxScaler(), Normalizer()], ids=_get_check_estimator_ids, ) def test_scaler_array_api_compliance(estimator, check, array_namespace, device, dtype): diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index eb060e563d50c..55835297b3a92 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -75,14 +75,16 @@ def row_norms(X, squared=False): array-like The row-wise (squared) Euclidean norm of X. 
""" + xp, _ = get_namespace(X) + if sparse.issparse(X): X = X.tocsr() norms = csr_row_norms(X) else: - norms = np.einsum("ij,ij->i", X, X) + norms = xp.sum(X * X, axis=1) if not squared: - np.sqrt(norms, norms) + norms = xp.sqrt(norms) return norms From f569448c7fe75ed2a9056b5ac7bef3222aeb6a28 Mon Sep 17 00:00:00 2001 From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com> Date: Tue, 10 Oct 2023 07:52:11 +0200 Subject: [PATCH 2/9] adding all norm cases --- sklearn/preprocessing/tests/test_data.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 7b87afc6f6242..27465023b318d 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -691,7 +691,13 @@ def test_standard_check_array_of_inverse_transform(): ) @pytest.mark.parametrize( "estimator", - [MaxAbsScaler(), MinMaxScaler(), Normalizer()], + [ + MaxAbsScaler(), + MinMaxScaler(), + Normalizer(norm="l1"), + Normalizer(norm="l2"), + Normalizer(norm="max"), + ], ids=_get_check_estimator_ids, ) def test_scaler_array_api_compliance(estimator, check, array_namespace, device, dtype): From 74312c5f9db7ed3bb4c1c8535ab379241582146c Mon Sep 17 00:00:00 2001 From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com> Date: Tue, 10 Oct 2023 07:54:45 +0200 Subject: [PATCH 3/9] updated docs --- doc/modules/array_api.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst index d04d47cb94049..2c5fefbbff0aa 100644 --- a/doc/modules/array_api.rst +++ b/doc/modules/array_api.rst @@ -98,6 +98,7 @@ Estimators - :class:`discriminant_analysis.LinearDiscriminantAnalysis` (with `solver="svd"`) - :class:`preprocessing.MaxAbsScaler` - :class:`preprocessing.MinMaxScaler` +- :class:`preprocessing.Normalizer` Metrics ------- From 957c4c0a074a31c36572dcab950bab4dbbaa7488 Mon Sep 17 00:00:00 2001 From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com> Date: Tue, 10 Oct 2023 08:02:13 +0200 Subject: [PATCH 4/9] updated whats new --- doc/whats_new/v1.4.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 1112f95e95a7e..f17f7fb72c735 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -371,13 +371,14 @@ Changelog :mod:`sklearn.preprocessing` ............................ -- |MajorFeature| :class:`preprocessing.MinMaxScaler` and - :class:`preprocessing.MaxAbsScaler` now +- |MajorFeature| :class:`preprocessing.MinMaxScaler`, :class:`preprocessing.MaxAbsScaler` + and :class:`preprocessing.Normalizer` now support the `Array API `_. Array API support is considered experimental and might evolve without being subject to our usual rolling deprecation cycle policy. See :ref:`array_api` for more details. - :pr:`26243` by `Tim Head`_ and :pr:`27110` by :user:`Edoardo Abati `. + :pr:`26243` by `Tim Head`_ , :pr:`27110` by :user:`Edoardo Abati ` and + :pr:`27558` by :user:`Edoardo Abati `. - |Efficiency| :class:`preprocessing.OrdinalEncoder` avoids calculating missing indices twice to improve efficiency. 
From 3477f9e48fdea48d056ce537e611a860ee82abd1 Mon Sep 17 00:00:00 2001 From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com> Date: Tue, 10 Oct 2023 12:41:31 +0200 Subject: [PATCH 5/9] moved * to multiply --- sklearn/utils/extmath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index 55835297b3a92..176a400a7aef0 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -81,7 +81,7 @@ def row_norms(X, squared=False): X = X.tocsr() norms = csr_row_norms(X) else: - norms = xp.sum(X * X, axis=1) + norms = xp.sum(xp.multiply(X, X), axis=1) if not squared: norms = xp.sqrt(norms) From 8212ffb1e5080b15b744613ff8f1a8c12c74ec66 Mon Sep 17 00:00:00 2001 From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com> Date: Sun, 22 Oct 2023 18:08:35 +0200 Subject: [PATCH 6/9] Fixed row_norms for sparse arrays and revert to einsum for numpy --- sklearn/utils/extmath.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index 176a400a7aef0..e2892b6e5df49 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -75,16 +75,19 @@ def row_norms(X, squared=False): array-like The row-wise (squared) Euclidean norm of X. """ - xp, _ = get_namespace(X) - if sparse.issparse(X): X = X.tocsr() norms = csr_row_norms(X) + if not squared: + norms = np.sqrt(norms) else: - norms = xp.sum(xp.multiply(X, X), axis=1) - - if not squared: - norms = xp.sqrt(norms) + xp, _ = get_namespace(X) + if _is_numpy_namespace(xp): + norms = np.einsum("ij,ij->i", X, X) + else: + norms = xp.sum(xp.multiply(X, X), axis=1) + if not squared: + norms = xp.sqrt(norms) return norms From f4d7e1fafe91d2b8355ea5a0672f7c8f75d877a2 Mon Sep 17 00:00:00 2001 From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com> Date: Sun, 22 Oct 2023 18:10:52 +0200 Subject: [PATCH 7/9] added array_api_support tag --- sklearn/preprocessing/_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index 2601bf8738b88..4ee1e633891cc 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -2033,7 +2033,7 @@ def transform(self, X, copy=None): return normalize(X, norm=self.norm, axis=1, copy=copy) def _more_tags(self): - return {"stateless": True} + return {"stateless": True, "array_api_support": True} @validate_params( From 8ce25f7c5d9c1aa2f257ecf1268174d2e4394591 Mon Sep 17 00:00:00 2001 From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com> Date: Sun, 22 Oct 2023 18:57:22 +0200 Subject: [PATCH 8/9] one entry per class in changelog --- doc/whats_new/v1.4.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst index 8576181a762af..bf6b611ef214e 100644 --- a/doc/whats_new/v1.4.rst +++ b/doc/whats_new/v1.4.rst @@ -410,14 +410,15 @@ Changelog :mod:`sklearn.preprocessing` ............................ -- |MajorFeature| :class:`preprocessing.MinMaxScaler`, :class:`preprocessing.MaxAbsScaler` - and :class:`preprocessing.Normalizer` now - support the `Array API `_. Array API +- |MajorFeature| The following classes now support the + `Array API `_. Array API support is considered experimental and might evolve without being subject to our usual rolling deprecation cycle policy. See :ref:`array_api` for more details. 
- :pr:`26243` by `Tim Head`_ , :pr:`27110` by :user:`Edoardo Abati ` and - :pr:`27558` by :user:`Edoardo Abati `. + + - :class:`preprocessing.MinMaxScaler` :pr:`26243` by `Tim Head`_ + - :class:`preprocessing.MaxAbsScaler` :pr:`27110` by :user:`Edoardo Abati ` + - :class:`preprocessing.Normalizer` :pr:`27558` by :user:`Edoardo Abati ` - |Efficiency| :class:`preprocessing.OrdinalEncoder` avoids calculating missing indices twice to improve efficiency. From 13c27f9cd5565d78c82f4ac9c1437d62a92f617b Mon Sep 17 00:00:00 2001 From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com> Date: Tue, 24 Oct 2023 12:10:10 +0200 Subject: [PATCH 9/9] fixing casting to/from numpy before and after einsum --- sklearn/utils/extmath.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index 3cc5da9c0be59..4a16a313100aa 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -83,7 +83,9 @@ def row_norms(X, squared=False): else: xp, _ = get_namespace(X) if _is_numpy_namespace(xp): + X = np.asarray(X) norms = np.einsum("ij,ij->i", X, X) + norms = xp.asarray(norms) else: norms = xp.sum(xp.multiply(X, X), axis=1) if not squared:
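
Patches 6 and 9 keep two branches in `row_norms`: the `np.einsum` fast path for NumPy inputs and the generic `xp.sum(xp.multiply(X, X), axis=1)` path for other array API namespaces. A NumPy-only sketch of why the two branches agree (example values are illustrative, not from the patches):

    import numpy as np

    X = np.array([[3.0, 4.0], [1.0, 1.0]])
    fast_path = np.sqrt(np.einsum("ij,ij->i", X, X))  # NumPy-specific branch
    generic = np.sqrt(np.sum(X * X, axis=1))          # array-API-agnostic branch
    assert np.allclose(fast_path, generic)            # both give [5.0, 1.414...]

Keeping the einsum call on the NumPy path preserves the existing fast computation without an intermediate `X * X` array, while the generic path relies only on functions defined by the array API standard.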