From f5501bace81248159b52e567310d1dd6e7781dba Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Mon, 9 Oct 2023 20:24:54 +0200
Subject: [PATCH 1/9] made Normalizer compatible with the Array API
---
sklearn/preprocessing/_data.py | 10 ++++++----
sklearn/preprocessing/tests/test_data.py | 2 +-
sklearn/utils/extmath.py | 6 ++++--
3 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index 4b7421d8a4c01..2601bf8738b88 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -1858,12 +1858,14 @@ def normalize(X, norm="l2", *, axis=1, copy=True, return_norm=False):
else: # axis == 1:
sparse_format = "csr"
+ xp, _ = get_namespace(X)
+
X = check_array(
X,
accept_sparse=sparse_format,
copy=copy,
estimator="the normalize function",
- dtype=FLOAT_DTYPES,
+ dtype=_array_api.supported_float_dtypes(xp),
)
if axis == 0:
X = X.T
@@ -1887,13 +1889,13 @@ def normalize(X, norm="l2", *, axis=1, copy=True, return_norm=False):
X.data[mask] /= norms_elementwise[mask]
else:
if norm == "l1":
- norms = np.abs(X).sum(axis=1)
+ norms = xp.sum(xp.abs(X), axis=1)
elif norm == "l2":
norms = row_norms(X)
elif norm == "max":
- norms = np.max(abs(X), axis=1)
+ norms = xp.max(xp.abs(X), axis=1)
norms = _handle_zeros_in_scale(norms, copy=False)
- X /= norms[:, np.newaxis]
+ X /= norms[:, None]
if axis == 0:
X = X.T
diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
index 5042cf218fb26..7b87afc6f6242 100644
--- a/sklearn/preprocessing/tests/test_data.py
+++ b/sklearn/preprocessing/tests/test_data.py
@@ -691,7 +691,7 @@ def test_standard_check_array_of_inverse_transform():
)
@pytest.mark.parametrize(
"estimator",
- [MaxAbsScaler(), MinMaxScaler()],
+ [MaxAbsScaler(), MinMaxScaler(), Normalizer()],
ids=_get_check_estimator_ids,
)
def test_scaler_array_api_compliance(estimator, check, array_namespace, device, dtype):
diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py
index eb060e563d50c..55835297b3a92 100644
--- a/sklearn/utils/extmath.py
+++ b/sklearn/utils/extmath.py
@@ -75,14 +75,16 @@ def row_norms(X, squared=False):
array-like
The row-wise (squared) Euclidean norm of X.
"""
+ xp, _ = get_namespace(X)
+
if sparse.issparse(X):
X = X.tocsr()
norms = csr_row_norms(X)
else:
- norms = np.einsum("ij,ij->i", X, X)
+ norms = xp.sum(X * X, axis=1)
if not squared:
- np.sqrt(norms, norms)
+ norms = xp.sqrt(norms)
return norms
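This first patch makes normalize (and row_norms) look up the input's array namespace via get_namespace instead of assuming NumPy. A minimal sketch of the behaviour it is meant to enable, assuming PyTorch and the optional array_api_compat package are installed and scikit-learn's experimental dispatch is switched on with config_context(array_api_dispatch=True):

# Minimal sketch (assumes PyTorch and array_api_compat are installed);
# with dispatch enabled, normalize() should keep the input's array type
# instead of converting it to NumPy.
import torch
from sklearn import config_context
from sklearn.preprocessing import normalize

X = torch.asarray([[1.0, 2.0], [3.0, 4.0]])

with config_context(array_api_dispatch=True):
    X_l2 = normalize(X, norm="l2")

print(type(X_l2))  # expected: <class 'torch.Tensor'>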
From f569448c7fe75ed2a9056b5ac7bef3222aeb6a28 Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Tue, 10 Oct 2023 07:52:11 +0200
Subject: [PATCH 2/9] adding all norm cases
---
sklearn/preprocessing/tests/test_data.py | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
index 7b87afc6f6242..27465023b318d 100644
--- a/sklearn/preprocessing/tests/test_data.py
+++ b/sklearn/preprocessing/tests/test_data.py
@@ -691,7 +691,13 @@ def test_standard_check_array_of_inverse_transform():
)
@pytest.mark.parametrize(
"estimator",
- [MaxAbsScaler(), MinMaxScaler(), Normalizer()],
+ [
+ MaxAbsScaler(),
+ MinMaxScaler(),
+ Normalizer(norm="l1"),
+ Normalizer(norm="l2"),
+ Normalizer(norm="max"),
+ ],
ids=_get_check_estimator_ids,
)
def test_scaler_array_api_compliance(estimator, check, array_namespace, device, dtype):
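For reference, each of the three parametrized norm options rescales rows so that a different per-row statistic becomes 1. A plain-NumPy sanity sketch of what the added test cases exercise:

# Plain-NumPy sanity sketch of the three norm options added to the test.
import numpy as np
from sklearn.preprocessing import Normalizer

X = np.asarray([[1.0, -2.0, 2.0], [0.5, 3.0, -4.0]])

assert np.allclose(np.abs(Normalizer(norm="l1").fit_transform(X)).sum(axis=1), 1.0)
assert np.allclose(np.linalg.norm(Normalizer(norm="l2").fit_transform(X), axis=1), 1.0)
assert np.allclose(np.abs(Normalizer(norm="max").fit_transform(X)).max(axis=1), 1.0)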
From 74312c5f9db7ed3bb4c1c8535ab379241582146c Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Tue, 10 Oct 2023 07:54:45 +0200
Subject: [PATCH 3/9] updated docs
---
doc/modules/array_api.rst | 1 +
1 file changed, 1 insertion(+)
diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst
index d04d47cb94049..2c5fefbbff0aa 100644
--- a/doc/modules/array_api.rst
+++ b/doc/modules/array_api.rst
@@ -98,6 +98,7 @@ Estimators
- :class:`discriminant_analysis.LinearDiscriminantAnalysis` (with `solver="svd"`)
- :class:`preprocessing.MaxAbsScaler`
- :class:`preprocessing.MinMaxScaler`
+- :class:`preprocessing.Normalizer`
Metrics
-------
From 957c4c0a074a31c36572dcab950bab4dbbaa7488 Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Tue, 10 Oct 2023 08:02:13 +0200
Subject: [PATCH 4/9] updated whats new
---
doc/whats_new/v1.4.rst | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
index 1112f95e95a7e..f17f7fb72c735 100644
--- a/doc/whats_new/v1.4.rst
+++ b/doc/whats_new/v1.4.rst
@@ -371,13 +371,14 @@ Changelog
:mod:`sklearn.preprocessing`
............................
-- |MajorFeature| :class:`preprocessing.MinMaxScaler` and
- :class:`preprocessing.MaxAbsScaler` now
+- |MajorFeature| :class:`preprocessing.MinMaxScaler`, :class:`preprocessing.MaxAbsScaler`
+ and :class:`preprocessing.Normalizer` now
support the `Array API <https://data-apis.org/array-api/latest/>`_. Array API
support is considered experimental and might evolve without being subject to
our usual rolling deprecation cycle policy. See
:ref:`array_api` for more details.
- :pr:`26243` by `Tim Head`_ and :pr:`27110` by :user:`Edoardo Abati <EdAbati>`.
+ :pr:`26243` by `Tim Head`_, :pr:`27110` by :user:`Edoardo Abati <EdAbati>` and
+ :pr:`27558` by :user:`Edoardo Abati <EdAbati>`.
- |Efficiency| :class:`preprocessing.OrdinalEncoder` avoids calculating
missing indices twice to improve efficiency.
From 3477f9e48fdea48d056ce537e611a860ee82abd1 Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Tue, 10 Oct 2023 12:41:31 +0200
Subject: [PATCH 5/9] moved * to multiply
---
sklearn/utils/extmath.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py
index 55835297b3a92..176a400a7aef0 100644
--- a/sklearn/utils/extmath.py
+++ b/sklearn/utils/extmath.py
@@ -81,7 +81,7 @@ def row_norms(X, squared=False):
X = X.tocsr()
norms = csr_row_norms(X)
else:
- norms = xp.sum(X * X, axis=1)
+ norms = xp.sum(xp.multiply(X, X), axis=1)
if not squared:
norms = xp.sqrt(norms)
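Both spellings are equivalent under the Array API specification, which defines multiply() alongside the * operator for numeric arrays, so this is a stylistic change. A trivial NumPy check:

# Trivial equivalence check (NumPy shown; the Array API defines both
# multiply() and the * operator for numeric arrays).
import numpy as np

X = np.asarray([[1.0, 2.0], [3.0, 4.0]])
assert np.array_equal(np.multiply(X, X), X * X)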
From 8212ffb1e5080b15b744613ff8f1a8c12c74ec66 Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Sun, 22 Oct 2023 18:08:35 +0200
Subject: [PATCH 6/9] Fixed row_norms for sparse arrays and reverted to einsum
for numpy
---
sklearn/utils/extmath.py | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py
index 176a400a7aef0..e2892b6e5df49 100644
--- a/sklearn/utils/extmath.py
+++ b/sklearn/utils/extmath.py
@@ -75,16 +75,19 @@ def row_norms(X, squared=False):
array-like
The row-wise (squared) Euclidean norm of X.
"""
- xp, _ = get_namespace(X)
-
if sparse.issparse(X):
X = X.tocsr()
norms = csr_row_norms(X)
+ if not squared:
+ norms = np.sqrt(norms)
else:
- norms = xp.sum(xp.multiply(X, X), axis=1)
-
- if not squared:
- norms = xp.sqrt(norms)
+ xp, _ = get_namespace(X)
+ if _is_numpy_namespace(xp):
+ norms = np.einsum("ij,ij->i", X, X)
+ else:
+ norms = xp.sum(xp.multiply(X, X), axis=1)
+ if not squared:
+ norms = xp.sqrt(norms)
return norms
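After this revision, row_norms keeps the einsum fast path for plain NumPy inputs, falls back to a generic Array API expression for other namespaces, and lets the sparse branch take its own square root. A NumPy-only sketch confirming the two dense formulations agree:

# The einsum fast path and the generic Array API expression compute the same
# row-wise Euclidean norms (both evaluated with NumPy here).
import numpy as np

X = np.random.default_rng(0).standard_normal((4, 3))

fast_path = np.sqrt(np.einsum("ij,ij->i", X, X))
generic = np.sqrt(np.sum(X * X, axis=1))

assert np.allclose(fast_path, generic)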
From f4d7e1fafe91d2b8355ea5a0672f7c8f75d877a2 Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Sun, 22 Oct 2023 18:10:52 +0200
Subject: [PATCH 7/9] added array_api_support tag
---
sklearn/preprocessing/_data.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index 2601bf8738b88..4ee1e633891cc 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -2033,7 +2033,7 @@ def transform(self, X, copy=None):
return normalize(X, norm=self.norm, axis=1, copy=copy)
def _more_tags(self):
- return {"stateless": True}
+ return {"stateless": True, "array_api_support": True}
@validate_params(
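The array_api_support tag is what opts Normalizer into the common Array API estimator checks. One hedged way to inspect it is through scikit-learn's private tag helper, an internal API that may change between releases:

# Hedged sketch: reading the new estimator tag through a private helper
# (internal scikit-learn API, subject to change in later releases).
from sklearn.preprocessing import Normalizer
from sklearn.utils._tags import _safe_tags

assert _safe_tags(Normalizer(), key="array_api_support")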
From 8ce25f7c5d9c1aa2f257ecf1268174d2e4394591 Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Sun, 22 Oct 2023 18:57:22 +0200
Subject: [PATCH 8/9] one entry per class in changelog
---
doc/whats_new/v1.4.rst | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/doc/whats_new/v1.4.rst b/doc/whats_new/v1.4.rst
index 8576181a762af..bf6b611ef214e 100644
--- a/doc/whats_new/v1.4.rst
+++ b/doc/whats_new/v1.4.rst
@@ -410,14 +410,15 @@ Changelog
:mod:`sklearn.preprocessing`
............................
-- |MajorFeature| :class:`preprocessing.MinMaxScaler`, :class:`preprocessing.MaxAbsScaler`
- and :class:`preprocessing.Normalizer` now
- support the `Array API <https://data-apis.org/array-api/latest/>`_. Array API
+- |MajorFeature| The following classes now support the
+ `Array API <https://data-apis.org/array-api/latest/>`_. Array API
support is considered experimental and might evolve without being subject to
our usual rolling deprecation cycle policy. See
:ref:`array_api` for more details.
- :pr:`26243` by `Tim Head`_, :pr:`27110` by :user:`Edoardo Abati <EdAbati>` and
- :pr:`27558` by :user:`Edoardo Abati <EdAbati>`.
+
+  - :class:`preprocessing.MinMaxScaler` :pr:`26243` by `Tim Head`_
+  - :class:`preprocessing.MaxAbsScaler` :pr:`27110` by :user:`Edoardo Abati <EdAbati>`
+  - :class:`preprocessing.Normalizer` :pr:`27558` by :user:`Edoardo Abati <EdAbati>`
- |Efficiency| :class:`preprocessing.OrdinalEncoder` avoids calculating
missing indices twice to improve efficiency.
From 13c27f9cd5565d78c82f4ac9c1437d62a92f617b Mon Sep 17 00:00:00 2001
From: Edoardo Abati <29585319+EdAbati@users.noreply.github.com>
Date: Tue, 24 Oct 2023 12:10:10 +0200
Subject: [PATCH 9/9] fixing casting to/from numpy before and after einsum
---
sklearn/utils/extmath.py | 2 ++
1 file changed, 2 insertions(+)
diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py
index 3cc5da9c0be59..4a16a313100aa 100644
--- a/sklearn/utils/extmath.py
+++ b/sklearn/utils/extmath.py
@@ -83,7 +83,9 @@ def row_norms(X, squared=False):
else:
xp, _ = get_namespace(X)
if _is_numpy_namespace(xp):
+ X = np.asarray(X)
norms = np.einsum("ij,ij->i", X, X)
+ norms = xp.asarray(norms)
else:
norms = xp.sum(xp.multiply(X, X), axis=1)
if not squared:
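The extra conversions guard against namespaces that report as NumPy-like while wrapping their own array type: the input is coerced to a plain ndarray before einsum, and the result is converted back so callers get an array from the original namespace. A condensed restatement of the pattern, with _einsum_row_norms as a purely illustrative helper name:

# Condensed restatement of the pattern added above; _einsum_row_norms is a
# hypothetical helper name used only for illustration.
import numpy as np

def _einsum_row_norms(X, xp):
    X = np.asarray(X)                     # namespace array -> plain ndarray
    norms = np.einsum("ij,ij->i", X, X)   # squared row norms via the fast path
    return xp.asarray(norms)              # back to the caller's namespace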