From 672b43420d57401a9cd3086c39be4699b9a35d0d Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Tue, 3 Sep 2019 08:14:09 +0200
Subject: [PATCH 1/5] check_array float->int casting with NaN

---
 sklearn/utils/tests/test_validation.py | 22 ++++++++++++++++++++++
 sklearn/utils/validation.py            | 18 +++++++++++++++---
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 0f7ffe9a3e4f0..ce56dd6f3ab5d 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -201,6 +201,28 @@ def test_check_array_force_all_finite_object():
     with pytest.raises(ValueError, match='Input contains NaN'):
         check_array(X, dtype=None, force_all_finite=True)
 
+    # casting a float array containing NaN or inf to int dtype should
+    # raise an error irrespective of the force_all_finite parameter.
+    X = np.array([[1, np.nan]])
+
+    msg = "Input contains NaN, infinity or a value too large for.*int"
+    with pytest.raises(ValueError, match=msg):
+        check_array(X, dtype=np.int, force_all_finite=True)
+
+    with pytest.raises(ValueError, match=msg):
+        check_array(X, dtype=np.int, force_all_finite=False)
+
+    X = np.array([[1, np.inf]])
+
+    with pytest.raises(ValueError, match=msg):
+        check_array(X, dtype=np.int)
+
+    X = np.array([[1, np.nan]], dtype=np.object)
+
+    msg = 'cannot convert float NaN to integer'
+    with pytest.raises(ValueError, match=msg):
+        check_array(X, dtype=np.int)
+
 
 @ignore_warnings
 def test_check_array():
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 465acf48e8293..860d8b74930ed 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -32,7 +32,7 @@
 warnings.simplefilter('ignore', NonBLASDotWarning)
 
 
-def _assert_all_finite(X, allow_nan=False):
+def _assert_all_finite(X, allow_nan=False, msg_dtype=None):
     """Like assert_all_finite, but only for ndarray."""
     # validation is also imported in extmath
     from .extmath import _safe_accumulator_op
@@ -52,7 +52,11 @@ def _assert_all_finite(X, allow_nan=False):
         if (allow_nan and np.isinf(X).any() or
                 not allow_nan and not np.isfinite(X).all()):
             type_err = 'infinity' if allow_nan else 'NaN, infinity'
-            raise ValueError(msg_err.format(type_err, X.dtype))
+            raise ValueError(
+                    msg_err.format
+                    (type_err,
+                     msg_dtype if msg_dtype is not None else X.dtype)
+            )
     # for object dtype data, we only check for NaNs (GH-13254)
     elif X.dtype == np.dtype('object') and not allow_nan:
         if _object_dtype_isnan(X).any():
@@ -494,7 +498,15 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True,
         with warnings.catch_warnings():
             try:
                 warnings.simplefilter('error', ComplexWarning)
-                array = np.asarray(array, dtype=dtype, order=order)
+                array = np.asarray(array, order=order)
+                if dtype is not None:
+                    if np.dtype(dtype).kind == 'i' and array.dtype.kind == 'f':
+                        # Conversion float -> int should not contain NaN or
+                        # inf. We cannot use casting='safe' because then
+                        # conversion float -> int would be disallowed.
+                        _assert_all_finite(array, allow_nan=False,
+                                           msg_dtype=dtype)
+                    array = array.astype(dtype, casting="unsafe", copy=False)
             except ComplexWarning:
                 raise ValueError("Complex data not supported\n"
                                  "{}\n".format(array))

From d2ccfd0a58d0795844bbe32112a864bd072a2d92 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Tue, 3 Sep 2019 12:59:13 +0200
Subject: [PATCH 2/5] Previous behaviour for dtype=np.object

---
 sklearn/utils/validation.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 860d8b74930ed..416a21ee9e045 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -498,15 +498,17 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True,
         with warnings.catch_warnings():
             try:
                 warnings.simplefilter('error', ComplexWarning)
-                array = np.asarray(array, order=order)
-                if dtype is not None:
-                    if np.dtype(dtype).kind == 'i' and array.dtype.kind == 'f':
-                        # Conversion float -> int should not contain NaN or
-                        # inf. We cannot use casting='safe' because then
-                        # conversion float -> int would be disallowed.
+                if dtype is not None and np.dtype(dtype).kind in 'iu':
+                    # Conversion float -> int should not contain NaN or
+                    # inf. We cannot use casting='safe' because then
+                    # conversion float -> int would be disallowed.
+                    array = np.asarray(array, order=order)
+                    if array.dtype.kind == 'f':
                         _assert_all_finite(array, allow_nan=False,
                                            msg_dtype=dtype)
                     array = array.astype(dtype, casting="unsafe", copy=False)
+                else:
+                    array = np.asarray(array, order=order, dtype=dtype)
             except ComplexWarning:
                 raise ValueError("Complex data not supported\n"
                                  "{}\n".format(array))

From 3711329c078d70664b5107c4dc51cc6b9cf55285 Mon Sep 17 00:00:00 2001
From: Roman Yurchak <rth.yurchak@pm.me>
Date: Tue, 3 Sep 2019 13:07:31 +0200
Subject: [PATCH 3/5] Comment wording improvement

---
 sklearn/utils/validation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 416a21ee9e045..5da8b6f2bed64 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -500,8 +500,8 @@ def check_array(array, accept_sparse=False, accept_large_sparse=True,
                 warnings.simplefilter('error', ComplexWarning)
                 if dtype is not None and np.dtype(dtype).kind in 'iu':
                     # Conversion float -> int should not contain NaN or
-                    # inf. We cannot use casting='safe' because then
-                    # conversion float -> int would be disallowed.
+                    # inf (numpy#14412). We cannot use casting='safe' because
+                    # then conversion float -> int would be disallowed.
                     array = np.asarray(array, order=order)
                     if array.dtype.kind == 'f':
                         _assert_all_finite(array, allow_nan=False,

From eb52dbb3aa38ba5bfec711790a3a88641daccc2d Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Fri, 20 Sep 2019 10:18:13 +0200
Subject: [PATCH 4/5] Parametrize NaN/inf casting test and add whats_new entry

---
 doc/whats_new/v0.22.rst                |  4 +++
 sklearn/utils/tests/test_validation.py | 36 ++++++++++++--------------
 2 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst
index ce3174218679f..58c1a0c95706c 100644
--- a/doc/whats_new/v0.22.rst
+++ b/doc/whats_new/v0.22.rst
@@ -397,6 +397,10 @@ Changelog
   a proper error message is raised if X contains some negative entries.
   :pr:`14680` by :user:`Alex Gramfort <agramfort>`.
 
+- |Fix| :func:`utils.check_array` is now raising an error instead of casting
+  NaN to integer.
+  :pr:`14872` by `Roman Yurchak`_.
+
 :mod:`sklearn.neighbors`
 ....................
 
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index ce56dd6f3ab5d..d5c0aa444a8e2 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -201,27 +201,25 @@ def test_check_array_force_all_finite_object():
     with pytest.raises(ValueError, match='Input contains NaN'):
         check_array(X, dtype=None, force_all_finite=True)
 
+
+@pytest.mark.parametrize(
+    "X, err_msg",
+    [(np.array([[1, np.nan]]),
+      "Input contains NaN, infinity or a value too large for.*int"),
+     (np.array([[1, np.inf]]),
+      "Input contains NaN, infinity or a value too large for.*int"),
+     (np.array([[1, -np.inf]]),
+      "Input contains NaN, infinity or a value too large for.*int"),
+     (np.array([[1, np.nan]], dtype=np.object),
+      "cannot convert float NaN to integer")]
+)
+@pytest.mark.parametrize("force_all_finite", [True, False])
+def test_check_array_force_all_finite_object_unsafe_casting(
+        X, err_msg, force_all_finite):
     # casting a float array containing NaN or inf to int dtype should
     # raise an error irrespective of the force_all_finite parameter.
-    X = np.array([[1, np.nan]])
-
-    msg = "Input contains NaN, infinity or a value too large for.*int"
-    with pytest.raises(ValueError, match=msg):
-        check_array(X, dtype=np.int, force_all_finite=True)
-
-    with pytest.raises(ValueError, match=msg):
-        check_array(X, dtype=np.int, force_all_finite=False)
-
-    X = np.array([[1, np.inf]])
-
-    with pytest.raises(ValueError, match=msg):
-        check_array(X, dtype=np.int)
-
-    X = np.array([[1, np.nan]], dtype=np.object)
-
-    msg = 'cannot convert float NaN to integer'
-    with pytest.raises(ValueError, match=msg):
-        check_array(X, dtype=np.int)
+    with pytest.raises(ValueError, match=err_msg):
+        check_array(X, dtype=np.int, force_all_finite=force_all_finite)
 
 
 @ignore_warnings

From 9923c3e5249b0d8377fabb358118f23633f0eb13 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Fri, 20 Sep 2019 10:21:21 +0200
Subject: [PATCH 5/5] fix merge conflict

---
 doc/whats_new/v0.22.rst | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst
index 91dbc7848b81f..752c865519e2e 100644
--- a/doc/whats_new/v0.22.rst
+++ b/doc/whats_new/v0.22.rst
@@ -537,17 +537,6 @@ Changelog
   :func:`~utils.estimator_checks.parametrize_with_checks`, to parametrize
   estimator checks for a list of estimators. :pr:`14381` by `Thomas Fan`_.
 
-- |API| `requires_positive_X` estimator tag (for models that require
-  X to be non-negative) is now used by `check_estimator` to make sure
-  a proper error message is raised if X contains some negative entries.
-  :pr:`14680` by :user:`Alex Gramfort <agramfort>`.
-
-- |Fix| :func:`utils.check_array` is now raising an error instead of casting
-  NaN to integer.
-  :pr:`14872` by `Roman Yurchak`_.
-
-:mod:`sklearn.neighbors`
-....................
 - |API| The following utils have been deprecated and are now private:
   - ``choose_check_classifiers_labels``
   - ``enforce_estimator_tags_y``
@@ -562,6 +551,10 @@ Changelog
   and sparse matrix.
   :pr:`14538` by :user:`Jérémie du Boisberranger <jeremiedbb>`.
 
+- |Fix| :func:`utils.check_array` now raises an error instead of silently
+  casting NaN or infinity to an integer dtype.
+  :pr:`14872` by `Roman Yurchak`_.
+
 :mod:`sklearn.metrics`
 ..................................