From d0f8d60a8654747f2f5600e3fae9c071c1c417aa Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Mon, 29 Jul 2019 18:31:25 +0200
Subject: [PATCH 1/7] FIX change boolean array-likes indexing in old NumPy
 version

---
 sklearn/compose/tests/test_column_transformer.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py
index ae7ef31d6c7f1..2ccfd6d6c2eae 100644
--- a/sklearn/compose/tests/test_column_transformer.py
+++ b/sklearn/compose/tests/test_column_transformer.py
@@ -16,6 +16,7 @@
 from sklearn.base import BaseEstimator
 from sklearn.compose import ColumnTransformer, make_column_transformer
 from sklearn.exceptions import NotFittedError
+from sklearn.preprocessing import FunctionTransformer
 from sklearn.preprocessing import StandardScaler, Normalizer, OneHotEncoder
 from sklearn.feature_extraction import DictVectorizer
 
@@ -1108,3 +1109,14 @@ def test_column_transformer_reordered_column_names_remainder(explicit_colname):
     err_msg = 'Specifying the columns'
     with pytest.raises(ValueError, match=err_msg):
         tf.transform(X_array)
+
+
+def test_column_transformer_mask_indexing():
+    # Regression test for #xxxxx
+    # Boolean mask indexing with NumPy < 1.13
+    X = np.transpose([[1, 2, 3], [4, 5, 6], [5, 6, 7], [8, 9, 10]])
+    column_transformer = ColumnTransformer(
+        [('identity', FunctionTransformer(), [False, True, False, True])]
+    )
+    X_trans = column_transformer.fit_transform(X)
+    assert X_trans.shape == (3, 2)  # 3 rows, the 2 columns flagged True

From f95a228e5444d801b6ab09d30dcc655d347cc663 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Mon, 29 Jul 2019 19:12:10 +0200
Subject: [PATCH 2/7] FIX index with boolean array-likes via _array_indexing

---
 sklearn/utils/__init__.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index efcaf6865faa5..8db41bb27986d 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -18,6 +18,7 @@
 from . import _joblib
 from ..exceptions import DataConversionWarning
 from .deprecation import deprecated
+from .fixes import np_version
 from .validation import (as_float_array,
                          assert_all_finite,
                          check_random_state, column_or_1d, check_array,
@@ -225,6 +226,17 @@ def safe_indexing(X, indices, axis=0):
         )
 
 
+# FIXME: to be removed once NumPy 1.13 is the minimum version required
+def _array_indexing(array, key, axis=0):
+    """Index an array consistently across NumPy version."""
+    if np_version < (1, 13):
+        # check if we have an boolean array-likes to make the proper indexing
+        key_array = np.asarray(key)
+        if np.issubdtype(key_array.dtype, np.bool_):
+            key = key_array  # index with the boolean ndarray, not the list
+    return array[key] if axis == 0 else array[:, key]  # rows / columns
+
+
 def _safe_indexing_row(X, indices):
     """Return items or rows from X using indices.
 
@@ -266,7 +278,7 @@ def _safe_indexing_row(X, indices):
             # This is often substantially faster than X[indices]
             return X.take(indices, axis=0)
         else:
-            return X[indices]
+            return _array_indexing(X, indices, axis=0)
     else:
         return [X[idx] for idx in indices]
 
@@ -356,7 +368,7 @@ def _safe_indexing_column(X, key):
             return X.iloc[:, key]
         else:
             # numpy arrays, sparse arrays
-            return X[:, key]
+            return _array_indexing(X, key, axis=1)
 
 
 def _get_column_indices(X, key):
@@ -371,7 +383,7 @@ def _get_column_indices(X, key):
             or hasattr(key, 'dtype') and np.issubdtype(key.dtype, np.bool_)):
         # Convert key into positive indexes
         try:
-            idx = np.arange(n_columns)[key]
+            idx = safe_indexing(np.arange(n_columns), key)
         except IndexError as e:
             raise ValueError(
                 'all features must be in [0, %d]' % (n_columns - 1)

From 1c8180390799d22bf42b5c1673caf0cb3dd71c79 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Tue, 30 Jul 2019 09:54:22 +0200
Subject: [PATCH 3/7] add regression test in utils

---
 doc/whats_new/v0.22.rst                       |  8 +++++++
 .../compose/tests/test_column_transformer.py  |  4 ++--
 sklearn/utils/tests/test_utils.py             | 22 +++++++++++++++++++
 3 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst
index 93635d88069d5..114afb9185a18 100644
--- a/doc/whats_new/v0.22.rst
+++ b/doc/whats_new/v0.22.rst
@@ -61,6 +61,14 @@ Changelog
   `sample_weights` are not supported by the wrapped estimator). :pr:`13575`
   by :user:`William de Vazelhes <wdevazelhes>`.
 
+:mod:`sklearn.compose`
+......................
+
+- |Fix| Fixed a bug in :class:`compose.ColumnTransformer` which failed to
+  select the proper columns when using a boolean list and NumPy older than
+  1.13.
+  :pr:`14510` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 :mod:`sklearn.datasets`
 .......................
 
diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py
index 2ccfd6d6c2eae..a9c4fd9e25fbe 100644
--- a/sklearn/compose/tests/test_column_transformer.py
+++ b/sklearn/compose/tests/test_column_transformer.py
@@ -1112,8 +1112,8 @@ def test_column_transformer_reordered_column_names_remainder(explicit_colname):
 
 
 def test_column_transformer_mask_indexing():
-    # Regression test for #xxxxx
-    # Boolean mask indexing with NumPy < 1.13
+    # Regression test for #14510
+    # Boolean array-like does not behave as boolean array with NumPy < 1.13
     X = np.transpose([[1, 2, 3], [4, 5, 6], [5, 6, 7], [8, 9, 10]])
     column_transformer = ColumnTransformer(
         [('identity', FunctionTransformer(), [False, True, False, True])]
diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py
index a39e8160047a5..35cfde4aaef7d 100644
--- a/sklearn/utils/tests/test_utils.py
+++ b/sklearn/utils/tests/test_utils.py
@@ -10,6 +10,7 @@
 
 from sklearn.utils.testing import (assert_raises,
                                    assert_array_equal,
+                                   assert_allclose_dense_sparse,
                                    assert_raises_regex,
                                    assert_warns_message, assert_no_warnings)
 from sklearn.utils import check_random_state
@@ -365,6 +366,27 @@ def test_safe_indexing_mock_pandas(asarray):
     assert_array_equal(np.array(X_df_indexed), X_indexed)
 
 
+@pytest.mark.parametrize("array_type", ['array', 'sparse', 'dataframe'])
+def test_safe_indexing_mask_axis_1(array_type):
+    # regression test for #14510
+    # check that boolean array-like and boolean array lead to the same indexing
+    # even in NumPy < 1.13
+    if array_type == 'array':
+        array_constructor = np.asarray
+    elif array_type == 'sparse':
+        array_constructor = sp.csr_matrix
+    elif array_type == 'dataframe':
+        pd = pytest.importorskip('pandas')
+        array_constructor = pd.DataFrame
+
+    X = array_constructor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+    mask = [True, False, True]  # boolean array-like (plain list)
+    mask_array = np.array(mask)  # the same mask as a boolean ndarray
+    X_masked = safe_indexing(X, mask, axis=1)
+    X_masked_array = safe_indexing(X, mask_array, axis=1)
+    assert_allclose_dense_sparse(X_masked, X_masked_array)
+
+
 def test_shuffle_on_ndim_equals_three():
     def to_tuple(A):    # to make the inner arrays hashable
         return tuple(tuple(tuple(C) for C in B) for B in A)

From c8009a28aa2855e8e01cfebd3ef5df337c0536f4 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Tue, 30 Jul 2019 12:17:43 +0200
Subject: [PATCH 4/7] FIX also convert boolean array-likes for sparse input

---
 sklearn/utils/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 8db41bb27986d..ac6446afcd6cf 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -229,7 +229,7 @@ def safe_indexing(X, indices, axis=0):
 # FIXME: to be removed once NumPy 1.13 is the minimum version required
 def _array_indexing(array, key, axis=0):
     """Index an array consistently across NumPy version."""
-    if np_version < (1, 13):
+    if np_version < (1, 13) or issparse(array):
         # check if we have an boolean array-likes to make the proper indexing
         key_array = np.asarray(key)
         if np.issubdtype(key_array.dtype, np.bool_):

From a80b33d7a60d67706150d48f71b3d0837fceba38 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Tue, 30 Jul 2019 14:25:31 +0200
Subject: [PATCH 5/7] add test in column transformer

---
 sklearn/compose/tests/test_column_transformer.py | 5 ++++-
 sklearn/utils/__init__.py                        | 1 -
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py
index a9c4fd9e25fbe..a667b35cf65e3 100644
--- a/sklearn/compose/tests/test_column_transformer.py
+++ b/sklearn/compose/tests/test_column_transformer.py
@@ -1111,10 +1111,13 @@ def test_column_transformer_reordered_column_names_remainder(explicit_colname):
         tf.transform(X_array)
 
 
-def test_column_transformer_mask_indexing():
+@pytest.mark.parametrize("array_type", [np.asarray, sparse.csr_matrix])
+def test_column_transformer_mask_indexing(array_type):
     # Regression test for #14510
     # Boolean array-like does not behave as boolean array with NumPy < 1.13
+    # and sparse matrices as well
     X = np.transpose([[1, 2, 3], [4, 5, 6], [5, 6, 7], [8, 9, 10]])
+    X = array_type(X)
     column_transformer = ColumnTransformer(
         [('identity', FunctionTransformer(), [False, True, False, True])]
     )
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index ac6446afcd6cf..83f4d7fd1876c 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -226,7 +226,6 @@ def safe_indexing(X, indices, axis=0):
         )
 
 
-# FIXME: to be removed once NumPy 1.13 is the minimum version required
 def _array_indexing(array, key, axis=0):
     """Index an array consistently across NumPy version."""
     if np_version < (1, 13) or issparse(array):

From 9fb045dcf1b7923bf06021b6944ca0cb3dd8ad40 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Thu, 1 Aug 2019 14:22:46 +0200
Subject: [PATCH 6/7] raise error if axis not 0 or 1

---
 sklearn/utils/__init__.py         | 5 +++++
 sklearn/utils/tests/test_utils.py | 8 ++++++++
 2 files changed, 13 insertions(+)

diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 83f4d7fd1876c..3b4a20d08716b 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -228,6 +228,11 @@ def safe_indexing(X, indices, axis=0):
 
 def _array_indexing(array, key, axis=0):
     """Index an array consistently across NumPy version."""
+    if axis not in (0, 1):
+        raise ValueError(
+            "'axis' should be either 0 (to index rows) or 1 (to index "
+            " column). Got {} instead.".format(axis)
+        )
     if np_version < (1, 13) or issparse(array):
         # check if we have an boolean array-likes to make the proper indexing
         key_array = np.asarray(key)
diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py
index 35cfde4aaef7d..49f50eedc0a42 100644
--- a/sklearn/utils/tests/test_utils.py
+++ b/sklearn/utils/tests/test_utils.py
@@ -13,6 +13,7 @@
                                    assert_allclose_dense_sparse,
                                    assert_raises_regex,
                                    assert_warns_message, assert_no_warnings)
+from sklearn.utils import _array_indexing
 from sklearn.utils import check_random_state
 from sklearn.utils import _check_key_type
 from sklearn.utils import deprecated
@@ -387,6 +388,13 @@ def test_safe_indexing_mask_axis_1(array_type):
     assert_allclose_dense_sparse(X_masked, X_masked_array)
 
 
+def test_array_indexing_array_error():
+    X = np.array([[0, 1], [2, 3]])
+    mask = [True, False]
+    with pytest.raises(ValueError, match="'axis' should be either 0"):
+        _array_indexing(X, mask, axis=3)  # only axis 0 or 1 is accepted
+
+
 def test_shuffle_on_ndim_equals_three():
     def to_tuple(A):    # to make the inner arrays hashable
         return tuple(tuple(tuple(C) for C in B) for B in A)

From b1918e83de705b97b789d970ecd471903141182c Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre <g.lemaitre58@gmail.com>
Date: Fri, 2 Aug 2019 15:33:41 +0200
Subject: [PATCH 7/7] address different comments

---
 doc/whats_new/v0.22.rst                          | 4 ++--
 sklearn/compose/tests/test_column_transformer.py | 2 +-
 sklearn/utils/__init__.py                        | 2 +-
 sklearn/utils/tests/test_utils.py                | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst
index db9e0e574da06..0f3c5665e3aa6 100644
--- a/doc/whats_new/v0.22.rst
+++ b/doc/whats_new/v0.22.rst
@@ -65,8 +65,8 @@ Changelog
 ......................
 
 - |Fix| Fixed a bug in :class:`compose.ColumnTransformer` which failed to
-  select the proper columns when using a boolean list and NumPy older than
-  1.13.
+  select the proper columns when using a boolean list, with NumPy older than
+  1.12.
   :pr:`14510` by :user:`Guillaume Lemaitre <glemaitre>`.
 
 :mod:`sklearn.datasets`
diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py
index a667b35cf65e3..d28a82374ad5b 100644
--- a/sklearn/compose/tests/test_column_transformer.py
+++ b/sklearn/compose/tests/test_column_transformer.py
@@ -1114,7 +1114,7 @@ def test_column_transformer_reordered_column_names_remainder(explicit_colname):
 @pytest.mark.parametrize("array_type", [np.asarray, sparse.csr_matrix])
 def test_column_transformer_mask_indexing(array_type):
     # Regression test for #14510
-    # Boolean array-like does not behave as boolean array with NumPy < 1.13
+    # Boolean array-like does not behave as boolean array with NumPy < 1.12
     # and sparse matrices as well
     X = np.transpose([[1, 2, 3], [4, 5, 6], [5, 6, 7], [8, 9, 10]])
     X = array_type(X)
diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py
index 3b4a20d08716b..f95a0d6cccc57 100644
--- a/sklearn/utils/__init__.py
+++ b/sklearn/utils/__init__.py
@@ -233,7 +233,7 @@ def _array_indexing(array, key, axis=0):
             "'axis' should be either 0 (to index rows) or 1 (to index "
-            " column). Got {} instead.".format(axis)
+            "column). Got {} instead.".format(axis)
         )
-    if np_version < (1, 13) or issparse(array):
-        # check if we have an boolean array-likes to make the proper indexing
+    if np_version < (1, 12) or issparse(array):
+        # check if we have a boolean array-like to make the proper indexing
         key_array = np.asarray(key)
         if np.issubdtype(key_array.dtype, np.bool_):
diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py
index 49f50eedc0a42..806295f1aae28 100644
--- a/sklearn/utils/tests/test_utils.py
+++ b/sklearn/utils/tests/test_utils.py
@@ -371,7 +371,7 @@ def test_safe_indexing_mock_pandas(asarray):
 def test_safe_indexing_mask_axis_1(array_type):
     # regression test for #14510
     # check that boolean array-like and boolean array lead to the same indexing
-    # even in NumPy < 1.13
+    # even in NumPy < 1.12
     if array_type == 'array':
         array_constructor = np.asarray
     elif array_type == 'sparse':