From b13fcf55ea0886c15496d442f59ad8b152617d36 Mon Sep 17 00:00:00 2001
From: tirthasheshpatel <tirthasheshpatel@gmail.com>
Date: Sun, 1 Dec 2019 23:21:26 +0530
Subject: [PATCH 01/11] FIX: order of values of self.quantiles_ in
 QuantileTransformer

---
 sklearn/preprocessing/_data.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index ef8b9c6db9e3b..e7a80a94c8498 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -2262,6 +2262,8 @@ def _dense_fit(self, X, random_state):
                 col = col.take(subsample_idx, mode='clip')
             self.quantiles_.append(np.nanpercentile(col, references))
         self.quantiles_ = np.transpose(self.quantiles_)
+        self.quantiles_ = np.minimum.accumulate(
+                                    self.quantiles_[::-1])[::-1]
 
     def _sparse_fit(self, X, random_state):
         """Compute percentiles for sparse matrices.
@@ -2305,6 +2307,8 @@ def _sparse_fit(self, X, random_state):
                 self.quantiles_.append(
                         np.nanpercentile(column_data, references))
         self.quantiles_ = np.transpose(self.quantiles_)
+        self.quantiles_ = np.minimum.accumulate(
+                                        self.quantiles_[::-1])[::-1]
 
     def fit(self, X, y=None):
         """Compute the quantiles used for transforming.

From 7bd784653935881356e99b85e1ccfea591f2f12d Mon Sep 17 00:00:00 2001
From: Tirth Patel <tirthasheshpatel@gmail.com>
Date: Mon, 2 Dec 2019 15:55:16 +0530
Subject: [PATCH 02/11] FIX: order of values in self.quantiles_ in
 QuantileTransformer

---
 sklearn/preprocessing/_data.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index e7a80a94c8498..b4796b9fd019b 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -2262,8 +2262,8 @@ def _dense_fit(self, X, random_state):
                 col = col.take(subsample_idx, mode='clip')
             self.quantiles_.append(np.nanpercentile(col, references))
         self.quantiles_ = np.transpose(self.quantiles_)
-        self.quantiles_ = np.minimum.accumulate(
-                                    self.quantiles_[::-1])[::-1]
+        self.quantiles_ = np.maximum.accumulate(
+                                    self.quantiles_)
 
     def _sparse_fit(self, X, random_state):
         """Compute percentiles for sparse matrices.
@@ -2307,8 +2307,8 @@ def _sparse_fit(self, X, random_state):
                 self.quantiles_.append(
                         np.nanpercentile(column_data, references))
         self.quantiles_ = np.transpose(self.quantiles_)
-        self.quantiles_ = np.minimum.accumulate(
-                                        self.quantiles_[::-1])[::-1]
+        self.quantiles_ = np.maximum.accumulate(
+                                        self.quantiles_)
 
     def fit(self, X, y=None):
         """Compute the quantiles used for transforming.

From 1196430bac1c802d2bb0e3532794a74741725a22 Mon Sep 17 00:00:00 2001
From: tirthasheshpatel <tirthasheshpatel@gmail.com>
Date: Mon, 2 Dec 2019 19:02:25 +0530
Subject: [PATCH 03/11] add comment explaining changes

---
 sklearn/preprocessing/_data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index b4796b9fd019b..25ad9a13f4401 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -2307,8 +2307,8 @@ def _sparse_fit(self, X, random_state):
                 self.quantiles_.append(
                         np.nanpercentile(column_data, references))
         self.quantiles_ = np.transpose(self.quantiles_)
-        self.quantiles_ = np.maximum.accumulate(
-                                        self.quantiles_)
+        # make sure the quantiles are monotonically increasing
+        self.quantiles_ = np.maximum.accumulate(self.quantiles_)
 
     def fit(self, X, y=None):
         """Compute the quantiles used for transforming.

From 3816224620255a982a8a279e4c4633b5267c81f0 Mon Sep 17 00:00:00 2001
From: Tirth Patel <tirthasheshpatel@gmail.com>
Date: Mon, 2 Dec 2019 21:23:26 +0530
Subject: [PATCH 04/11] Update _data.py

---
 sklearn/preprocessing/_data.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index 25ad9a13f4401..a1fbbe8d38b92 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -2262,8 +2262,7 @@ def _dense_fit(self, X, random_state):
                 col = col.take(subsample_idx, mode='clip')
             self.quantiles_.append(np.nanpercentile(col, references))
         self.quantiles_ = np.transpose(self.quantiles_)
-        self.quantiles_ = np.maximum.accumulate(
-                                    self.quantiles_)
+        self.quantiles_ = np.maximum.accumulate(self.quantiles_)
 
     def _sparse_fit(self, X, random_state):
         """Compute percentiles for sparse matrices.

From b47a78699f638c8a8c19b44e3f6646aa2c9d6052 Mon Sep 17 00:00:00 2001
From: Tirth Patel <tirthasheshpatel@gmail.com>
Date: Mon, 2 Dec 2019 21:24:17 +0530
Subject: [PATCH 05/11] Update _data.py

---
 sklearn/preprocessing/_data.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index a1fbbe8d38b92..16c7b3214661f 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -2262,6 +2262,7 @@ def _dense_fit(self, X, random_state):
                 col = col.take(subsample_idx, mode='clip')
             self.quantiles_.append(np.nanpercentile(col, references))
         self.quantiles_ = np.transpose(self.quantiles_)
+        # make sure the quantiles are monotonically increasing
         self.quantiles_ = np.maximum.accumulate(self.quantiles_)
 
     def _sparse_fit(self, X, random_state):

From 3eb3aa9f7c34232674806ae3414413f7d9d1d9d5 Mon Sep 17 00:00:00 2001
From: Tirth Patel <tirthasheshpatel@gmail.com>
Date: Fri, 6 Dec 2019 22:33:52 +0530
Subject: [PATCH 06/11] Update sklearn/preprocessing/_data.py

Add suggested comment on floating-point error in `np.nanpercentile`.

Co-Authored-By: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/preprocessing/_data.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index 16c7b3214661f..125049bd09b84 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -2262,7 +2262,8 @@ def _dense_fit(self, X, random_state):
                 col = col.take(subsample_idx, mode='clip')
             self.quantiles_.append(np.nanpercentile(col, references))
         self.quantiles_ = np.transpose(self.quantiles_)
-        # make sure the quantiles are monotonically increasing
+        # due to floating-point precision error in `np.nanpercentile`,
+        # make sure that quantiles are monotonically increasing
         self.quantiles_ = np.maximum.accumulate(self.quantiles_)
 
     def _sparse_fit(self, X, random_state):

From ebe75907df9bdf02735fdc038ea7910522960149 Mon Sep 17 00:00:00 2001
From: Tirth Patel <tirthasheshpatel@gmail.com>
Date: Fri, 6 Dec 2019 22:34:19 +0530
Subject: [PATCH 07/11] Update sklearn/preprocessing/_data.py

Add suggested comment on floating-point error in `np.nanpercentile`.

Co-Authored-By: Guillaume Lemaitre <g.lemaitre58@gmail.com>
---
 sklearn/preprocessing/_data.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index 125049bd09b84..64691d83b0651 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -2308,6 +2308,7 @@ def _sparse_fit(self, X, random_state):
                 self.quantiles_.append(
                         np.nanpercentile(column_data, references))
         self.quantiles_ = np.transpose(self.quantiles_)
+        # due to floating-point precision error in `np.nanpercentile`,
         # make sure the quantiles are monotonically increasing
         self.quantiles_ = np.maximum.accumulate(self.quantiles_)
 

From ec222ea30c0de571cc5e1d0397dd6a1ba5924b61 Mon Sep 17 00:00:00 2001
From: tirthasheshpatel <tirthasheshpatel@gmail.com>
Date: Fri, 6 Dec 2019 22:47:01 +0530
Subject: [PATCH 08/11] add fix in whats_new

---
 doc/whats_new/v0.22.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst
index e14601c1b52a7..ff3d922ddfbad 100644
--- a/doc/whats_new/v0.22.rst
+++ b/doc/whats_new/v0.22.rst
@@ -768,6 +768,10 @@ Changelog
   :class:`preprocessing.KernelCenterer`
   :pr:`14336` by :user:`Gregory Dexter <gdex1>`.
 
+- |Fix| :class:`perprocessing.QuantileTransformer` now guarantees the 
+  `quantiles_` attribute to be completely sorted in non-decreasing manner.
+  :pr:`15751` by :user:`Tirth Patel <tirthasheshpatel>`.
+
 :mod:`sklearn.model_selection`
 ..............................
 

From c9cf96e7393b6a02943284bb9aeaf290f15f3f6d Mon Sep 17 00:00:00 2001
From: Tirth Patel <tirthasheshpatel@gmail.com>
Date: Sat, 7 Dec 2019 16:42:17 +0530
Subject: [PATCH 09/11] Update doc/whats_new/v0.22.rst

Co-Authored-By: fcharras <franck@sancare.fr>
---
 doc/whats_new/v0.22.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst
index ff3d922ddfbad..484600701a2a4 100644
--- a/doc/whats_new/v0.22.rst
+++ b/doc/whats_new/v0.22.rst
@@ -768,7 +768,7 @@ Changelog
   :class:`preprocessing.KernelCenterer`
   :pr:`14336` by :user:`Gregory Dexter <gdex1>`.
 
-- |Fix| :class:`perprocessing.QuantileTransformer` now guarantees the 
+- |Fix| :class:`preprocessing.QuantileTransformer` now guarantees the 
   `quantiles_` attribute to be completely sorted in non-decreasing manner.
   :pr:`15751` by :user:`Tirth Patel <tirthasheshpatel>`.
 

From 911859db678735a6a5b9b0a9e4dd785654b20fda Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Sat, 21 Dec 2019 15:36:15 +0100
Subject: [PATCH 10/11] Add non-regression test + reference to upstream numpy
 issue

---
 sklearn/preprocessing/_data.py           |  8 ++++++--
 sklearn/preprocessing/tests/test_data.py | 21 +++++++++++++++++++++
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index 64691d83b0651..27a752813dd2a 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -2262,8 +2262,10 @@ def _dense_fit(self, X, random_state):
                 col = col.take(subsample_idx, mode='clip')
             self.quantiles_.append(np.nanpercentile(col, references))
         self.quantiles_ = np.transpose(self.quantiles_)
-        # due to floating-point precision error in `np.nanpercentile`,
-        # make sure that quantiles are monotonically increasing
+        # Due to floating-point precision error in `np.nanpercentile`,
+        # make sure that quantiles are monotonically increasing.
+        # Upstream issue in numpy:
+        # https://github.com/numpy/numpy/issues/14685
         self.quantiles_ = np.maximum.accumulate(self.quantiles_)
 
     def _sparse_fit(self, X, random_state):
@@ -2310,6 +2312,8 @@ def _sparse_fit(self, X, random_state):
         self.quantiles_ = np.transpose(self.quantiles_)
         # due to floating-point precision error in `np.nanpercentile`,
         # make sure the quantiles are monotonically increasing
+        # Upstream issue in numpy:
+        # https://github.com/numpy/numpy/issues/14685
         self.quantiles_ = np.maximum.accumulate(self.quantiles_)
 
     def fit(self, X, y=None):
diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
index 060719200fa99..5926bff21acd4 100644
--- a/sklearn/preprocessing/tests/test_data.py
+++ b/sklearn/preprocessing/tests/test_data.py
@@ -1533,6 +1533,27 @@ def test_quantile_transform_nan():
     assert not np.isnan(transformer.quantiles_[:, 1:]).any()
 
 
+@pytest.mark.parametrize("sparse_data", [False, True])
+def test_quantile_transformer_sorted_quantiles(sparse_data):
+    # Non-regression test for:
+    # https://github.com/scikit-learn/scikit-learn/issues/15733
+    # Taken from upstream bug report:
+    # https://github.com/numpy/numpy/issues/14685
+    X = np.array([0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 1, 1, 9, 9, 9, 8, 8, 7] * 10)
+    X = 0.1 * X.reshape(-1, 1)
+    if sparse_data:
+        X = sparse.csc_matrix(X)
+
+    n_quantiles = 100
+    qt = QuantileTransformer(n_quantiles=n_quantiles).fit(X)
+
+    # Check that the estimated quantile thresholds are monotonically
+    # increasing:
+    quantiles = qt.quantiles_[:, 0]
+    assert len(quantiles) == 100
+    assert all(np.diff(quantiles) >= 0)
+
+
 def test_deprecated_quantile_transform_copy():
     future_message = ("The default value of `copy` will change from False to "
                       "True in 0.23 in order to make it more consistent with "

From f81b17c9cc598fc4413ce145de8e947464fb3de2 Mon Sep 17 00:00:00 2001
From: Olivier Grisel <olivier.grisel@ensta.org>
Date: Sat, 21 Dec 2019 15:48:22 +0100
Subject: [PATCH 11/11] Use _convert_container

---
 sklearn/preprocessing/tests/test_data.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py
index a7d49d856a7d1..cdff446cb336c 100644
--- a/sklearn/preprocessing/tests/test_data.py
+++ b/sklearn/preprocessing/tests/test_data.py
@@ -25,6 +25,7 @@
 from sklearn.utils._testing import assert_allclose
 from sklearn.utils._testing import assert_allclose_dense_sparse
 from sklearn.utils._testing import skip_if_32bit
+from sklearn.utils._testing import _convert_container
 
 from sklearn.utils.sparsefuncs import mean_variance_axis
 from sklearn.preprocessing._data import _handle_zeros_in_scale
@@ -1532,16 +1533,15 @@ def test_quantile_transform_nan():
     assert not np.isnan(transformer.quantiles_[:, 1:]).any()
 
 
-@pytest.mark.parametrize("sparse_data", [False, True])
-def test_quantile_transformer_sorted_quantiles(sparse_data):
+@pytest.mark.parametrize("array_type", ['array', 'sparse'])
+def test_quantile_transformer_sorted_quantiles(array_type):
     # Non-regression test for:
     # https://github.com/scikit-learn/scikit-learn/issues/15733
     # Taken from upstream bug report:
     # https://github.com/numpy/numpy/issues/14685
     X = np.array([0, 1, 1, 2, 2, 3, 3, 4, 5, 5, 1, 1, 9, 9, 9, 8, 8, 7] * 10)
     X = 0.1 * X.reshape(-1, 1)
-    if sparse_data:
-        X = sparse.csc_matrix(X)
+    X = _convert_container(X, array_type)
 
     n_quantiles = 100
     qt = QuantileTransformer(n_quantiles=n_quantiles).fit(X)