From 967d792eab04173ac1a1b7743f215465f1cdb369 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Wed, 1 Mar 2017 20:57:37 +0000
Subject: [PATCH 01/23] fixed issue 8484: bound PCA n_components by
 min(n_samples, n_features)

---
 sklearn/decomposition/pca.py | 42 ++++++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py
index eb11d9b032106..e8cf5f8737c93 100644
--- a/sklearn/decomposition/pca.py
+++ b/sklearn/decomposition/pca.py
@@ -134,8 +134,11 @@ class PCA(_BasePCA):
         to guess the dimension
         if ``0 < n_components < 1`` and svd_solver == 'full', select the number
         of components such that the amount of variance that needs to be
-        explained is greater than the percentage specified by n_components
-        n_components cannot be equal to n_features for svd_solver == 'arpack'.
+        explained is greater than the percentage specified by n_components.
+        if svd_solver == 'arpack', the number of components must be strictly
+        less than the minimum of n_features and n_samples:
+
+            n_components == min(n_samples, n_features)
 
     copy : bool (default True)
         If False, data passed to fit are overwritten and running
@@ -166,7 +169,7 @@ class PCA(_BasePCA):
         arpack :
             run SVD truncated to n_components calling ARPACK solver via
             `scipy.sparse.linalg.svds`. It requires strictly
-            0 < n_components < X.shape[1]
+            0 < n_components < min(X.shape)
         randomized :
             run randomized SVD by the method of Halko et al.
 
@@ -207,7 +210,7 @@ class PCA(_BasePCA):
         Percentage of variance explained by each of the selected components.
 
         If ``n_components`` is not set then all components are stored and the
-        sum of explained variances is equal to 1.0.
+        sum of the ratios is equal to 1.0.
 
     singular_values_ : array, shape (n_components,)
         The singular values corresponding to each of the selected components.
@@ -223,7 +226,8 @@ class PCA(_BasePCA):
         The estimated number of components. When n_components is set
         to 'mle' or a number between 0 and 1 (with svd_solver == 'full') this
         number is estimated from input data. Otherwise it equals the parameter
-        n_components, or n_features if n_components is None.
+        n_components, or the lesser value of n_features and n_samples
+        if n_components is None.
 
     noise_variance_ : float
         The estimated noise covariance following the Probabilistic PCA model
@@ -367,7 +371,7 @@ def _fit(self, X):
 
         # Handle n_components==None
         if self.n_components is None:
-            n_components = X.shape[1]
+            n_components = min(X.shape)
         else:
             n_components = self.n_components
 
@@ -400,10 +404,11 @@ def _fit_full(self, X, n_components):
             if n_samples < n_features:
                 raise ValueError("n_components='mle' is only supported "
                                  "if n_samples >= n_features")
-        elif not 0 <= n_components <= n_features:
+        elif not 0 <= n_components <= min(n_samples, n_features):
             raise ValueError("n_components=%r must be between 0 and "
-                             "n_features=%r with svd_solver='full'"
-                             % (n_components, n_features))
+                             "min(n_samples, n_features)=%r with "
+                             "svd_solver='full'"
+                             % (n_components, min(n_samples, n_features)))
 
         # Center data
         self.mean_ = np.mean(X, axis=0)
@@ -458,14 +463,19 @@ def _fit_truncated(self, X, n_components, svd_solver):
             raise ValueError("n_components=%r cannot be a string "
                              "with svd_solver='%s'"
                              % (n_components, svd_solver))
-        elif not 1 <= n_components <= n_features:
+        elif not 1 <= n_components <= min(n_samples, n_features):
             raise ValueError("n_components=%r must be between 1 and "
-                             "n_features=%r with svd_solver='%s'"
-                             % (n_components, n_features, svd_solver))
-        elif svd_solver == 'arpack' and n_components == n_features:
+                             "min(n_samples, n_features)=%r with "
+                             "svd_solver='%s'"
+                             % (n_components, min(n_samples, n_features),
+                             svd_solver))
+        elif svd_solver == 'arpack' and n_components == min(n_samples,
+        n_features):
             raise ValueError("n_components=%r must be stricly less than "
-                             "n_features=%r with svd_solver='%s'"
-                             % (n_components, n_features, svd_solver))
+                             "min(n_samples, n_features)=%r with "
+                             "svd_solver='%s'"
+                             % (n_components, min(n_samples, n_features),
+                             svd_solver))
 
         random_state = check_random_state(self.random_state)
 
@@ -500,7 +510,7 @@ def _fit_truncated(self, X, n_components, svd_solver):
         self.explained_variance_ratio_ = \
             self.explained_variance_ / total_var.sum()
         self.singular_values_ = S.copy()  # Store the singular values.
-        if self.n_components_ < n_features:
+        if self.n_components_ < min(n_samples, n_features):
             self.noise_variance_ = (total_var.sum() -
                                     self.explained_variance_.sum())
         else:

From 8ffff6fe0048d3b1ba0dc7b6c90aaf646dc94051 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Mon, 6 Mar 2017 15:20:54 +0000
Subject: [PATCH 02/23] dealt with indentation issues flagged by flake8

---
 sklearn/decomposition/pca.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py
index e8cf5f8737c93..0a672bf02e3ba 100644
--- a/sklearn/decomposition/pca.py
+++ b/sklearn/decomposition/pca.py
@@ -468,14 +468,14 @@ def _fit_truncated(self, X, n_components, svd_solver):
                              "min(n_samples, n_features)=%r with "
                              "svd_solver='%s'"
                              % (n_components, min(n_samples, n_features),
-                             svd_solver))
+                                svd_solver))
         elif svd_solver == 'arpack' and n_components == min(n_samples,
-        n_features):
+                                                            n_features):
             raise ValueError("n_components=%r must be stricly less than "
                              "min(n_samples, n_features)=%r with "
                              "svd_solver='%s'"
                              % (n_components, min(n_samples, n_features),
-                             svd_solver))
+                                svd_solver))
 
         random_state = check_random_state(self.random_state)
 

From cbdffc4927f11755d2b01e20d3838ee102130d83 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Wed, 8 Mar 2017 07:35:33 +0000
Subject: [PATCH 03/23] code to handle n_components==None with arpack was
 missing

---
 sklearn/decomposition/pca.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py
index 0a672bf02e3ba..0aee4c721a6e1 100644
--- a/sklearn/decomposition/pca.py
+++ b/sklearn/decomposition/pca.py
@@ -371,7 +371,10 @@ def _fit(self, X):
 
         # Handle n_components==None
         if self.n_components is None:
-            n_components = min(X.shape)
+            if self.svd_solver is not 'arpack':
+                n_components = min(X.shape)
+            else:
+                n_components = min(X.shape) - 1
         else:
             n_components = self.n_components
 

From 279184c47e414bec05bf4a0e98342088f84d4cef Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Wed, 8 Mar 2017 07:37:05 +0000
Subject: [PATCH 04/23] added non-regression tests for my previous changes in
 pca

---
 sklearn/decomposition/tests/test_pca.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
index ea321089d719c..44befd421b2a4 100644
--- a/sklearn/decomposition/tests/test_pca.py
+++ b/sklearn/decomposition/tests/test_pca.py
@@ -340,11 +340,26 @@ def test_pca_inverse():
 
 
 def test_pca_validation():
-    X = [[0, 1], [1, 0]]
+    # Ensures that extreme inputs for n_components common to all solvers
+    # (less than 0 or more than the lesser dimension of the input
+    # matrix X) raise errors.
+    X = np.array([[0, 1, 0], [1, 0, 0]])
     for solver in solver_list:
-        for n_components in [-1, 3]:
+        for n_comp in [-1, 3]:
             assert_raises(ValueError,
-                          PCA(n_components, svd_solver=solver).fit, X)
+                          PCA(n_components=n_comp, svd_solver=solver).fit, X)
+
+
+def test_n_components_none():
+    # Ensures that n_components == None is handled correctly
+    X = iris.data
+    for solver in solver_list:
+        pca = PCA(svd_solver=solver)
+        pca.fit(X)
+        if solver == 'arpack':
+            assert_equal(pca.n_components_, min(X.shape)-1)
+        else:
+            assert_equal(pca.n_components_, min(X.shape))
 
 
 def test_randomized_pca_check_projection():

From 4d093ab3b89284ac1b399e46772dc838b28cadb7 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Tue, 4 Apr 2017 21:15:32 +0100
Subject: [PATCH 05/23] minor change: reverted iterator name in test_pca

---
 sklearn/decomposition/tests/test_pca.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
index 44befd421b2a4..d88ce339baa1c 100644
--- a/sklearn/decomposition/tests/test_pca.py
+++ b/sklearn/decomposition/tests/test_pca.py
@@ -345,9 +345,9 @@ def test_pca_validation():
     # matrix X) raise errors.
     X = np.array([[0, 1, 0], [1, 0, 0]])
     for solver in solver_list:
-        for n_comp in [-1, 3]:
+        for n_components in [-1, 3]:
             assert_raises(ValueError,
-                          PCA(n_components=n_comp, svd_solver=solver).fit, X)
+                          PCA(n_components, svd_solver=solver).fit, X)
 
 
 def test_n_components_none():

From 0f3810164533320a9d83811385f39659d53a4a38 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Fri, 7 Apr 2017 14:46:25 +0100
Subject: [PATCH 06/23] changed AssertRaises to regex variant in test, and
 minor writing change in docs

---
 sklearn/decomposition/pca.py            | 2 +-
 sklearn/decomposition/tests/test_pca.py | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py
index 0aee4c721a6e1..30e77ceff81de 100644
--- a/sklearn/decomposition/pca.py
+++ b/sklearn/decomposition/pca.py
@@ -135,7 +135,7 @@ class PCA(_BasePCA):
         if ``0 < n_components < 1`` and svd_solver == 'full', select the number
         of components such that the amount of variance that needs to be
         explained is greater than the percentage specified by n_components.
-        if svd_solver == 'arpack', the number of components must be strictly
+        If svd_solver == 'arpack', the number of components must be strictly
         less than the minimum of n_features and n_samples:
 
             n_components == min(n_samples, n_features)
diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
index d88ce339baa1c..b7f8b942aaace 100644
--- a/sklearn/decomposition/tests/test_pca.py
+++ b/sklearn/decomposition/tests/test_pca.py
@@ -8,6 +8,7 @@
 from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_greater
 from sklearn.utils.testing import assert_raises
+from sklearn.utils.testing import assert_raises_regex
 from sklearn.utils.testing import assert_no_warnings
 from sklearn.utils.testing import assert_warns_message
 from sklearn.utils.testing import ignore_warnings
@@ -346,8 +347,11 @@ def test_pca_validation():
     X = np.array([[0, 1, 0], [1, 0, 0]])
     for solver in solver_list:
         for n_components in [-1, 3]:
-            assert_raises(ValueError,
-                          PCA(n_components, svd_solver=solver).fit, X)
+            assert_raises_regex(ValueError,
+                                "n_components\=.* must be between .* and min\("
+                                "n_samples, n_features\)\=.* with svd_solver"
+                                "\=\'(?:full|arpack|randomized|auto)\'$",
+                                PCA(n_components, svd_solver=solver).fit, X)
 
 
 def test_n_components_none():

From 4ee548c75e2f0384961144f0bcc989853aab6257 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Sat, 27 May 2017 21:05:09 +0100
Subject: [PATCH 07/23] corrected pca.py fix: compare svd_solver with !=
 rather than 'is not'

---
 sklearn/decomposition/pca.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py
index 30e77ceff81de..db0af4e855a2b 100644
--- a/sklearn/decomposition/pca.py
+++ b/sklearn/decomposition/pca.py
@@ -371,7 +371,7 @@ def _fit(self, X):
 
         # Handle n_components==None
         if self.n_components is None:
-            if self.svd_solver is not 'arpack':
+            if self.svd_solver != 'arpack':
                 n_components = min(X.shape)
             else:
                 n_components = min(X.shape) - 1

From b72ffe41a5809692be1d423bec33288be08e0461 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Sat, 27 May 2017 21:07:19 +0100
Subject: [PATCH 08/23] improved test_pca_validation()'s scope

---
 sklearn/decomposition/tests/test_pca.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
index b7f8b942aaace..66a2d1a2fb1e8 100644
--- a/sklearn/decomposition/tests/test_pca.py
+++ b/sklearn/decomposition/tests/test_pca.py
@@ -352,6 +352,12 @@ def test_pca_validation():
                                 "n_samples, n_features\)\=.* with svd_solver"
                                 "\=\'(?:full|arpack|randomized|auto)\'$",
                                 PCA(n_components, svd_solver=solver).fit, X)
+            # We conduct the same test on X.T so that it is invariant to axis.
+            assert_raises_regex(ValueError,
+                                "n_components\=.* must be between .* and min\("
+                                "n_samples, n_features\)\=.* with svd_solver"
+                                "\=\'(?:full|arpack|randomized|auto)\'$",
+                                PCA(n_components, svd_solver=solver).fit, X.T)
 
 
 def test_n_components_none():
@@ -361,7 +367,7 @@ def test_n_components_none():
         pca = PCA(svd_solver=solver)
         pca.fit(X)
         if solver == 'arpack':
-            assert_equal(pca.n_components_, min(X.shape)-1)
+            assert_equal(pca.n_components_, min(X.shape) - 1)
         else:
             assert_equal(pca.n_components_, min(X.shape))
 

From 110cd1860c26e7190f86bfcc7945a76106896129 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Tue, 15 Aug 2017 16:23:18 +0100
Subject: [PATCH 09/23] added an entry to whats_new.rst

---
 doc/whats_new.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 258dfe19b33cb..c10b4f86111e2 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -51,6 +51,11 @@ Decomposition, manifold learning and clustering
   division on Python 2 versions. :issue:`9492` by
   :user:`James Bourbeau <jrbourbeau>`.
 
+- In :class:`decomposition.pca` selecting a n_components parameter greater than
+  the number of samples failed to raise an error.
+  Similarly, the ``n_components=None`` case now selects the minimum of
+  n_samples and n_features. :issue:`6452`. By :user:`Wally Gauze <wallygauze>`.
+
 Version 0.19
 ============
 

From c9049f9a2e942aa049914ec81ce931bdcb6bbb98 Mon Sep 17 00:00:00 2001
From: wallygauze <wallygauze@yahoo.fr>
Date: Tue, 15 Aug 2017 20:42:19 +0100
Subject: [PATCH 10/23] add requested code for axis-invariance check

---
 sklearn/decomposition/tests/test_pca.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
index 6b0836f9e15aa..1c52e2cc1edce 100644
--- a/sklearn/decomposition/tests/test_pca.py
+++ b/sklearn/decomposition/tests/test_pca.py
@@ -379,6 +379,16 @@ def test_n_components_none():
             assert_equal(pca.n_components_, min(X.shape) - 1)
         else:
             assert_equal(pca.n_components_, min(X.shape))
+    
+    # We conduct the same test on X.T so that it is invariant to axis.
+    X_2 = X.T
+    for solver in solver_list:
+        pca = PCA(svd_solver=solver)
+        pca.fit(X_2)
+        if solver == 'arpack':
+            assert_equal(pca.n_components_, min(X_2.shape) - 1)
+        else:
+            assert_equal(pca.n_components_, min(X_2.shape))
 
 
 def test_randomized_pca_check_projection():

From c89ef02e727307f331881e0334bc674902cf7e8b Mon Sep 17 00:00:00 2001
From: wallygauze <wallygauze@yahoo.fr>
Date: Tue, 15 Aug 2017 20:48:57 +0100
Subject: [PATCH 11/23] Clarified doc change

---
 sklearn/decomposition/pca.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py
index 1b043ff2b3f2b..337396212bb87 100644
--- a/sklearn/decomposition/pca.py
+++ b/sklearn/decomposition/pca.py
@@ -136,9 +136,10 @@ class PCA(_BasePCA):
         of components such that the amount of variance that needs to be
         explained is greater than the percentage specified by n_components.
         If svd_solver == 'arpack', the number of components must be strictly
-        less than the minimum of n_features and n_samples:
-
-            n_components == min(n_samples, n_features)
+        less than the minimum of n_features and n_samples. 
+        Hence, the None case results in:
+        
+            n_components == min(n_samples, n_features) - 1
 
     copy : bool (default True)
         If False, data passed to fit are overwritten and running

From b91fa3b61a3817bcc062f4791528d93478d0b763 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Tue, 15 Aug 2017 22:42:09 +0100
Subject: [PATCH 12/23] rephrased whats_new entry

---
 doc/whats_new.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index c10b4f86111e2..4e7305888bc99 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -52,9 +52,9 @@ Decomposition, manifold learning and clustering
   :user:`James Bourbeau <jrbourbeau>`.
 
 - In :class:`decomposition.pca` selecting a n_components parameter greater than
-  the number of samples failed to raise an error.
+  the number of samples now raises an error.
   Similarly, the ``n_components=None`` case now selects the minimum of
-  n_samples and n_features. :issue:`6452`. By :user:`Wally Gauze <wallygauze>`.
+  n_samples and n_features. :issue:`8484`. By :user:`Wally Gauze <wallygauze>`.
 
 Version 0.19
 ============

From d44986827fb9e2049e5882dcd84d6fb26c2aa08b Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Tue, 15 Aug 2017 22:42:50 +0100
Subject: [PATCH 13/23] fixed flake8

---
 sklearn/decomposition/pca.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py
index 337396212bb87..1178f8e540552 100644
--- a/sklearn/decomposition/pca.py
+++ b/sklearn/decomposition/pca.py
@@ -136,9 +136,9 @@ class PCA(_BasePCA):
         of components such that the amount of variance that needs to be
         explained is greater than the percentage specified by n_components.
         If svd_solver == 'arpack', the number of components must be strictly
-        less than the minimum of n_features and n_samples. 
+        less than the minimum of n_features and n_samples.
         Hence, the None case results in:
-        
+
             n_components == min(n_samples, n_features) - 1
 
     copy : bool (default True)

From 07c1e1d8945aa195855b4b375ed23610f5cd8b0d Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Tue, 15 Aug 2017 22:43:16 +0100
Subject: [PATCH 14/23] refactored test code

---
 sklearn/decomposition/tests/test_pca.py | 48 ++++++++++---------------
 1 file changed, 19 insertions(+), 29 deletions(-)

diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
index 1c52e2cc1edce..c4073330a48fb 100644
--- a/sklearn/decomposition/tests/test_pca.py
+++ b/sklearn/decomposition/tests/test_pca.py
@@ -354,41 +354,31 @@ def test_pca_validation():
     # (less than 0 or more than the lesser dimension of the input
     # matrix X) raise errors.
     X = np.array([[0, 1, 0], [1, 0, 0]])
-    for solver in solver_list:
-        for n_components in [-1, 3]:
-            assert_raises_regex(ValueError,
-                                "n_components\=.* must be between .* and min\("
-                                "n_samples, n_features\)\=.* with svd_solver"
-                                "\=\'(?:full|arpack|randomized|auto)\'$",
-                                PCA(n_components, svd_solver=solver).fit, X)
-            # We conduct the same test on X.T so that it is invariant to axis.
-            assert_raises_regex(ValueError,
-                                "n_components\=.* must be between .* and min\("
-                                "n_samples, n_features\)\=.* with svd_solver"
-                                "\=\'(?:full|arpack|randomized|auto)\'$",
-                                PCA(n_components, svd_solver=solver).fit, X.T)
+    # We conduct the same test on X.T so that it is invariant to axis.
+    for data in [X, X.T]:
+        for solver in solver_list:
+            for n_components in [-1, 3]:
+                assert_raises_regex(ValueError,
+                                    "n_components\=.* must be between .* and "
+                                    "min\(n_samples, n_features\)\=.* with "
+                                    "svd_solver\="
+                                    "\'(?:full|arpack|randomized|auto)\'$",
+                                    PCA(n_components, svd_solver=solver).fit,
+                                    data)
 
 
 def test_n_components_none():
     # Ensures that n_components == None is handled correctly
     X = iris.data
-    for solver in solver_list:
-        pca = PCA(svd_solver=solver)
-        pca.fit(X)
-        if solver == 'arpack':
-            assert_equal(pca.n_components_, min(X.shape) - 1)
-        else:
-            assert_equal(pca.n_components_, min(X.shape))
-    
     # We conduct the same test on X.T so that it is invariant to axis.
-    X_2 = X.T
-    for solver in solver_list:
-        pca = PCA(svd_solver=solver)
-        pca.fit(X_2)
-        if solver == 'arpack':
-            assert_equal(pca.n_components_, min(X_2.shape) - 1)
-        else:
-            assert_equal(pca.n_components_, min(X_2.shape))
+    for data in [X, X.T]:
+        for solver in solver_list:
+            pca = PCA(svd_solver=solver)
+            pca.fit(data)
+            if solver == 'arpack':
+                assert_equal(pca.n_components_, min(data.shape) - 1)
+            else:
+                assert_equal(pca.n_components_, min(data.shape))
 
 
 def test_randomized_pca_check_projection():

From 724e612f9910155db0be27f983e49bf9281a3ec2 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Fri, 18 Aug 2017 20:00:22 +0100
Subject: [PATCH 15/23] corrected whats_new entry typo

---
 doc/whats_new.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 4e7305888bc99..cfe3b1f475d24 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -41,7 +41,7 @@ Bug fixes
 
 Decomposition, manifold learning and clustering
 
-- Fix for uninformative error in :class:`decomposition.incremental_pca`:
+- Fix for uninformative error in :class:`decomposition.IncrementalPCA`:
   now an error is raised if the number of components is larger than the
   chosen batch size. The ``n_components=None`` case was adapted accordingly.
   :issue:`6452`. By :user:`Wally Gauze <wallygauze>`.
@@ -51,7 +51,7 @@ Decomposition, manifold learning and clustering
   division on Python 2 versions. :issue:`9492` by
   :user:`James Bourbeau <jrbourbeau>`.
 
-- In :class:`decomposition.pca` selecting a n_components parameter greater than
+- In :class:`decomposition.PCA` selecting a n_components parameter greater than
   the number of samples now raises an error.
   Similarly, the ``n_components=None`` case now selects the minimum of
   n_samples and n_features. :issue:`8484`. By :user:`Wally Gauze <wallygauze>`.

From 2251ae5046495523d385a5f294da846bdeb36ffc Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Fri, 18 Aug 2017 20:01:55 +0100
Subject: [PATCH 16/23] arpack case was missing from test; improved overall
 test

---
 sklearn/decomposition/tests/test_pca.py | 27 ++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
index c4073330a48fb..47c0b403af687 100644
--- a/sklearn/decomposition/tests/test_pca.py
+++ b/sklearn/decomposition/tests/test_pca.py
@@ -354,17 +354,30 @@ def test_pca_validation():
     # (less than 0 or more than the lesser dimension of the input
     # matrix X) raise errors.
     X = np.array([[0, 1, 0], [1, 0, 0]])
+    minimum = 2 # The smallest dimension
+    lower_limit = {'randomized':1,'full':0,'auto':0}
     # We conduct the same test on X.T so that it is invariant to axis.
     for data in [X, X.T]:
         for solver in solver_list:
             for n_components in [-1, 3]:
-                assert_raises_regex(ValueError,
-                                    "n_components\=.* must be between .* and "
-                                    "min\(n_samples, n_features\)\=.* with "
-                                    "svd_solver\="
-                                    "\'(?:full|arpack|randomized|auto)\'$",
-                                    PCA(n_components, svd_solver=solver).fit,
-                                    data)
+                if solver == 'arpack':
+                    assert_raises_regex(ValueError,
+                                        "n_components={} must be stricly less "
+                                        "than min(n_samples, n_features)={} "
+                                        "with svd_solver='arpack'"
+                                        .format(n_components, minimum),
+                                        PCA(n_components, svd_solver=solver)
+                                        .fit, data)
+                else:
+                    assert_raises_regex(ValueError,
+                                        "n_components={} must be between {} "
+                                        "and min(n_samples, n_features)={} with "
+                                        "svd_solver='{}'"
+                                        .format(n_components,
+                                                lower_limit[solver], minimum,
+                                                solver),
+                                        PCA(n_components,
+                                            svd_solver=solver).fit, data)
 
 
 def test_n_components_none():

From f9af4d63ff3caacfeb8c315c43f3e6ee060f18ce Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Fri, 18 Aug 2017 21:15:15 +0100
Subject: [PATCH 17/23] flake8 corrections

---
 sklearn/decomposition/tests/test_pca.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
index 47c0b403af687..7d6787b72ad49 100644
--- a/sklearn/decomposition/tests/test_pca.py
+++ b/sklearn/decomposition/tests/test_pca.py
@@ -354,8 +354,8 @@ def test_pca_validation():
     # (less than 0 or more than the lesser dimension of the input
     # matrix X) raise errors.
     X = np.array([[0, 1, 0], [1, 0, 0]])
-    minimum = 2 # The smallest dimension
-    lower_limit = {'randomized':1,'full':0,'auto':0}
+    minimum = 2  # The smallest dimension
+    lower_limit = {'randomized': 1, 'full': 0, 'auto': 0}
     # We conduct the same test on X.T so that it is invariant to axis.
     for data in [X, X.T]:
         for solver in solver_list:
@@ -371,8 +371,8 @@ def test_pca_validation():
                 else:
                     assert_raises_regex(ValueError,
                                         "n_components={} must be between {} "
-                                        "and min(n_samples, n_features)={} with "
-                                        "svd_solver='{}'"
+                                        "and min(n_samples, n_features)={} "
+                                        "with svd_solver='{}'"
                                         .format(n_components,
                                                 lower_limit[solver], minimum,
                                                 solver),

From fe7047fd853421d358034dfdcdba9690a2d06bc8 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Sat, 19 Aug 2017 21:06:15 +0100
Subject: [PATCH 18/23] arpack case was still missing + fixed my test bug +
 more refactoring

---
 sklearn/decomposition/tests/test_pca.py | 60 ++++++++++++++-----------
 1 file changed, 35 insertions(+), 25 deletions(-)

diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
index 7d6787b72ad49..c94712d0b18cb 100644
--- a/sklearn/decomposition/tests/test_pca.py
+++ b/sklearn/decomposition/tests/test_pca.py
@@ -350,34 +350,44 @@ def test_pca_inverse():
 
 
 def test_pca_validation():
-    # Ensures that extreme inputs for n_components common to all solvers
-    # (less than 0 or more than the lesser dimension of the input
-    # matrix X) raise errors.
+    # Ensures that solver-specific extreme inputs for the n_components
+    # parameter raise errors
     X = np.array([[0, 1, 0], [1, 0, 0]])
-    minimum = 2  # The smallest dimension
-    lower_limit = {'randomized': 1, 'full': 0, 'auto': 0}
-    # We conduct the same test on X.T so that it is invariant to axis.
-    for data in [X, X.T]:
-        for solver in solver_list:
+    smallest_d = 2  # The smallest dimension
+    lower_limit = {'randomized': 1, 'arpack': 1, 'full': 0, 'auto': 0}
+
+    for solver in solver_list:
+        # We conduct the same test on X.T so that it is invariant to axis.
+        for data in [X, X.T]:
             for n_components in [-1, 3]:
-                if solver == 'arpack':
-                    assert_raises_regex(ValueError,
-                                        "n_components={} must be stricly less "
-                                        "than min(n_samples, n_features)={} "
-                                        "with svd_solver='arpack'"
-                                        .format(n_components, minimum),
-                                        PCA(n_components, svd_solver=solver)
-                                        .fit, data)
+
+                if solver == 'auto':
+                    solver_reported = 'full'
                 else:
-                    assert_raises_regex(ValueError,
-                                        "n_components={} must be between {} "
-                                        "and min(n_samples, n_features)={} "
-                                        "with svd_solver='{}'"
-                                        .format(n_components,
-                                                lower_limit[solver], minimum,
-                                                solver),
-                                        PCA(n_components,
-                                            svd_solver=solver).fit, data)
+                    solver_reported = solver
+
+                assert_raises_regex(ValueError,
+                                    "n_components={} must be between "
+                                    "{} and min\(n_samples, n_features\)="
+                                    "{} with svd_solver=\'{}\'"
+                                    .format(n_components,
+                                            lower_limit[solver],
+                                            smallest_d,
+                                            solver_reported),
+                                    PCA(n_components,
+                                        svd_solver=solver).fit, data)
+            if solver == 'arpack':
+
+                n_components = smallest_d
+
+                assert_raises_regex(ValueError,
+                                    "n_components={} must be "
+                                    "strictly less than "
+                                    "min\(n_samples, n_features\)={}"
+                                    " with svd_solver=\'arpack\'"
+                                    .format(n_components, smallest_d),
+                                    PCA(n_components, svd_solver=solver)
+                                    .fit, data)
 
 
 def test_n_components_none():

From 1e7cd1047201c52ee7694729d9887ad9e190a578 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Sat, 19 Aug 2017 21:06:40 +0100
Subject: [PATCH 19/23] corrected typo

---
 sklearn/decomposition/pca.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py
index 1178f8e540552..9cd108710ad3f 100644
--- a/sklearn/decomposition/pca.py
+++ b/sklearn/decomposition/pca.py
@@ -475,7 +475,7 @@ def _fit_truncated(self, X, n_components, svd_solver):
                                 svd_solver))
         elif svd_solver == 'arpack' and n_components == min(n_samples,
                                                             n_features):
-            raise ValueError("n_components=%r must be stricly less than "
+            raise ValueError("n_components=%r must be strictly less than "
                              "min(n_samples, n_features)=%r with "
                              "svd_solver='%s'"
                              % (n_components, min(n_samples, n_features),

From a52851294954ffac94e474583d95e4bddcf3417b Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Sun, 20 Aug 2017 13:13:38 +0100
Subject: [PATCH 20/23] allow type long?

---
 sklearn/decomposition/tests/test_pca.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
index c94712d0b18cb..6c0ccb2a1d813 100644
--- a/sklearn/decomposition/tests/test_pca.py
+++ b/sklearn/decomposition/tests/test_pca.py
@@ -353,7 +353,7 @@ def test_pca_validation():
     # Ensures that solver-specific extreme inputs for the n_components
     # parameter raise errors
     X = np.array([[0, 1, 0], [1, 0, 0]])
-    smallest_d = 2  # The smallest dimension
+    smallest_d = int(2)  # The smallest dimension
     lower_limit = {'randomized': 1, 'arpack': 1, 'full': 0, 'auto': 0}
 
     for solver in solver_list:
@@ -367,9 +367,9 @@ def test_pca_validation():
                     solver_reported = solver
 
                 assert_raises_regex(ValueError,
-                                    "n_components={} must be between "
-                                    "{} and min\(n_samples, n_features\)="
-                                    "{} with svd_solver=\'{}\'"
+                                    "n_components={}L? must be between "
+                                    "{}L? and min\(n_samples, n_features\)="
+                                    "{}L? with svd_solver=\'{}\'"
                                     .format(n_components,
                                             lower_limit[solver],
                                             smallest_d,
@@ -381,9 +381,9 @@ def test_pca_validation():
                 n_components = smallest_d
 
                 assert_raises_regex(ValueError,
-                                    "n_components={} must be "
+                                    "n_components={}L? must be "
                                     "strictly less than "
-                                    "min\(n_samples, n_features\)={}"
+                                    "min\(n_samples, n_features\)={}L?"
                                     " with svd_solver=\'arpack\'"
                                     .format(n_components, smallest_d),
                                     PCA(n_components, svd_solver=solver)

From f25bd9ccd35e2bc0fe124848a90cee2894df004f Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Tue, 22 Aug 2017 15:44:49 +0100
Subject: [PATCH 21/23] accidentally left useless piece of code

---
 sklearn/decomposition/tests/test_pca.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py
index 6c0ccb2a1d813..aa67189407296 100644
--- a/sklearn/decomposition/tests/test_pca.py
+++ b/sklearn/decomposition/tests/test_pca.py
@@ -353,7 +353,7 @@ def test_pca_validation():
     # Ensures that solver-specific extreme inputs for the n_components
     # parameter raise errors
     X = np.array([[0, 1, 0], [1, 0, 0]])
-    smallest_d = int(2)  # The smallest dimension
+    smallest_d = 2  # The smallest dimension
     lower_limit = {'randomized': 1, 'arpack': 1, 'full': 0, 'auto': 0}
 
     for solver in solver_list:

From bd1f151fe5778b55ef7654d756c5951d07905d79 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Fri, 8 Sep 2017 20:51:29 +0100
Subject: [PATCH 22/23] reverted changes in doc/whats_new.rst

---
 doc/whats_new.rst | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index fe8e0e007f932..5de27d3251787 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -65,7 +65,7 @@ Bug fixes
 
 Decomposition, manifold learning and clustering
 
-- Fix for uninformative error in :class:`decomposition.IncrementalPCA`:
+- Fix for uninformative error in :class:`decomposition.incremental_pca`:
   now an error is raised if the number of components is larger than the
   chosen batch size. The ``n_components=None`` case was adapted accordingly.
   :issue:`6452`. By :user:`Wally Gauze <wallygauze>`.
@@ -82,11 +82,6 @@ Decomposition, manifold learning and clustering
   where all samples had equal similarity.
   :issue:`9612`. By :user:`Jonatan Samoocha <jsamoocha>`.
 
-- In :class:`decomposition.PCA` selecting a n_components parameter greater than
-  the number of samples now raises an error.
-  Similarly, the ``n_components=None`` case now selects the minimum of
-  n_samples and n_features. :issue:`8484`. By :user:`Wally Gauze <wallygauze>`.
-
 Version 0.19
 ============
 

From e3ecd12b45b905ed7973ea743308e55f41812d34 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Fri, 8 Sep 2017 20:57:56 +0100
Subject: [PATCH 23/23] added entry in whats_new/v0.20.rst

---
 doc/whats_new/v0.20.rst | 102 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 102 insertions(+)
 create mode 100644 doc/whats_new/v0.20.rst

diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
new file mode 100644
index 0000000000000..4f5e13e7860a5
--- /dev/null
+++ b/doc/whats_new/v0.20.rst
@@ -0,0 +1,102 @@
+.. include:: _contributors.rst
+
+.. currentmodule:: sklearn
+
+.. _changes_0_20:
+
+Version 0.20 (under development)
+================================
+
+Changed models
+--------------
+
+The following estimators and functions, when fit with the same data and
+parameters, may produce different models from the previous version. This often
+occurs due to changes in the modelling logic (bug fixes or enhancements), or in
+random sampling procedures.
+
+- :class:`decomposition.IncrementalPCA` in Python 2 (bug fix)
+
+Details are listed in the changelog below.
+
+(While we are trying to better inform users by providing this information, we
+cannot assure that this list is complete.)
+
+Changelog
+---------
+
+New features
+............
+
+Classifiers and regressors
+
+- :class:`ensemble.GradientBoostingClassifier` and
+  :class:`ensemble.GradientBoostingRegressor` now support early stopping
+  via ``n_iter_no_change``, ``validation_fraction`` and ``tol``. :issue:`7071`
+  by `Raghav RV`_
+
+- Added :class:`naive_bayes.ComplementNB`, which implements the Complement
+  Naive Bayes classifier described in Rennie et al. (2003).
+  By :user:`Michael A. Alcorn <airalcorn2>`.
+
+Enhancements
+............
+
+Classifiers and regressors
+
+- In :class:`gaussian_process.GaussianProcessRegressor`, method ``predict``
+  is faster when using ``return_std=True``, in particular when called
+  several times in a row. :issue:`9234` by :user:`andrewww <andrewww>`
+  and :user:`Minghui Liu <minghui-liu>`.
+
+- Add ``named_estimators_`` attribute in
+  :class:`ensemble.VotingClassifier` to access fitted
+  estimators. :issue:`9157` by :user:`Herilalaina Rakotoarison <herilalaina>`.
+
+
+Model evaluation and meta-estimators
+
+- A scorer based on :func:`metrics.brier_score_loss` is also available.
+  :issue:`9521` by :user:`Hanmin Qin <qinhanmin2014>`.
+
+Linear, kernelized and related models
+
+- Deprecate ``random_state`` parameter in :class:`svm.OneClassSVM` as the
+  underlying implementation is not random.
+  :issue:`9497` by :user:`Albert Thomas <albertcthomas>`.
+
+Bug fixes
+.........
+
+Decomposition, manifold learning and clustering
+
+- Fix for uninformative error in :class:`decomposition.IncrementalPCA`:
+  now an error is raised if the number of components is larger than the
+  chosen batch size. The ``n_components=None`` case was adapted accordingly.
+  :issue:`6452`. By :user:`Wally Gauze <wallygauze>`.
+
+- Fixed a bug where the ``partial_fit`` method of
+  :class:`decomposition.IncrementalPCA` used integer division instead of float
+  division on Python 2 versions. :issue:`9492` by
+  :user:`James Bourbeau <jrbourbeau>`.
+
+- Fixed a bug where the ``fit`` method of
+  :class:`cluster.affinity_propagation_.AffinityPropagation` stored cluster
+  centers as 3d array instead of 2d array in case of non-convergence. For the
+  same class, fixed undefined and arbitrary behavior in case of training data
+  where all samples had equal similarity.
+  :issue:`9612`. By :user:`Jonatan Samoocha <jsamoocha>`.
+
+- In :class:`decomposition.PCA`, selecting an ``n_components`` parameter
+  greater than the number of samples now raises an error. Similarly, the
+  ``n_components=None`` case now selects the minimum of ``n_samples`` and
+  ``n_features``. :issue:`8484`. By :user:`Wally Gauze <wallygauze>`.
+
+API changes summary
+-------------------
+
+Linear, kernelized and related models
+
+- Deprecate ``random_state`` parameter in :class:`svm.OneClassSVM` as the
+  underlying implementation is not random.
+  :issue:`9497` by :user:`Albert Thomas <albertcthomas>`.