scikit-learn · jnothman · Aug 14, 2017 · Jul 8, 2017 · Jul 8, 2017 · Jul 8, 2017
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
@@ -41,12 +41,16 @@ Bug fixes
 
 Decomposition, manifold learning and clustering
 
+- Fix for uninformative error in :class:`decomposition.IncrementalPCA`:
+  now an error is raised if the number of components is larger than the
+  chosen batch size. The ``n_components=None`` case was adapted accordingly.
+  :issue:`6452` by :user:`Wally Gauze <wallygauze>`.
+
 - Fixed a bug where the ``partial_fit`` method of
   :class:`decomposition.IncrementalPCA` used integer division instead of float
   division on Python 2 versions. :issue:`9492` by
   :user:`James Bourbeau <jrbourbeau>`.
 
-
 Version 0.19
 ============
 

diff --git a/sklearn/decomposition/incremental_pca.py b/sklearn/decomposition/incremental_pca.py
@@ -211,11 +211,18 @@ def partial_fit(self, X, y=None, check_input=True):
             self.components_ = None
 
         if self.n_components is None:
-            self.n_components_ = n_features
+            if self.components_ is None:
+                self.n_components_ = min(n_samples, n_features)
+            else:
+                self.n_components_ = self.components_.shape[0]
         elif not 1 <= self.n_components <= n_features:
             raise ValueError("n_components=%r invalid for n_features=%d, need "
                              "more rows than columns for IncrementalPCA "
                              "processing" % (self.n_components, n_features))
+        elif not self.n_components <= n_samples:
+            raise ValueError("n_components=%r must be less or equal to "
+                             "the batch number of samples "
+                             "%d." % (self.n_components, n_samples))
         else:
             self.n_components_ = self.n_components
 

diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py
@@ -4,6 +4,7 @@
 from sklearn.utils.testing import assert_almost_equal
 from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import assert_raises
+from sklearn.utils.testing import assert_raises_regex
 
 from sklearn import datasets
 from sklearn.decomposition import PCA, IncrementalPCA
@@ -73,10 +74,41 @@ def test_incremental_pca_inverse():
 
 def test_incremental_pca_validation():
     # Test that n_components is >=1 and <= n_features.
-    X = [[0, 1], [1, 0]]
-    for n_components in [-1, 0, .99, 3]:
-        assert_raises(ValueError, IncrementalPCA(n_components,
-                                                 batch_size=10).fit, X)
+    X = np.array([[0, 1, 0], [1, 0, 0]])
+    n_samples, n_features = X.shape
+    for n_components in [-1, 0, .99, 4]:
+        assert_raises_regex(ValueError,
+                            "n_components={} invalid for n_features={}, need"
+                            " more rows than columns for IncrementalPCA "
+                            "processing".format(n_components, n_features),
+                            IncrementalPCA(n_components, batch_size=10).fit, X)
+
+    # Tests that n_components is also <= n_samples.
+    n_components = 3
+    assert_raises_regex(ValueError,
+                        "n_components={} must be less or equal to "
+                        "the batch number of samples {}".format(
+                            n_components, n_samples),
+                        IncrementalPCA(
+                            n_components=n_components).partial_fit, X)
+
+
+def test_n_components_none():
+    # Check n_components=None for both n_samples > n_features and the reverse.
+    rng = np.random.RandomState(1999)
+    for n_samples, n_features in [(50, 10), (10, 50)]:
+        X = rng.rand(n_samples, n_features)
+        ipca = IncrementalPCA(n_components=None)
+
+        # First partial_fit call, ipca.n_components_ is inferred from
+        # min(X.shape)
+        ipca.partial_fit(X)
+        assert ipca.n_components_ == min(X.shape)
+
+        # Second partial_fit call, ipca.n_components_ is inferred from
+        # ipca.components_ computed from the first partial_fit call
+        ipca.partial_fit(X)
+        assert ipca.n_components_ == ipca.components_.shape[0]
 
 
 def test_incremental_pca_set_params():