scikit-learn · adrinjalali · Nov 7, 2024 · Nov 4, 2024 · Nov 4, 2024 · Nov 5, 2024
diff --git a/doc/whats_new/upcoming_changes/sklearn.decomposition/30224.fix.rst b/doc/whats_new/upcoming_changes/sklearn.decomposition/30224.fix.rst
@@ -0,0 +1,6 @@
+- :class:`~sklearn.decomposition.IncrementalPCA`
+  now raises a ``ValueError`` for a batch containing fewer samples than
+  ``n_components`` only on the first call to ``partial_fit``. Subsequent
+  calls to ``partial_fit`` no longer face this restriction, so later
+  batches may be smaller than ``n_components``.
+  By :user:`Thomas Gessey-Jones <ThomasGesseyJonesPX>`
diff --git a/sklearn/decomposition/_incremental_pca.py b/sklearn/decomposition/_incremental_pca.py
@@ -306,11 +306,11 @@ def partial_fit(self, X, y=None, check_input=True):
                 "more rows than columns for IncrementalPCA "
                 "processing" % (self.n_components, n_features)
             )
-        elif not self.n_components <= n_samples:
+        elif self.n_components > n_samples and first_pass:
             raise ValueError(
-                "n_components=%r must be less or equal to "
-                "the batch number of samples "
-                "%d." % (self.n_components, n_samples)
+                f"n_components={self.n_components} must be less or equal to "
+                f"the batch number of samples {n_samples} for the first "
+                "partial_fit call."
             )
         else:
             self.n_components_ = self.n_components

diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py
@@ -139,14 +139,13 @@ def test_incremental_pca_validation():
     ):
         IncrementalPCA(n_components, batch_size=10).fit(X)
 
-    # Tests that n_components is also <= n_samples.
+    # Test that n_components is also <= n_samples in the first call to partial_fit.
     n_components = 3
     with pytest.raises(
         ValueError,
         match=(
-            "n_components={} must be"
-            " less or equal to the batch number of"
-            " samples {}".format(n_components, n_samples)
+            f"n_components={n_components} must be less or equal to the batch "
+            f"number of samples {n_samples} for the first partial_fit call."
         ),
     ):
         IncrementalPCA(n_components=n_components).partial_fit(X)
@@ -233,6 +232,27 @@ def test_incremental_pca_batch_signs():
         assert_almost_equal(np.sign(i), np.sign(j), decimal=6)
 
 
+def test_incremental_pca_partial_fit_small_batch():
+    # Non-regression test: after the first partial_fit call, later batches may
+    # hold fewer samples than n_components (here a single row each).
+    rng = np.random.RandomState(1999)
+    n, p = 50, 3  # n rows (samples), p columns (features)
+    X = rng.randn(n, p)  # spherical data
+    X[:, 1] *= 0.00001  # shrink the middle column so its spread is tiny
+    X += [5, 4, 3]  # shift every column so the mean is far from zero
+
+    n_components = p
+    pipca = IncrementalPCA(n_components=n_components)
+    pipca.partial_fit(X[:n_components])  # first batch: n_components rows
+    for idx in range(n_components, n):
+        pipca.partial_fit(X[idx : idx + 1])  # remaining rows, one per call
+
+    pca = PCA(n_components=n_components)
+    pca.fit(X)  # reference: a single fit on all rows at once
+
+    assert_allclose(pca.components_, pipca.components_, atol=1e-3)
+
+
 def test_incremental_pca_batch_values():
     # Test that components_ values are stable over batch sizes.
     rng = np.random.RandomState(1999)