
DOC Add links to decomposition examples in docstrings and user guide #26932


Merged: 11 commits, Jan 11, 2024
3 changes: 3 additions & 0 deletions doc/conf.py
@@ -303,6 +303,9 @@
"auto_examples/ensemble/plot_adaboost_hastie_10_2": (
"auto_examples/ensemble/plot_adaboost_multiclass"
),
"auto_examples/decomposition/plot_pca_3d": (
"auto_examples/decomposition/plot_pca_iris"
),
}
html_context["redirects"] = redirects
for old_link in redirects:
1 change: 1 addition & 0 deletions doc/modules/decomposition.rst
@@ -53,6 +53,7 @@ data based on the amount of variance it explains. As such it implements a

.. topic:: Examples:

* :ref:`sphx_glr_auto_examples_decomposition_plot_pca_iris.py`
* :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_lda.py`
* :ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_fa_model_selection.py`

76 changes: 41 additions & 35 deletions doc/tutorial/statistical_inference/unsupervised_learning.rst
@@ -204,51 +204,57 @@ Decompositions: from a signal to components and loadings
Principal component analysis: PCA
-----------------------------------

:ref:`PCA` selects the successive components that
explain the maximum variance in the signal.
:ref:`PCA` selects the successive components that explain the maximum variance in the
signal. Let's create a synthetic 3-dimensional dataset.

.. |pca_3d_axis| image:: /auto_examples/decomposition/images/sphx_glr_plot_pca_3d_001.png
:target: ../../auto_examples/decomposition/plot_pca_3d.html
:scale: 70

.. |pca_3d_aligned| image:: /auto_examples/decomposition/images/sphx_glr_plot_pca_3d_002.png
:target: ../../auto_examples/decomposition/plot_pca_3d.html
:scale: 70
.. np.random.seed(0)

.. rst-class:: centered
::

|pca_3d_axis| |pca_3d_aligned|
>>> # Create a signal with only 2 useful dimensions
>>> x1 = np.random.normal(size=(100, 1))
>>> x2 = np.random.normal(size=(100, 1))
>>> x3 = x1 + x2
>>> X = np.concatenate([x1, x2, x3], axis=1)
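Not part of the PR diff, but the claim that the third feature adds no new information can be checked directly; a minimal sketch (using a NumPy `Generator` instead of the tutorial's global seed):

```python
import numpy as np

rng = np.random.default_rng(0)
x1 = rng.normal(size=(100, 1))
x2 = rng.normal(size=(100, 1))
x3 = x1 + x2  # exact linear combination of the first two features
X = np.concatenate([x1, x2, x3], axis=1)

# The 100x3 matrix only spans a 2-dimensional subspace
print(np.linalg.matrix_rank(X))  # 2
```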

The point cloud spanned by the observations above is very flat in one
direction: one of the three univariate features can almost be exactly
computed using the other two. PCA finds the directions in which the data is
not *flat*
direction: one of the three univariate features (the z-axis) can be computed almost
exactly from the other two.

When used to *transform* data, PCA can reduce the dimensionality of the
data by projecting on a principal subspace.
.. plot::
:context: close-figs
:align: center

.. np.random.seed(0)
>>> import matplotlib.pyplot as plt
>>> fig = plt.figure()
>>> ax = fig.add_subplot(111, projection='3d')
>>> ax.scatter(X[:, 0], X[:, 1], X[:, 2])
<...>
>>> _ = ax.set(xlabel="x", ylabel="y", zlabel="z")


PCA finds the directions in which the data is not *flat*.

::

>>> # Create a signal with only 2 useful dimensions
>>> x1 = np.random.normal(size=100)
>>> x2 = np.random.normal(size=100)
>>> x3 = x1 + x2
>>> X = np.c_[x1, x2, x3]

>>> from sklearn import decomposition
>>> pca = decomposition.PCA()
>>> pca.fit(X)
PCA()
>>> print(pca.explained_variance_) # doctest: +SKIP
[ 2.18565811e+00 1.19346747e+00 8.43026679e-32]

>>> # As we can see, only the first 2 components are useful
>>> pca.n_components = 2
>>> X_reduced = pca.fit_transform(X)
>>> X_reduced.shape
(100, 2)
>>> from sklearn import decomposition
>>> pca = decomposition.PCA()
>>> pca.fit(X)
PCA()
>>> print(pca.explained_variance_) # doctest: +SKIP
[ 2.18565811e+00 1.19346747e+00 8.43026679e-32]

Looking at the explained variance, we see that only the first two components are
useful. PCA can therefore reduce dimensionality while preserving most of the
information, by projecting the data onto the principal subspace.

::

>>> pca.set_params(n_components=2)
PCA(n_components=2)
>>> X_reduced = pca.fit_transform(X)
>>> X_reduced.shape
(100, 2)
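The tutorial stops at the shape check; as a further sanity check (a sketch, not part of the diff), projecting back from the two-component subspace reconstructs the rank-2 data almost exactly:

```python
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
x1 = rng.normal(size=100)
x2 = rng.normal(size=100)
X = np.c_[x1, x2, x1 + x2]  # rank-2 data, as in the tutorial

pca = PCA(n_components=2)
X_reduced = pca.fit_transform(X)

# Map the 2-D representation back to the original 3-D space; since the
# third feature is a linear combination of the first two, nothing is lost
X_restored = pca.inverse_transform(X_reduced)
print(np.allclose(X, X_restored))  # True
```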

.. Eigenfaces here?

99 changes: 0 additions & 99 deletions examples/decomposition/plot_pca_3d.py

This file was deleted.

3 changes: 3 additions & 0 deletions sklearn/decomposition/_incremental_pca.py
@@ -39,6 +39,9 @@ class IncrementalPCA(_BasePCA):
computations to get the principal components, versus 1 large SVD of
complexity ``O(n_samples * n_features ** 2)`` for PCA.

For a usage example, see
:ref:`sphx_glr_auto_examples_decomposition_plot_incremental_pca.py`.

Read more in the :ref:`User Guide <IncrementalPCA>`.

.. versionadded:: 0.16
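A minimal sketch of the batched workflow this docstring describes (the array sizes and batch count here are illustrative, not from the linked example):

```python
import numpy as np
from sklearn.decomposition import IncrementalPCA

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 10))

# Process the data in 5 mini-batches instead of one large SVD over all of X
ipca = IncrementalPCA(n_components=3)
for batch in np.array_split(X, 5):
    ipca.partial_fit(batch)

X_transformed = ipca.transform(X)
print(X_transformed.shape)  # (500, 3)
```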
3 changes: 3 additions & 0 deletions sklearn/decomposition/_kernel_pca.py
@@ -41,6 +41,9 @@ class KernelPCA(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator
components to extract. It can also use a randomized truncated SVD by the
method proposed in [3]_, see `eigen_solver`.

For a usage example, see
:ref:`sphx_glr_auto_examples_decomposition_plot_kernel_pca.py`.

Read more in the :ref:`User Guide <kernel_PCA>`.

Parameters
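For context, a minimal sketch of the non-linear use case the docstring alludes to (the dataset and kernel parameters are illustrative choices, not taken from the linked example):

```python
from sklearn.datasets import make_circles
from sklearn.decomposition import KernelPCA

# Two concentric circles: no linear projection can separate them
X, y = make_circles(n_samples=200, factor=0.3, noise=0.05, random_state=0)

# An RBF kernel lets KernelPCA pick up the non-linear structure
kpca = KernelPCA(n_components=2, kernel="rbf", gamma=10)
X_kpca = kpca.fit_transform(X)
print(X_kpca.shape)  # (200, 2)
```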
3 changes: 3 additions & 0 deletions sklearn/decomposition/_pca.py
@@ -136,6 +136,9 @@ class PCA(_BasePCA):
Notice that this class does not support sparse input. See
:class:`TruncatedSVD` for an alternative with sparse data.

For a usage example, see
:ref:`sphx_glr_auto_examples_decomposition_plot_pca_iris.py`.

Read more in the :ref:`User Guide <PCA>`.

Parameters
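The linked Iris example boils down to something like the following (a sketch, not the example's exact code):

```python
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

X = load_iris().data  # shape (150, 4)

pca = PCA(n_components=2)
X_2d = pca.fit_transform(X)

print(X_2d.shape)  # (150, 2)
# The first two components capture most of the variance in Iris
print(pca.explained_variance_ratio_.sum())
```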
3 changes: 3 additions & 0 deletions sklearn/decomposition/_sparse_pca.py
@@ -342,6 +342,9 @@ class MiniBatchSparsePCA(_BaseSparsePCA):
the data. The amount of sparseness is controllable by the coefficient
of the L1 penalty, given by the parameter alpha.

For an example comparing sparse PCA to PCA, see
:ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py`.

Read more in the :ref:`User Guide <SparsePCA>`.

Parameters
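To illustrate the controllable sparsity mentioned above, a sketch on synthetic data (the `alpha` value and data shape are arbitrary choices, not from the linked example):

```python
import numpy as np
from sklearn.decomposition import MiniBatchSparsePCA

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 20))

# The L1 penalty alpha drives some component loadings to exactly zero;
# larger alpha means sparser components
spca = MiniBatchSparsePCA(n_components=5, alpha=1.0, random_state=0)
spca.fit(X)

# Fraction of exactly-zero loadings (nonzero fraction with this alpha)
print((spca.components_ == 0).mean())
```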