From e8c13404f1221fd0b4c6260dd1a9075fc5c509fd Mon Sep 17 00:00:00 2001 From: Chiara Marmo Date: Wed, 26 Aug 2020 14:04:30 +0200 Subject: [PATCH 01/14] FIx supervised learning. --- .../scikit-learn-modern/static/css/theme.css | 30 ++- .../supervised_learning.rst | 236 ++++++++++++------ 2 files changed, 177 insertions(+), 89 deletions(-) diff --git a/doc/themes/scikit-learn-modern/static/css/theme.css b/doc/themes/scikit-learn-modern/static/css/theme.css index db2acbc3a11bb..e651205bac18a 100644 --- a/doc/themes/scikit-learn-modern/static/css/theme.css +++ b/doc/themes/scikit-learn-modern/static/css/theme.css @@ -76,6 +76,7 @@ a code { img { max-width: 100%; + border-radius: 0.25rem; } span.highlighted { @@ -511,10 +512,6 @@ div.sk-sidebar-toc-logo { height: 52px; } -.sk-toc-active { - font-weight: bold; -} - div.sk-sidebar-toc-wrapper { font-size: 0.9rem; width: 252px; @@ -553,6 +550,7 @@ div.sk-sidebar-toc ul ul { } div.sk-sidebar-toc ul ul ul { + list-style: square; margin-left: 1rem; } @@ -839,10 +837,6 @@ div.highlight:hover span.copybutton:hover { background-color: #20252B; } -div.body img.align-center { - max-width: 800px; -} - div.body img { max-width: 100%; height: unset!important; /* Needed because sphinx sets the height */ @@ -1213,16 +1207,34 @@ div.sk-sponsor-div, div.sk-testimonial-div { align-items: center; } -div.sk-sponsor-div-box, div.sk-testimonial-div-box { +div.sk-sponsor-div-box, div.sk-testimonial-div-box, +div.sk-doc-div-box { width: 100%; } +div.sk-doc-div { + display: flex; + flex-wrap: wrap; + justify-content: center; +} + +div.sk-doc-div-box { + padding: 0.30rem; + overflow: auto; +} + @media screen and (min-width: 500px) { div.sk-sponsor-div-box, div.sk-testimonial-div-box { width: 50%; } } +@media screen and (min-width: 1200px) { + div.sk-doc-div-box { + width: 50%; + } +} + table.sk-sponsor-table tr, table.sk-sponsor-table tr:nth-child(odd) { border-style: none; background-color: white; diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst index 18a7f1336da11..8ca50f7b167ed 100644 --- a/doc/tutorial/statistical_inference/supervised_learning.rst +++ b/doc/tutorial/statistical_inference/supervised_learning.rst @@ -38,10 +38,10 @@ Nearest neighbor and the curse of dimensionality .. topic:: Classifying irises: - .. image:: /auto_examples/datasets/images/sphx_glr_plot_iris_dataset_001.png - :target: ../../auto_examples/datasets/plot_iris_dataset.html - :align: right - :scale: 65 + .. raw :: html + +
+
The iris dataset is a classification task consisting in identifying 3 different types of irises (Setosa, Versicolour, and Virginica) from @@ -49,10 +49,28 @@ Nearest neighbor and the curse of dimensionality >>> import numpy as np >>> from sklearn import datasets - >>> iris_X, iris_y = datasets.load_iris(return_X_y=True) + >>> iris_X, iris_y = datasets.load_iris( + ... return_X_y=True + ... ) >>> np.unique(iris_y) array([0, 1, 2]) + .. raw :: html + +
+
+ + .. image:: /auto_examples/datasets/images/sphx_glr_plot_iris_dataset_001.png + :target: ../../auto_examples/datasets/plot_iris_dataset.html + :align: center + :scale: 50 + + .. raw :: html + +
+
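
A quick look at the shapes confirms what ``load_iris`` returned — 150
samples described by 4 features (sepal and petal length and width)::

    >>> iris_X.shape
    (150, 4)
    >>> iris_y.shape
    (150,)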
+ + k-Nearest neighbors classifier ------------------------------- @@ -155,10 +173,10 @@ in its simplest form, fits a linear model to the data set by adjusting a set of parameters in order to make the sum of the squared residuals of the model as small as possible. -.. image:: /auto_examples/linear_model/images/sphx_glr_plot_ols_001.png - :target: ../../auto_examples/linear_model/plot_ols.html - :scale: 40 - :align: right +.. raw :: html + +
+
Linear models: :math:`y = X\beta + \epsilon` @@ -167,13 +185,28 @@ Linear models: :math:`y = X\beta + \epsilon` * :math:`\beta`: Coefficients * :math:`\epsilon`: Observation noise +.. raw :: html + +
+
+ +.. image:: /auto_examples/linear_model/images/sphx_glr_plot_ols_001.png + :target: ../../auto_examples/linear_model/plot_ols.html + :scale: 50 + :align: center + +.. raw :: html + +
+
+ :: >>> from sklearn import linear_model >>> regr = linear_model.LinearRegression() >>> regr.fit(diabetes_X_train, diabetes_y_train) LinearRegression() - >>> print(regr.coef_) + >>> print(regr.coef_) # doctest: +SKIP [ 0.30349955 -237.63931533 510.53060544 327.73698041 -814.13170937 492.81458798 102.84845219 184.60648906 743.51961675 76.09517222] @@ -197,10 +230,10 @@ Shrinkage If there are few data points per dimension, noise in the observations induces high variance: -.. image:: /auto_examples/linear_model/images/sphx_glr_plot_ols_ridge_variance_001.png - :target: ../../auto_examples/linear_model/plot_ols_ridge_variance.html - :scale: 70 - :align: right +.. raw :: html + +
+
:: @@ -219,6 +252,19 @@ induces high variance: ... plt.plot(test, regr.predict(test)) # doctest: +SKIP ... plt.scatter(this_X, y, s=3) # doctest: +SKIP +.. raw :: html + +
+
+ +.. image:: /auto_examples/linear_model/images/sphx_glr_plot_ols_ridge_variance_001.png + :target: ../../auto_examples/linear_model/plot_ols_ridge_variance.html + :align: center + +.. raw :: html + +
+
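
The spread the figure shows can also be read directly off the fitted
slopes — a small sketch along the lines of the loop above (the numbers
depend on the random draws, so no output is asserted)::

    >>> regr = linear_model.LinearRegression()
    >>> slopes = []
    >>> for _ in range(100):
    ...     this_X = .1 * np.random.normal(size=(2, 1)) + X
    ...     regr.fit(this_X, y)
    ...     slopes.append(regr.coef_[0])
    >>> np.std(slopes)  # large: the fits vary wildly  # doctest: +SKIP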
A solution in high-dimensional statistical learning is to *shrink* the @@ -226,10 +272,11 @@ regression coefficients to zero: any two randomly chosen set of observations are likely to be uncorrelated. This is called :class:`Ridge` regression: -.. image:: /auto_examples/linear_model/images/sphx_glr_plot_ols_ridge_variance_002.png - :target: ../../auto_examples/linear_model/plot_ols_ridge_variance.html - :scale: 70 - :align: right +.. raw :: html + +
+
+ :: @@ -244,6 +291,21 @@ regression: ... plt.plot(test, regr.predict(test)) # doctest: +SKIP ... plt.scatter(this_X, y, s=3) # doctest: +SKIP +.. raw :: html + +
+
+ +.. image:: /auto_examples/linear_model/images/sphx_glr_plot_ols_ridge_variance_002.png + :target: ../../auto_examples/linear_model/plot_ols_ridge_variance.html + :align: center + +.. raw :: html + +
+
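
The shrinkage itself is easy to verify numerically: the squared norm of
the coefficient vector decreases as ``alpha`` grows. A sketch, assuming
the diabetes arrays from the start of this section::

    >>> for alpha in [0.1, 1., 10.]:
    ...     ridge = linear_model.Ridge(alpha=alpha)
    ...     ridge.fit(diabetes_X_train, diabetes_y_train)
    ...     print(alpha, np.sum(ridge.coef_ ** 2))  # doctest: +SKIP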
+ + This is an example of **bias/variance tradeoff**: the larger the ridge ``alpha`` parameter, the higher the bias and the lower the variance. @@ -327,8 +389,8 @@ application of Occam's razor: *prefer simpler models*. >>> regr.fit(diabetes_X_train, diabetes_y_train) Lasso(alpha=0.025118864315095794) >>> print(regr.coef_) - [ 0. -212.43764548 517.19478111 313.77959962 -160.8303982 -0. - -187.19554705 69.38229038 508.66011217 71.84239008] + [ 0. -212.437... 517.194... 313.779... -160.830... + -0. -187.195... 69.382... 508.660... 71.842...] .. topic:: **Different algorithms for the same problem** @@ -346,10 +408,10 @@ application of Occam's razor: *prefer simpler models*. Classification --------------- -.. image:: /auto_examples/linear_model/images/sphx_glr_plot_logistic_001.png - :target: ../../auto_examples/linear_model/plot_logistic.html - :scale: 65 - :align: right +.. raw :: html + +
+
For classification, as in the labeling `iris `_ task, linear @@ -357,6 +419,21 @@ regression is not the right approach as it will give too much weight to data far from the decision frontier. A linear approach is to fit a sigmoid function or **logistic** function: +.. raw :: html + +
+
+ +.. image:: /auto_examples/linear_model/images/sphx_glr_plot_logistic_001.png + :target: ../../auto_examples/linear_model/plot_logistic.html + :scale: 70 + :align: center + +.. raw :: html + +
+
+ .. math:: y = \textrm{sigmoid}(X\beta - \textrm{offset}) + \epsilon = @@ -373,6 +450,7 @@ This is known as :class:`LogisticRegression`. .. image:: /auto_examples/linear_model/images/sphx_glr_plot_iris_logistic_001.png :target: ../../auto_examples/linear_model/plot_iris_logistic.html :scale: 83 + :align: center .. topic:: Multiclass classification @@ -398,7 +476,7 @@ This is known as :class:`LogisticRegression`. .. literalinclude:: ../../auto_examples/exercises/plot_digits_classification_exercise.py :lines: 15-19 - Solution: :download:`../../auto_examples/exercises/plot_digits_classification_exercise.py` + :download:`Solution <../../auto_examples/exercises/plot_digits_classification_exercise.py>` Support vector machines (SVMs) @@ -420,19 +498,31 @@ the separating line (less regularization). .. |svm_margin_unreg| image:: /auto_examples/svm/images/sphx_glr_plot_svm_margin_001.png :target: ../../auto_examples/svm/plot_svm_margin.html - :scale: 70 .. |svm_margin_reg| image:: /auto_examples/svm/images/sphx_glr_plot_svm_margin_002.png :target: ../../auto_examples/svm/plot_svm_margin.html - :scale: 70 -.. rst-class:: centered +.. raw :: html + +
+
+

+   <p class="caption">Unregularized SVM</p>
+
+ +|svm_margin_unreg| + +.. raw :: html + +
+
+

+   <p class="caption">Regularized SVM (default)</p>
+
+ +|svm_margin_reg| + +.. raw :: html + +
+
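
The effect of ``C`` can be counted as well as plotted: with a small
``C`` (more regularization) more training points end up acting as
support vectors. A sketch reusing the iris training arrays (the counts
are data-dependent)::

    >>> for C in [0.01, 1., 100.]:
    ...     clf = svm.SVC(kernel='linear', C=C)
    ...     clf.fit(iris_X_train, iris_y_train)
    ...     print(C, clf.support_vectors_.shape[0])  # doctest: +SKIP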
- ============================= ============================== - **Unregularized SVM** **Regularized SVM (default)** - ============================= ============================== - |svm_margin_unreg| |svm_margin_reg| - ============================= ============================== .. topic:: Example: @@ -459,7 +549,7 @@ classification --:class:`SVC` (Support Vector Classification). .. _using_kernels_tut: Using kernels --------------- +------------- Classes are not always linearly separable in feature space. The solution is to build a decision function that is not linear but may be polynomial instead. @@ -468,72 +558,58 @@ creating a decision energy by positioning *kernels* on observations: .. |svm_kernel_linear| image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_001.png :target: ../../auto_examples/svm/plot_svm_kernels.html - :scale: 65 .. |svm_kernel_poly| image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_002.png :target: ../../auto_examples/svm/plot_svm_kernels.html - :scale: 65 -.. rst-class:: centered - - .. list-table:: - - * - - - **Linear kernel** - - - **Polynomial kernel** - - - - * - - - |svm_kernel_linear| - - - |svm_kernel_poly| - - - - * - - - :: +.. |svm_kernel_rbf| image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_003.png + :target: ../../auto_examples/svm/plot_svm_kernels.html - >>> svc = svm.SVC(kernel='linear') +.. raw :: html - - :: +
+
+

+   <p class="caption">Linear kernel</p>
+
- >>> svc = svm.SVC(kernel='poly', - ... degree=3) - >>> # degree: polynomial degree +|svm_kernel_linear| +:: + >>> svc = svm.SVC(kernel='linear') -.. |svm_kernel_rbf| image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_003.png - :target: ../../auto_examples/svm/plot_svm_kernels.html - :scale: 65 -.. rst-class:: centered +.. raw :: html - .. list-table:: +
+
+

+   <p class="caption">Polynomial kernel</p>
+
- * +|svm_kernel_poly| - - **RBF kernel (Radial Basis Function)** +:: + >>> svc = svm.SVC(kernel='poly', + ... degree=3) + >>> # degree: polynomial degree - * +.. raw :: html - - |svm_kernel_rbf| +
+
+

+   <p class="caption">RBF kernel (Radial Basis Function)</p>
+
- * +|svm_kernel_rbf| - - :: +:: - >>> svc = svm.SVC(kernel='rbf') - >>> # gamma: inverse of size of - >>> # radial kernel + >>> svc = svm.SVC(kernel='rbf') + >>> # gamma: inverse of size of + >>> # radial kernel +.. raw :: html +
+
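
Which kernel to prefer is an empirical question, and cross-validation
gives a quick comparison. A sketch on the iris data (the scores are
indicative only)::

    >>> from sklearn.model_selection import cross_val_score
    >>> for kernel in ('linear', 'poly', 'rbf'):
    ...     scores = cross_val_score(svm.SVC(kernel=kernel),
    ...                              iris_X, iris_y, cv=5)
    ...     print(kernel, scores.mean())  # doctest: +SKIP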
.. topic:: **Interactive example** @@ -543,7 +619,7 @@ creating a decision energy by positioning *kernels* on observations: .. image:: /auto_examples/datasets/images/sphx_glr_plot_iris_dataset_001.png :target: ../../auto_examples/datasets/plot_iris_dataset.html - :align: right + :align: center :scale: 70 .. topic:: **Exercise** @@ -562,4 +638,4 @@ creating a decision energy by positioning *kernels* on observations: .. literalinclude:: ../../auto_examples/exercises/plot_iris_exercise.py :lines: 18-23 - Solution: :download:`../../auto_examples/exercises/plot_iris_exercise.py` + :download:`Solution <../../auto_examples/exercises/plot_iris_exercise.py>` From 387870556e803b47ab2e37bc82d3aea52beb03ec Mon Sep 17 00:00:00 2001 From: Chiara Marmo Date: Wed, 26 Aug 2020 14:09:04 +0200 Subject: [PATCH 02/14] Fix settings and model selection. --- .../statistical_inference/model_selection.rst | 36 ++++++++++++------- .../statistical_inference/settings.rst | 31 ++++++++++++---- 2 files changed, 48 insertions(+), 19 deletions(-) diff --git a/doc/tutorial/statistical_inference/model_selection.rst b/doc/tutorial/statistical_inference/model_selection.rst index 63af08b320752..0668b83167198 100644 --- a/doc/tutorial/statistical_inference/model_selection.rst +++ b/doc/tutorial/statistical_inference/model_selection.rst @@ -180,23 +180,35 @@ scoring method. .. currentmodule:: sklearn.svm .. topic:: **Exercise** - :class: green - On the digits dataset, plot the cross-validation score of a :class:`SVC` - estimator with an linear kernel as a function of parameter ``C`` (use a - logarithmic grid of points, from 1 to 10). + .. raw :: html - .. literalinclude:: ../../auto_examples/exercises/plot_cv_digits.py - :lines: 13-23 - - .. image:: /auto_examples/exercises/images/sphx_glr_plot_cv_digits_001.png +
+
+ + On the digits dataset, plot the cross-validation score of a :class:`SVC` + estimator with an linear kernel as a function of parameter ``C`` (use a + logarithmic grid of points, from 1 to 10). + + .. literalinclude:: ../../auto_examples/exercises/plot_cv_digits.py + :lines: 13-23 + + .. raw :: html + +
+
+ + .. image:: /auto_examples/exercises/images/sphx_glr_plot_cv_digits_001.png :target: ../../auto_examples/exercises/plot_cv_digits.html :align: center :scale: 90 - **Solution:** :ref:`sphx_glr_auto_examples_exercises_plot_cv_digits.py` + .. raw :: html +
+
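
The core of the exercise is a loop over a logarithmic grid of ``C``
values — a sketch along those lines, not the reference solution::

    >>> import numpy as np
    >>> from sklearn import datasets, svm
    >>> from sklearn.model_selection import cross_val_score
    >>> X, y = datasets.load_digits(return_X_y=True)
    >>> svc = svm.SVC(kernel='linear')
    >>> C_s = np.logspace(0, 1, 10)  # from 1 to 10, log-spaced
    >>> scores = [cross_val_score(svc.set_params(C=C), X, y).mean()
    ...           for C in C_s]  # doctest: +SKIP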
+ **Solution:** :ref:`sphx_glr_auto_examples_exercises_plot_cv_digits.py` Grid-search and cross-validated estimators ============================================ @@ -227,9 +239,10 @@ estimator during the construction and exposes an estimator API:: 0.943... -By default, the :class:`GridSearchCV` uses a 5-fold cross-validation. However, +By default, the :class:`GridSearchCV` uses a 3-fold cross-validation. However, if it detects that a classifier is passed, rather than a regressor, it uses -a stratified 5-fold. +a stratified 3-fold. The default will change to a 5-fold cross-validation in +version 0.22. .. topic:: Nested cross-validation @@ -272,7 +285,6 @@ These estimators are called similarly to their counterparts, with 'CV' appended to their name. .. topic:: **Exercise** - :class: green On the diabetes dataset, find the optimal regularization parameter alpha. diff --git a/doc/tutorial/statistical_inference/settings.rst b/doc/tutorial/statistical_inference/settings.rst index 0ca4c69f48f2e..2934120c28eea 100644 --- a/doc/tutorial/statistical_inference/settings.rst +++ b/doc/tutorial/statistical_inference/settings.rst @@ -31,6 +31,11 @@ needs to be preprocessed in order to be used by scikit-learn. .. topic:: An example of reshaping data would be the digits dataset + .. raw :: html + +
+
+ The digits dataset is made of 1797 8x8 images of hand-written digits :: @@ -38,18 +43,30 @@ needs to be preprocessed in order to be used by scikit-learn. >>> digits.images.shape (1797, 8, 8) >>> import matplotlib.pyplot as plt #doctest: +SKIP - >>> plt.imshow(digits.images[-1], cmap=plt.cm.gray_r) #doctest: +SKIP + >>> plt.imshow(digits.images[-1], + ... cmap=plt.cm.gray_r) #doctest: +SKIP - .. image:: /auto_examples/datasets/images/sphx_glr_plot_digits_last_image_001.png - :target: ../../auto_examples/datasets/plot_digits_last_image.html - :align: left - :scale: 60 - To use this dataset with scikit-learn, we transform each 8x8 image into a feature vector of length 64 :: - >>> data = digits.images.reshape((digits.images.shape[0], -1)) + >>> data = digits.images.reshape( + ... (digits.images.shape[0], -1) + ... ) + + .. raw :: html + +
+
+ + .. image:: /auto_examples/datasets/images/sphx_glr_plot_digits_last_image_001.png + :target: ../../auto_examples/datasets/plot_digits_last_image.html + :align: center + + .. raw :: html + +
+
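
The resulting matrix has one row per image and one column per pixel::

    >>> data.shape
    (1797, 64)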
Estimators objects From f3d67aabd54f6d299de3a5e6c16d0e0819ca34ec Mon Sep 17 00:00:00 2001 From: Chiara Marmo Date: Wed, 26 Aug 2020 14:13:39 +0200 Subject: [PATCH 03/14] Fix putting together. --- .../putting_together.rst | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/doc/tutorial/statistical_inference/putting_together.rst b/doc/tutorial/statistical_inference/putting_together.rst index 5106958d77e96..6b251317ff4bd 100644 --- a/doc/tutorial/statistical_inference/putting_together.rst +++ b/doc/tutorial/statistical_inference/putting_together.rst @@ -11,16 +11,13 @@ Pipelining We have seen that some estimators can transform data and that some estimators can predict variables. We can also create combined estimators: -.. image:: ../../auto_examples/compose/images/sphx_glr_plot_digits_pipe_001.png - :target: ../../auto_examples/compose/plot_digits_pipe.html - :scale: 65 - :align: right - .. literalinclude:: ../../auto_examples/compose/plot_digits_pipe.py :lines: 23-63 - - +.. image:: ../../auto_examples/compose/images/sphx_glr_plot_digits_pipe_001.png + :target: ../../auto_examples/compose/plot_digits_pipe.html + :scale: 65 + :align: center Face recognition with eigenfaces ================================= @@ -40,20 +37,28 @@ The dataset used in this example is a preprocessed excerpt of the .. |eigenfaces| image:: ../../images/plot_face_recognition_2.png :scale: 50 -.. list-table:: - :class: centered +.. raw :: html + +
+
+ +

+   <p class="caption">Prediction</p>
+
+ +|prediction| - * +.. raw :: html - - |prediction| +
+
- - |eigenfaces| +

+   <p class="caption">Eigenfaces</p>
+
- * +|eigenfaces| - - **Prediction** +.. raw :: html - - **Eigenfaces** +
+
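
In essence, the example chains PCA (the eigenfaces) with an SVM — a
minimal sketch of that pipeline; ``X_train`` and ``y_train`` are
hypothetical names standing for the flattened face images and their
labels, which the full example script takes care of loading::

    >>> from sklearn.decomposition import PCA
    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.svm import SVC
    >>> clf = make_pipeline(PCA(n_components=150, whiten=True),
    ...                     SVC(kernel='rbf', class_weight='balanced'))
    >>> clf.fit(X_train, y_train)  # doctest: +SKIP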
Expected results for the top 5 most represented people in the dataset:: From c8a691ead9fd060ac5dc787f73fa8bc002c189a6 Mon Sep 17 00:00:00 2001 From: Chiara Marmo Date: Wed, 26 Aug 2020 14:16:15 +0200 Subject: [PATCH 04/14] Fix unsupervised learning. --- .../unsupervised_learning.rst | 74 ++++++++++++------- 1 file changed, 48 insertions(+), 26 deletions(-) diff --git a/doc/tutorial/statistical_inference/unsupervised_learning.rst b/doc/tutorial/statistical_inference/unsupervised_learning.rst index b87fb64ec8d9b..faa83fb3f3dff 100644 --- a/doc/tutorial/statistical_inference/unsupervised_learning.rst +++ b/doc/tutorial/statistical_inference/unsupervised_learning.rst @@ -60,42 +60,49 @@ algorithms. The simplest clustering algorithm is :ref:`k_means`. is sensitive to initialization, and can fall into local minima, although scikit-learn employs several tricks to mitigate this issue. - .. list-table:: - :class: centered + .. raw :: html - * +
+
- - |k_means_iris_bad_init| +

+      <p class="caption">Bad initialization</p>
+
- - |k_means_iris_8| + |k_means_iris_bad_init| - - |cluster_iris_truth| + .. raw :: html - * +
+
+

+      <p class="caption">8 clusters</p>
+
- - **Bad initialization** + |k_means_iris_8| - - **8 clusters** + .. raw :: html - - **Ground truth** +
+
+

+      <p class="caption">Ground truth</p>
+
+ + |cluster_iris_truth| + + .. raw :: html + +
+
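
The usual mitigation is restarting: the ``n_init`` parameter refits the
model from several random initializations and keeps the run with the
lowest inertia. A sketch (inertia values vary from run to run)::

    >>> for n_init in (1, 10):
    ...     km = cluster.KMeans(n_clusters=3, n_init=n_init)
    ...     km.fit(X_iris)
    ...     print(n_init, km.inertia_)  # doctest: +SKIP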
**Don't over-interpret clustering results** .. |face| image:: /auto_examples/cluster/images/sphx_glr_plot_face_compress_001.png :target: ../../auto_examples/cluster/plot_face_compress.html - :scale: 60 .. |face_regular| image:: /auto_examples/cluster/images/sphx_glr_plot_face_compress_002.png :target: ../../auto_examples/cluster/plot_face_compress.html - :scale: 60 .. |face_compressed| image:: /auto_examples/cluster/images/sphx_glr_plot_face_compress_003.png :target: ../../auto_examples/cluster/plot_face_compress.html - :scale: 60 .. |face_histogram| image:: /auto_examples/cluster/images/sphx_glr_plot_face_compress_004.png :target: ../../auto_examples/cluster/plot_face_compress.html - :scale: 60 .. topic:: **Application example: vector quantization** @@ -120,28 +127,43 @@ algorithms. The simplest clustering algorithm is :ref:`k_means`. >>> face_compressed = np.choose(labels, values) >>> face_compressed.shape = face.shape - .. list-table:: - :class: centered + .. raw :: html + +
+
+ +

+      <p class="caption">Raw image</p>
+
+ + |face| + + .. raw :: html - * - - |face| +
+
+

+      <p class="caption">K-means quantization</p>
+
- - |face_compressed| + |face_compressed| - - |face_regular| + .. raw :: html - - |face_histogram| +
+
+

+      <p class="caption">Equal bins</p>
+
- * + |face_regular| - - Raw image + .. raw :: html - - K-means quantization +
+
+

+      <p class="caption">Image histogram</p>
+
- - Equal bins + |face_histogram| - - Image histogram + .. raw :: html +
+
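
The gain from the quantization is easy to put in numbers: 5 levels need
``log2(5)`` bits per pixel instead of 8 — a back-of-the-envelope
sketch::

    >>> import numpy as np
    >>> 8 / np.log2(5)  # compression factor
    3.4454...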
Hierarchical agglomerative clustering: Ward --------------------------------------------- From 0c43ad923a98b6df037dc28ca8581583ac6b8f2a Mon Sep 17 00:00:00 2001 From: Chiara Marmo Date: Wed, 26 Aug 2020 14:27:02 +0200 Subject: [PATCH 05/14] Remove finding help. --- .../statistical_inference/finding_help.rst | 32 ------------------- doc/tutorial/statistical_inference/index.rst | 1 - 2 files changed, 33 deletions(-) delete mode 100644 doc/tutorial/statistical_inference/finding_help.rst diff --git a/doc/tutorial/statistical_inference/finding_help.rst b/doc/tutorial/statistical_inference/finding_help.rst deleted file mode 100644 index 69026e2e5dbd2..0000000000000 --- a/doc/tutorial/statistical_inference/finding_help.rst +++ /dev/null @@ -1,32 +0,0 @@ -Finding help -============ - - -The project mailing list ------------------------- - -If you encounter a bug with ``scikit-learn`` or something that needs -clarification in the docstring or the online documentation, please feel free to -ask on the `Mailing List `_ - - -Q&A communities with Machine Learning practitioners ----------------------------------------------------- - - :Quora.com: - - Quora has a topic for Machine Learning related questions that - also features some interesting discussions: - https://www.quora.com/topic/Machine-Learning - - :Stack Exchange: - - The Stack Exchange family of sites hosts `multiple subdomains for Machine Learning questions`_. - -.. _`How do I learn machine learning?`: https://www.quora.com/How-do-I-learn-machine-learning-1 - -.. _`multiple subdomains for Machine Learning questions`: https://meta.stackexchange.com/q/130524 - --- _'An excellent free online course for Machine Learning taught by Professor Andrew Ng of Stanford': https://www.coursera.org/learn/machine-learning - --- _'Another excellent free online course that takes a more general approach to Artificial Intelligence': https://www.udacity.com/course/intro-to-artificial-intelligence--cs271 diff --git a/doc/tutorial/statistical_inference/index.rst b/doc/tutorial/statistical_inference/index.rst index f4aa9f8833129..1ea527054fc38 100644 --- a/doc/tutorial/statistical_inference/index.rst +++ b/doc/tutorial/statistical_inference/index.rst @@ -34,4 +34,3 @@ A tutorial on statistical-learning for scientific data processing model_selection unsupervised_learning putting_together - finding_help From 3e36c8bb889517a8234b205820afef37d6972551 Mon Sep 17 00:00:00 2001 From: Chiara Marmo Date: Wed, 26 Aug 2020 18:40:08 +0200 Subject: [PATCH 06/14] Just simple rst. 
--- .../scikit-learn-modern/static/css/theme.css | 22 +---- .../statistical_inference/model_selection.rst | 15 --- .../putting_together.rst | 29 +----- .../statistical_inference/settings.rst | 16 --- .../supervised_learning.rst | 64 ------------ .../unsupervised_learning.rst | 97 +++++-------------- 6 files changed, 33 insertions(+), 210 deletions(-) diff --git a/doc/themes/scikit-learn-modern/static/css/theme.css b/doc/themes/scikit-learn-modern/static/css/theme.css index e651205bac18a..f3c87abe13d58 100644 --- a/doc/themes/scikit-learn-modern/static/css/theme.css +++ b/doc/themes/scikit-learn-modern/static/css/theme.css @@ -77,6 +77,7 @@ a code { img { max-width: 100%; border-radius: 0.25rem; + padding: 0.5rem } span.highlighted { @@ -1207,34 +1208,17 @@ div.sk-sponsor-div, div.sk-testimonial-div { align-items: center; } -div.sk-sponsor-div-box, div.sk-testimonial-div-box, -div.sk-doc-div-box { +div.sk-sponsor-div-box, div.sk-testimonial-div-box +{ width: 100%; } -div.sk-doc-div { - display: flex; - flex-wrap: wrap; - justify-content: center; -} - -div.sk-doc-div-box { - padding: 0.30rem; - overflow: auto; -} - @media screen and (min-width: 500px) { div.sk-sponsor-div-box, div.sk-testimonial-div-box { width: 50%; } } -@media screen and (min-width: 1200px) { - div.sk-doc-div-box { - width: 50%; - } -} - table.sk-sponsor-table tr, table.sk-sponsor-table tr:nth-child(odd) { border-style: none; background-color: white; diff --git a/doc/tutorial/statistical_inference/model_selection.rst b/doc/tutorial/statistical_inference/model_selection.rst index 0668b83167198..f29d262d7b101 100644 --- a/doc/tutorial/statistical_inference/model_selection.rst +++ b/doc/tutorial/statistical_inference/model_selection.rst @@ -181,11 +181,6 @@ scoring method. .. topic:: **Exercise** - .. raw :: html - -
-
- On the digits dataset, plot the cross-validation score of a :class:`SVC` estimator with an linear kernel as a function of parameter ``C`` (use a logarithmic grid of points, from 1 to 10). @@ -193,21 +188,11 @@ scoring method. .. literalinclude:: ../../auto_examples/exercises/plot_cv_digits.py :lines: 13-23 - .. raw :: html - -
-
- .. image:: /auto_examples/exercises/images/sphx_glr_plot_cv_digits_001.png :target: ../../auto_examples/exercises/plot_cv_digits.html :align: center :scale: 90 - .. raw :: html - -
-
- **Solution:** :ref:`sphx_glr_auto_examples_exercises_plot_cv_digits.py` Grid-search and cross-validated estimators diff --git a/doc/tutorial/statistical_inference/putting_together.rst b/doc/tutorial/statistical_inference/putting_together.rst index 6b251317ff4bd..033bed2e33884 100644 --- a/doc/tutorial/statistical_inference/putting_together.rst +++ b/doc/tutorial/statistical_inference/putting_together.rst @@ -31,34 +31,15 @@ The dataset used in this example is a preprocessed excerpt of the .. literalinclude:: ../../auto_examples/applications/plot_face_recognition.py -.. |prediction| image:: ../../images/plot_face_recognition_1.png +.. figure:: ../../images/plot_face_recognition_1.png :scale: 50 -.. |eigenfaces| image:: ../../images/plot_face_recognition_2.png - :scale: 50 - -.. raw :: html - -
-
- -

-   <p class="caption">Prediction</p>
-
- -|prediction| + **Prediction** -.. raw :: html - -
-
- -

-   <p class="caption">Eigenfaces</p>
-
- -|eigenfaces| - -.. raw :: html +.. figure:: ../../images/plot_face_recognition_2.png + :scale: 50 -
-
+ **Eigenfaces** Expected results for the top 5 most represented people in the dataset:: diff --git a/doc/tutorial/statistical_inference/settings.rst b/doc/tutorial/statistical_inference/settings.rst index 2934120c28eea..32bd737e6d489 100644 --- a/doc/tutorial/statistical_inference/settings.rst +++ b/doc/tutorial/statistical_inference/settings.rst @@ -31,11 +31,6 @@ needs to be preprocessed in order to be used by scikit-learn. .. topic:: An example of reshaping data would be the digits dataset - .. raw :: html - -
-
- The digits dataset is made of 1797 8x8 images of hand-written digits :: @@ -54,21 +49,10 @@ needs to be preprocessed in order to be used by scikit-learn. ... (digits.images.shape[0], -1) ... ) - .. raw :: html - -
-
- .. image:: /auto_examples/datasets/images/sphx_glr_plot_digits_last_image_001.png :target: ../../auto_examples/datasets/plot_digits_last_image.html :align: center - .. raw :: html - -
-
- - Estimators objects =================== diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst index 8ca50f7b167ed..7f6cbbfa5a8cf 100644 --- a/doc/tutorial/statistical_inference/supervised_learning.rst +++ b/doc/tutorial/statistical_inference/supervised_learning.rst @@ -38,11 +38,6 @@ Nearest neighbor and the curse of dimensionality .. topic:: Classifying irises: - .. raw :: html - -
-
- The iris dataset is a classification task consisting in identifying 3 different types of irises (Setosa, Versicolour, and Virginica) from their petal and sepal length and width:: @@ -55,22 +50,11 @@ Nearest neighbor and the curse of dimensionality >>> np.unique(iris_y) array([0, 1, 2]) - .. raw :: html - -
-
- .. image:: /auto_examples/datasets/images/sphx_glr_plot_iris_dataset_001.png :target: ../../auto_examples/datasets/plot_iris_dataset.html :align: center :scale: 50 - .. raw :: html - -
-
- - k-Nearest neighbors classifier ------------------------------- @@ -173,11 +157,6 @@ in its simplest form, fits a linear model to the data set by adjusting a set of parameters in order to make the sum of the squared residuals of the model as small as possible. -.. raw :: html - -
-
- Linear models: :math:`y = X\beta + \epsilon` * :math:`X`: data @@ -185,21 +164,11 @@ Linear models: :math:`y = X\beta + \epsilon` * :math:`\beta`: Coefficients * :math:`\epsilon`: Observation noise -.. raw :: html - -
-
- .. image:: /auto_examples/linear_model/images/sphx_glr_plot_ols_001.png :target: ../../auto_examples/linear_model/plot_ols.html :scale: 50 :align: center -.. raw :: html - -
-
- :: >>> from sklearn import linear_model @@ -230,11 +199,6 @@ Shrinkage If there are few data points per dimension, noise in the observations induces high variance: -.. raw :: html - -
-
- :: >>> X = np.c_[ .5, 1].T @@ -252,32 +216,15 @@ induces high variance: ... plt.plot(test, regr.predict(test)) # doctest: +SKIP ... plt.scatter(this_X, y, s=3) # doctest: +SKIP -.. raw :: html - -
-
- .. image:: /auto_examples/linear_model/images/sphx_glr_plot_ols_ridge_variance_001.png :target: ../../auto_examples/linear_model/plot_ols_ridge_variance.html :align: center -.. raw :: html - -
-
- - A solution in high-dimensional statistical learning is to *shrink* the regression coefficients to zero: any two randomly chosen set of observations are likely to be uncorrelated. This is called :class:`Ridge` regression: -.. raw :: html - -
-
- - :: >>> regr = linear_model.Ridge(alpha=.1) @@ -291,21 +238,10 @@ regression: ... plt.plot(test, regr.predict(test)) # doctest: +SKIP ... plt.scatter(this_X, y, s=3) # doctest: +SKIP -.. raw :: html - -
-
- .. image:: /auto_examples/linear_model/images/sphx_glr_plot_ols_ridge_variance_002.png :target: ../../auto_examples/linear_model/plot_ols_ridge_variance.html :align: center -.. raw :: html - -
-
- - This is an example of **bias/variance tradeoff**: the larger the ridge ``alpha`` parameter, the higher the bias and the lower the variance. diff --git a/doc/tutorial/statistical_inference/unsupervised_learning.rst b/doc/tutorial/statistical_inference/unsupervised_learning.rst index faa83fb3f3dff..4af05047ba2c3 100644 --- a/doc/tutorial/statistical_inference/unsupervised_learning.rst +++ b/doc/tutorial/statistical_inference/unsupervised_learning.rst @@ -41,18 +41,6 @@ algorithms. The simplest clustering algorithm is :ref:`k_means`. >>> print(y_iris[::10]) [0 0 0 0 0 1 1 1 1 1 2 2 2 2 2] -.. |k_means_iris_bad_init| image:: /auto_examples/cluster/images/sphx_glr_plot_cluster_iris_003.png - :target: ../../auto_examples/cluster/plot_cluster_iris.html - :scale: 63 - -.. |k_means_iris_8| image:: /auto_examples/cluster/images/sphx_glr_plot_cluster_iris_001.png - :target: ../../auto_examples/cluster/plot_cluster_iris.html - :scale: 63 - -.. |cluster_iris_truth| image:: /auto_examples/cluster/images/sphx_glr_plot_cluster_iris_004.png - :target: ../../auto_examples/cluster/plot_cluster_iris.html - :scale: 63 - .. warning:: There is absolutely no guarantee of recovering a ground truth. First, @@ -60,49 +48,30 @@ algorithms. The simplest clustering algorithm is :ref:`k_means`. is sensitive to initialization, and can fall into local minima, although scikit-learn employs several tricks to mitigate this issue. - .. raw :: html - -
-
- -

-      <p class="caption">Bad initialization</p>
-
- - |k_means_iris_bad_init| + | - .. raw :: html + .. figure:: /auto_examples/cluster/images/sphx_glr_plot_cluster_iris_003.png + :target: ../../auto_examples/cluster/plot_cluster_iris.html + :scale: 63 -
-
-

-      <p class="caption">8 clusters</p>
-
+ **Bad initialization** - |k_means_iris_8| + .. figure:: /auto_examples/cluster/images/sphx_glr_plot_cluster_iris_001.png + :target: ../../auto_examples/cluster/plot_cluster_iris.html + :scale: 63 - .. raw :: html + **8 clusters** -
-
-

-      <p class="caption">Ground truth</p>
-
+ .. figure:: /auto_examples/cluster/images/sphx_glr_plot_cluster_iris_004.png + :target: ../../auto_examples/cluster/plot_cluster_iris.html + :scale: 63 - |cluster_iris_truth| - - .. raw :: html - -
-
+ **Ground truth** **Don't over-interpret clustering results** -.. |face| image:: /auto_examples/cluster/images/sphx_glr_plot_face_compress_001.png - :target: ../../auto_examples/cluster/plot_face_compress.html -.. |face_regular| image:: /auto_examples/cluster/images/sphx_glr_plot_face_compress_002.png - :target: ../../auto_examples/cluster/plot_face_compress.html -.. |face_compressed| image:: /auto_examples/cluster/images/sphx_glr_plot_face_compress_003.png - :target: ../../auto_examples/cluster/plot_face_compress.html - -.. |face_histogram| image:: /auto_examples/cluster/images/sphx_glr_plot_face_compress_004.png - :target: ../../auto_examples/cluster/plot_face_compress.html .. topic:: **Application example: vector quantization** @@ -127,43 +96,27 @@ algorithms. The simplest clustering algorithm is :ref:`k_means`. >>> face_compressed = np.choose(labels, values) >>> face_compressed.shape = face.shape - .. raw :: html - -
-
- -

-      <p class="caption">Raw image</p>
-
- - |face| - - .. raw :: html - -
-
-

-      <p class="caption">K-means quantization</p>
-
- |face_compressed| + .. figure:: /auto_examples/cluster/images/sphx_glr_plot_face_compress_001.png + :target: ../../auto_examples/cluster/plot_face_compress.html - .. raw :: html + **Raw image** -
-
-

-      <p class="caption">Equal bins</p>
-
+ .. figure:: /auto_examples/cluster/images/sphx_glr_plot_face_compress_003.png + :target: ../../auto_examples/cluster/plot_face_compress.html - |face_regular| + **K-means quantization** - .. raw :: html + .. figure:: /auto_examples/cluster/images/sphx_glr_plot_face_compress_002.png + :target: ../../auto_examples/cluster/plot_face_compress.html -
-
-

-      <p class="caption">Image histogram</p>
-
+ **Equal bins** - |face_histogram| - .. raw :: html + .. figure:: /auto_examples/cluster/images/sphx_glr_plot_face_compress_004.png + :target: ../../auto_examples/cluster/plot_face_compress.html -
-
+ **Image histogram** Hierarchical agglomerative clustering: Ward --------------------------------------------- From 53b2d21cbc4f3ac1bac1637755c3b928746d7753 Mon Sep 17 00:00:00 2001 From: Chiara Marmo Date: Wed, 26 Aug 2020 20:38:46 +0200 Subject: [PATCH 07/14] Just simple rst... again. --- .../supervised_learning.rst | 81 +++---------------- 1 file changed, 11 insertions(+), 70 deletions(-) diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst index 7f6cbbfa5a8cf..4e1c871506e2a 100644 --- a/doc/tutorial/statistical_inference/supervised_learning.rst +++ b/doc/tutorial/statistical_inference/supervised_learning.rst @@ -344,32 +344,17 @@ application of Occam's razor: *prefer simpler models*. Classification --------------- -.. raw :: html - -
-
- For classification, as in the labeling `iris `_ task, linear regression is not the right approach as it will give too much weight to data far from the decision frontier. A linear approach is to fit a sigmoid function or **logistic** function: -.. raw :: html - -
-
- .. image:: /auto_examples/linear_model/images/sphx_glr_plot_logistic_001.png :target: ../../auto_examples/linear_model/plot_logistic.html :scale: 70 :align: center -.. raw :: html - -
-
- .. math:: y = \textrm{sigmoid}(X\beta - \textrm{offset}) + \epsilon = @@ -432,34 +417,12 @@ the separating line (less regularization). .. currentmodule :: sklearn.svm -.. |svm_margin_unreg| image:: /auto_examples/svm/images/sphx_glr_plot_svm_margin_001.png +.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_margin_001.png :target: ../../auto_examples/svm/plot_svm_margin.html .. |svm_margin_reg| image:: /auto_examples/svm/images/sphx_glr_plot_svm_margin_002.png :target: ../../auto_examples/svm/plot_svm_margin.html -.. raw :: html - -
-
-

-   <p class="caption">Unregularized SVM</p>
-
- -|svm_margin_unreg| - -.. raw :: html - -
-
-

-   <p class="caption">Regularized SVM (default)</p>
-
- -|svm_margin_reg| - -.. raw :: html - -
-
- - .. topic:: Example: - :ref:`sphx_glr_auto_examples_svm_plot_iris_svc.py` @@ -492,35 +455,19 @@ build a decision function that is not linear but may be polynomial instead. This is done using the *kernel trick* that can be seen as creating a decision energy by positioning *kernels* on observations: -.. |svm_kernel_linear| image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_001.png - :target: ../../auto_examples/svm/plot_svm_kernels.html - -.. |svm_kernel_poly| image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_002.png +.. figure:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_001.png :target: ../../auto_examples/svm/plot_svm_kernels.html -.. |svm_kernel_rbf| image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_003.png - :target: ../../auto_examples/svm/plot_svm_kernels.html - -.. raw :: html - -
-
-

-   <p class="caption">Linear kernel</p>
-
- -|svm_kernel_linear| - + **Linear kernel** + :: >>> svc = svm.SVC(kernel='linear') +.. figure:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_002.png + :target: ../../auto_examples/svm/plot_svm_kernels.html -.. raw :: html - -
-
-

-   <p class="caption">Polynomial kernel</p>
-
- -|svm_kernel_poly| + **Polynomial kernel** :: @@ -528,13 +475,12 @@ creating a decision energy by positioning *kernels* on observations: ... degree=3) >>> # degree: polynomial degree -.. raw :: html +

-
-
-

-   <p class="caption">RBF kernel (Radial Basis Function)</p>
-
+.. figure:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_003.png + :target: ../../auto_examples/svm/plot_svm_kernels.html -|svm_kernel_rbf| + **RBF kernel (Radial Basis Function)** :: @@ -542,11 +488,6 @@ creating a decision energy by positioning *kernels* on observations: >>> # gamma: inverse of size of >>> # radial kernel -.. raw :: html - -
-
- .. topic:: **Interactive example** See the :ref:`SVM GUI ` to download From 728b8547e00dbb9690bfa2c66314bb5937f03228 Mon Sep 17 00:00:00 2001 From: Chiara Marmo Date: Wed, 26 Aug 2020 20:43:23 +0200 Subject: [PATCH 08/14] Address comments. Revert theme.css. --- .../scikit-learn-modern/static/css/theme.css | 14 +++++++++----- doc/tutorial/statistical_inference/settings.rst | 8 ++++---- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/doc/themes/scikit-learn-modern/static/css/theme.css b/doc/themes/scikit-learn-modern/static/css/theme.css index f3c87abe13d58..db2acbc3a11bb 100644 --- a/doc/themes/scikit-learn-modern/static/css/theme.css +++ b/doc/themes/scikit-learn-modern/static/css/theme.css @@ -76,8 +76,6 @@ a code { img { max-width: 100%; - border-radius: 0.25rem; - padding: 0.5rem } span.highlighted { @@ -513,6 +511,10 @@ div.sk-sidebar-toc-logo { height: 52px; } +.sk-toc-active { + font-weight: bold; +} + div.sk-sidebar-toc-wrapper { font-size: 0.9rem; width: 252px; @@ -551,7 +553,6 @@ div.sk-sidebar-toc ul ul { } div.sk-sidebar-toc ul ul ul { - list-style: square; margin-left: 1rem; } @@ -838,6 +839,10 @@ div.highlight:hover span.copybutton:hover { background-color: #20252B; } +div.body img.align-center { + max-width: 800px; +} + div.body img { max-width: 100%; height: unset!important; /* Needed because sphinx sets the height */ @@ -1208,8 +1213,7 @@ div.sk-sponsor-div, div.sk-testimonial-div { align-items: center; } -div.sk-sponsor-div-box, div.sk-testimonial-div-box -{ +div.sk-sponsor-div-box, div.sk-testimonial-div-box { width: 100%; } diff --git a/doc/tutorial/statistical_inference/settings.rst b/doc/tutorial/statistical_inference/settings.rst index 32bd737e6d489..0ab9e39d63345 100644 --- a/doc/tutorial/statistical_inference/settings.rst +++ b/doc/tutorial/statistical_inference/settings.rst @@ -42,6 +42,10 @@ needs to be preprocessed in order to be used by scikit-learn. ... cmap=plt.cm.gray_r) #doctest: +SKIP + .. image:: /auto_examples/datasets/images/sphx_glr_plot_digits_last_image_001.png + :target: ../../auto_examples/datasets/plot_digits_last_image.html + :align: center + To use this dataset with scikit-learn, we transform each 8x8 image into a feature vector of length 64 :: @@ -49,10 +53,6 @@ needs to be preprocessed in order to be used by scikit-learn. ... (digits.images.shape[0], -1) ... ) - .. image:: /auto_examples/datasets/images/sphx_glr_plot_digits_last_image_001.png - :target: ../../auto_examples/datasets/plot_digits_last_image.html - :align: center - Estimators objects =================== From 5416d3daff02b2aab9d09e2fe35e06178aaffdc8 Mon Sep 17 00:00:00 2001 From: Chiara Marmo Date: Thu, 27 Aug 2020 16:08:40 +0200 Subject: [PATCH 09/14] Address some of the comments. --- .../supervised_learning.rst | 28 +++++++++---------- .../unsupervised_learning.rst | 6 ++-- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst index 4e1c871506e2a..0b02f7d2ef678 100644 --- a/doc/tutorial/statistical_inference/supervised_learning.rst +++ b/doc/tutorial/statistical_inference/supervised_learning.rst @@ -44,9 +44,7 @@ Nearest neighbor and the curse of dimensionality >>> import numpy as np >>> from sklearn import datasets - >>> iris_X, iris_y = datasets.load_iris( - ... return_X_y=True - ... 
) + >>> iris_X, iris_y = datasets.load_iris(return_X_y=True) >>> np.unique(iris_y) array([0, 1, 2]) @@ -175,7 +173,7 @@ Linear models: :math:`y = X\beta + \epsilon` >>> regr = linear_model.LinearRegression() >>> regr.fit(diabetes_X_train, diabetes_y_train) LinearRegression() - >>> print(regr.coef_) # doctest: +SKIP + >>> print(regr.coef_) [ 0.30349955 -237.63931533 510.53060544 327.73698041 -814.13170937 492.81458798 102.84845219 184.60648906 743.51961675 76.09517222] @@ -397,7 +395,7 @@ This is known as :class:`LogisticRegression`. .. literalinclude:: ../../auto_examples/exercises/plot_digits_classification_exercise.py :lines: 15-19 - :download:`Solution <../../auto_examples/exercises/plot_digits_classification_exercise.py>` + A solution can be downloaded :download:`here <../../auto_examples/exercises/plot_digits_classification_exercise.py>`. Support vector machines (SVMs) @@ -417,12 +415,16 @@ the separating line (less regularization). .. currentmodule :: sklearn.svm -.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_margin_001.png +.. figure:: /auto_examples/svm/images/sphx_glr_plot_svm_margin_001.png :target: ../../auto_examples/svm/plot_svm_margin.html + + **Unregularized SVM** -.. |svm_margin_reg| image:: /auto_examples/svm/images/sphx_glr_plot_svm_margin_002.png +.. figure:: /auto_examples/svm/images/sphx_glr_plot_svm_margin_002.png :target: ../../auto_examples/svm/plot_svm_margin.html + **Regularized SVM (default)** + .. topic:: Example: - :ref:`sphx_glr_auto_examples_svm_plot_iris_svc.py` @@ -475,8 +477,6 @@ creating a decision energy by positioning *kernels* on observations: ... degree=3) >>> # degree: polynomial degree -

- .. figure:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_003.png :target: ../../auto_examples/svm/plot_svm_kernels.html @@ -494,10 +494,10 @@ creating a decision energy by positioning *kernels* on observations: ``svm_gui.py``; add data points of both classes with right and left button, fit the model and change parameters and data. -.. image:: /auto_examples/datasets/images/sphx_glr_plot_iris_dataset_001.png - :target: ../../auto_examples/datasets/plot_iris_dataset.html - :align: center - :scale: 70 + .. image:: /auto_examples/datasets/images/sphx_glr_plot_iris_dataset_001.png + :target: ../../auto_examples/datasets/plot_iris_dataset.html + :align: center + :scale: 70 .. topic:: **Exercise** :class: green @@ -515,4 +515,4 @@ creating a decision energy by positioning *kernels* on observations: .. literalinclude:: ../../auto_examples/exercises/plot_iris_exercise.py :lines: 18-23 - :download:`Solution <../../auto_examples/exercises/plot_iris_exercise.py>` + A solution can be downloaded :download:`here <../../auto_examples/exercises/plot_iris_exercise.py>` diff --git a/doc/tutorial/statistical_inference/unsupervised_learning.rst b/doc/tutorial/statistical_inference/unsupervised_learning.rst index 4af05047ba2c3..60f59e4110236 100644 --- a/doc/tutorial/statistical_inference/unsupervised_learning.rst +++ b/doc/tutorial/statistical_inference/unsupervised_learning.rst @@ -53,26 +53,26 @@ algorithms. The simplest clustering algorithm is :ref:`k_means`. .. figure:: /auto_examples/cluster/images/sphx_glr_plot_cluster_iris_003.png :target: ../../auto_examples/cluster/plot_cluster_iris.html :scale: 63 + :align: center **Bad initialization** .. figure:: /auto_examples/cluster/images/sphx_glr_plot_cluster_iris_001.png :target: ../../auto_examples/cluster/plot_cluster_iris.html :scale: 63 + :align: center **8 clusters** .. figure:: /auto_examples/cluster/images/sphx_glr_plot_cluster_iris_004.png :target: ../../auto_examples/cluster/plot_cluster_iris.html :scale: 63 + :align: center **Ground truth** **Don't over-interpret clustering results** - - - .. topic:: **Application example: vector quantization** Clustering in general and KMeans, in particular, can be seen as a way From 3476aacd8a03ada60fe49e0267522a4963654b7d Mon Sep 17 00:00:00 2001 From: Chiara Marmo Date: Thu, 27 Aug 2020 17:34:09 +0200 Subject: [PATCH 10/14] Remove centering. --- doc/tutorial/statistical_inference/unsupervised_learning.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/tutorial/statistical_inference/unsupervised_learning.rst b/doc/tutorial/statistical_inference/unsupervised_learning.rst index 60f59e4110236..033872fac895e 100644 --- a/doc/tutorial/statistical_inference/unsupervised_learning.rst +++ b/doc/tutorial/statistical_inference/unsupervised_learning.rst @@ -53,21 +53,18 @@ algorithms. The simplest clustering algorithm is :ref:`k_means`. .. figure:: /auto_examples/cluster/images/sphx_glr_plot_cluster_iris_003.png :target: ../../auto_examples/cluster/plot_cluster_iris.html :scale: 63 - :align: center **Bad initialization** .. figure:: /auto_examples/cluster/images/sphx_glr_plot_cluster_iris_001.png :target: ../../auto_examples/cluster/plot_cluster_iris.html :scale: 63 - :align: center **8 clusters** .. 
figure:: /auto_examples/cluster/images/sphx_glr_plot_cluster_iris_004.png :target: ../../auto_examples/cluster/plot_cluster_iris.html :scale: 63 - :align: center **Ground truth** From 414cee0d0673f1fcbf673e1a1b2d74eb822fa056 Mon Sep 17 00:00:00 2001 From: Chiara Marmo Date: Thu, 27 Aug 2020 22:47:19 +0200 Subject: [PATCH 11/14] Move iris plot. --- .../statistical_inference/supervised_learning.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst index 0b02f7d2ef678..b08894aaed8f0 100644 --- a/doc/tutorial/statistical_inference/supervised_learning.rst +++ b/doc/tutorial/statistical_inference/supervised_learning.rst @@ -494,11 +494,6 @@ creating a decision energy by positioning *kernels* on observations: ``svm_gui.py``; add data points of both classes with right and left button, fit the model and change parameters and data. - .. image:: /auto_examples/datasets/images/sphx_glr_plot_iris_dataset_001.png - :target: ../../auto_examples/datasets/plot_iris_dataset.html - :align: center - :scale: 70 - .. topic:: **Exercise** :class: green @@ -515,4 +510,10 @@ creating a decision energy by positioning *kernels* on observations: .. literalinclude:: ../../auto_examples/exercises/plot_iris_exercise.py :lines: 18-23 + .. image:: /auto_examples/datasets/images/sphx_glr_plot_iris_dataset_001.png + :target: ../../auto_examples/datasets/plot_iris_dataset.html + :align: center + :scale: 70 + + A solution can be downloaded :download:`here <../../auto_examples/exercises/plot_iris_exercise.py>` From a93045a7a355675cad438f8d470c1ab542596959 Mon Sep 17 00:00:00 2001 From: Chiara Marmo Date: Thu, 27 Aug 2020 23:37:41 +0200 Subject: [PATCH 12/14] Some aesthetic --- doc/themes/scikit-learn-modern/static/css/theme.css | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/themes/scikit-learn-modern/static/css/theme.css b/doc/themes/scikit-learn-modern/static/css/theme.css index db2acbc3a11bb..c00ecee1ad1ad 100644 --- a/doc/themes/scikit-learn-modern/static/css/theme.css +++ b/doc/themes/scikit-learn-modern/static/css/theme.css @@ -839,10 +839,6 @@ div.highlight:hover span.copybutton:hover { background-color: #20252B; } -div.body img.align-center { - max-width: 800px; -} - div.body img { max-width: 100%; height: unset!important; /* Needed because sphinx sets the height */ @@ -1234,6 +1230,10 @@ table.sk-sponsor-table td { padding: 0.30rem; } +.caption { + text-align: center +} + /* pygments - highlightning */ .highlight .hll { background-color: #ffffcc } From 644489e62ddf2931bafd99ee0341d98e50ae2947 Mon Sep 17 00:00:00 2001 From: Chiara Marmo Date: Fri, 28 Aug 2020 12:09:52 +0200 Subject: [PATCH 13/14] Reorganize SVM. --- .../supervised_learning.rst | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst index b08894aaed8f0..013100a054648 100644 --- a/doc/tutorial/statistical_inference/supervised_learning.rst +++ b/doc/tutorial/statistical_inference/supervised_learning.rst @@ -457,19 +457,18 @@ build a decision function that is not linear but may be polynomial instead. This is done using the *kernel trick* that can be seen as creating a decision energy by positioning *kernels* on observations: -.. 
figure:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_001.png - :target: ../../auto_examples/svm/plot_svm_kernels.html +Linear kernel +^^^^^^^^^^^^^ - **Linear kernel** - :: >>> svc = svm.SVC(kernel='linear') -.. figure:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_002.png +.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_001.png :target: ../../auto_examples/svm/plot_svm_kernels.html - **Polynomial kernel** +Polynomial kernel +^^^^^^^^^^^^^^^^^ :: @@ -477,10 +476,11 @@ creating a decision energy by positioning *kernels* on observations: ... degree=3) >>> # degree: polynomial degree -.. figure:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_003.png +.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_002.png :target: ../../auto_examples/svm/plot_svm_kernels.html - **RBF kernel (Radial Basis Function)** +RBF kernel (Radial Basis Function) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :: @@ -488,6 +488,11 @@ creating a decision energy by positioning *kernels* on observations: >>> # gamma: inverse of size of >>> # radial kernel +.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_003.png + :target: ../../auto_examples/svm/plot_svm_kernels.html + + + .. topic:: **Interactive example** See the :ref:`SVM GUI ` to download From 7cf085c2279a38f20078b344cea80a9c25347651 Mon Sep 17 00:00:00 2001 From: Chiara Marmo Date: Sat, 29 Aug 2020 14:22:19 +0200 Subject: [PATCH 14/14] Apply suggestions from code review Co-authored-by: Thomas J. Fan --- doc/tutorial/statistical_inference/model_selection.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/tutorial/statistical_inference/model_selection.rst b/doc/tutorial/statistical_inference/model_selection.rst index f29d262d7b101..070e86c18e8b1 100644 --- a/doc/tutorial/statistical_inference/model_selection.rst +++ b/doc/tutorial/statistical_inference/model_selection.rst @@ -224,10 +224,9 @@ estimator during the construction and exposes an estimator API:: 0.943... -By default, the :class:`GridSearchCV` uses a 3-fold cross-validation. However, +By default, the :class:`GridSearchCV` uses a 5-fold cross-validation. However, if it detects that a classifier is passed, rather than a regressor, it uses -a stratified 3-fold. The default will change to a 5-fold cross-validation in -version 0.22. +a stratified 5-fold. .. topic:: Nested cross-validation