From 41f9e9fb62783f66819bba897f3dfd5e26a0b0c3 Mon Sep 17 00:00:00 2001
From: ArturoAmorQ
Date: Wed, 12 Mar 2025 17:18:29 +0100
Subject: [PATCH 01/15] DOC Rework VotingClassifier decision boundaries
 example

---
 .../ensemble/plot_voting_decision_regions.py | 187 ++++++++++++++----
 1 file changed, 145 insertions(+), 42 deletions(-)

diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index d40d831fb911f..faf2529c1bd43 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -5,51 +5,84 @@
 
 .. currentmodule:: sklearn
 
-Plot the decision boundaries of a :class:`~ensemble.VotingClassifier` for two
-features of the Iris dataset.
-
-Plot the class probabilities of the first sample in a toy dataset predicted by
-three different classifiers and averaged by the
-:class:`~ensemble.VotingClassifier`.
-
-First, three exemplary classifiers are initialized
-(:class:`~tree.DecisionTreeClassifier`,
-:class:`~neighbors.KNeighborsClassifier`, and :class:`~svm.SVC`) and used to
-initialize a soft-voting :class:`~ensemble.VotingClassifier` with weights `[2,
-1, 2]`, which means that the predicted probabilities of the
-:class:`~tree.DecisionTreeClassifier` and :class:`~svm.SVC` each count 2 times
-as much as the weights of the :class:`~neighbors.KNeighborsClassifier`
-classifier when the averaged probability is calculated.
+Plot the class probabilities predicted on a toy dataset by three different
+classifiers and averaged by the :class:`~ensemble.VotingClassifier`.
+
+First, three linear classifiers are initialized. Two are spline models with
+interaction terms, one using constant extrapolation and the other using periodic
+extrapolation. The third classifier uses a :class:`~kernel_approximation.Nystroem`
+kernel approximation with the default "rbf" kernel.
+
+In the first part of this example these three classifiers are used to
+demonstrate soft-voting using :class:`~ensemble.VotingClassifier` with a weighted
+average. We set `weights=[2, 1, 3]`, meaning the constant extrapolation spline
+model's predictions are weighted twice as much as the periodic spline model's,
+and the Nystroem model's predictions are weighted three times as much as the
+periodic spline model's.
+
+The second part demonstrates how soft predictions can be converted into hard
+predictions.
 
 """
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-from itertools import product
+# %%
+# We first generate a noisy XOR dataset, which is a binary classification task.
 
-import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+n_samples = 500
+rng = np.random.default_rng(0)
+feature_names = ["Feature #0", "Feature #1"]
+xor = pd.DataFrame(
+    np.random.RandomState(0).uniform(low=-1, high=1, size=(n_samples, 2)),
+    columns=feature_names,
+)
+noise = rng.normal(loc=0, scale=0.1, size=(n_samples, 2))
+target_xor = np.logical_xor(
+    xor["Feature #0"] + noise[:, 0] > 0, xor["Feature #1"] + noise[:, 1] > 0
+)
+
+X = xor[feature_names]
+y = target_xor.astype(np.int32)
+
+# %%
+# We define and fit the models on the whole dataset.
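+#
+# With the weights `[2, 1, 3]` used below, the soft-voting prediction for a
+# sample is the weighted average of its members' probabilities,
+# `(2 * p_constant + 1 * p_periodic + 3 * p_nystroem) / (2 + 1 + 3)`, where
+# `p_constant`, `p_periodic` and `p_nystroem` are the probabilities predicted
+# by the individual classifiers. We verify this computation numerically at the
+# end of this example.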
-from sklearn import datasets from sklearn.ensemble import VotingClassifier -from sklearn.inspection import DecisionBoundaryDisplay -from sklearn.neighbors import KNeighborsClassifier -from sklearn.svm import SVC -from sklearn.tree import DecisionTreeClassifier - -# Loading some example data -iris = datasets.load_iris() -X = iris.data[:, [0, 2]] -y = iris.target - -# Training classifiers -clf1 = DecisionTreeClassifier(max_depth=4) -clf2 = KNeighborsClassifier(n_neighbors=7) -clf3 = SVC(gamma=0.1, kernel="rbf", probability=True) +from sklearn.kernel_approximation import Nystroem +from sklearn.linear_model import LogisticRegression +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import PolynomialFeatures, SplineTransformer, StandardScaler + +clf1 = make_pipeline( + SplineTransformer(degree=2, n_knots=2), + PolynomialFeatures(interaction_only=True), + LogisticRegression(C=10), +) +clf2 = make_pipeline( + SplineTransformer( + degree=2, + n_knots=4, + extrapolation="periodic", + include_bias=True, + ), + PolynomialFeatures(interaction_only=True), + LogisticRegression(C=10), +) +clf3 = make_pipeline( + StandardScaler(), + Nystroem(gamma=2, random_state=0), + LogisticRegression(C=10), +) +weights = [2, 1, 3] eclf = VotingClassifier( - estimators=[("dt", clf1), ("knn", clf2), ("svc", clf3)], + estimators=[("constant", clf1), ("periodic", clf2), ("nystroem", clf3)], voting="soft", - weights=[2, 1, 2], + weights=weights, ) clf1.fit(X, y) @@ -57,17 +90,87 @@ clf3.fit(X, y) eclf.fit(X, y) -# Plotting decision regions -f, axarr = plt.subplots(2, 2, sharex="col", sharey="row", figsize=(10, 8)) -for idx, clf, tt in zip( +# %% +# Finally we use :class:`~inspection.DecisionBoundaryDisplay` to plot the +# predicted probabilities. By using a diverging colormap (such as `"RdBu"`), we +# can ensure that darker colors correspond to `predict_proba` close to either 0 +# or 1. + +from itertools import product + +import matplotlib.pyplot as plt +from matplotlib.colors import ListedColormap + +from sklearn.inspection import DecisionBoundaryDisplay + +fig, axarr = plt.subplots(2, 2, sharex="col", sharey="row", figsize=(10, 8)) +for idx, clf, title in zip( product([0, 1], [0, 1]), [clf1, clf2, clf3, eclf], - ["Decision Tree (depth=4)", "KNN (k=7)", "Kernel SVM", "Soft Voting"], + [ + "Splines with\nconstant extrapolation", + "Splines with\nperiodic extrapolation", + "RBF Nystroem", + "Soft Voting", + ], ): - DecisionBoundaryDisplay.from_estimator( - clf, X, alpha=0.4, ax=axarr[idx[0], idx[1]], response_method="predict" + disp = DecisionBoundaryDisplay.from_estimator( + clf, + X, + response_method="predict_proba", + plot_method="pcolormesh", + cmap="RdBu", + alpha=0.8, + ax=axarr[idx[0], idx[1]], ) - axarr[idx[0], idx[1]].scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k") - axarr[idx[0], idx[1]].set_title(tt) + axarr[idx[0], idx[1]].scatter( + X["Feature #0"], + X["Feature #1"], + c=y, + cmap=ListedColormap(["tab:red", "tab:blue"]), + edgecolor="white", + linewidth=1, + ) + axarr[idx[0], idx[1]].set_title(title) + fig.colorbar(disp.surface_, ax=axarr[idx[0], idx[1]], label="Probability estimate") plt.show() + +# %% +# As a sanity check, we can verify for a given sample that the probability +# predicted by the :class:`~ensemble.VotingClassifier` is indeed the weighted +# average of the individual classifiers' soft-predictions. 
+#
+# In the case of binary classification such as in the present example, the
+# `predict_proba` arrays contain the probability of belonging to class 0 (here
+# in red) as the first entry, and the probability of belonging to class 1 (here
+# in blue) as the second entry.
+
+test_sample = pd.DataFrame({"Feature #0": [-0.5], "Feature #1": [1.5]})
+predict_probas = [est.predict_proba(test_sample).ravel() for est in eclf.estimators_]
+print(f"Individual predicted probabilities: {predict_probas}")
+print(
+    "Weighted average of soft-predictions: "
+    f"{np.dot(weights, predict_probas)/np.sum(weights)}"
+)
+print(
+    "Predicted probability of VotingClassifier: "
+    f"{eclf.predict_proba(test_sample).ravel()}"
+)
+
+# %%
+# To convert soft predictions into hard predictions when weights are provided,
+# the weighted average predicted probabilities are computed for each class.
+# The final class label is then derived from the class with the
+# highest average probability.
+
+print(
+    "Class with the highest weighted average of soft-predictions: "
+    f"{np.argmax(np.dot(weights, predict_probas)/np.sum(weights))}"
+)
+
+# %%
+# Which corresponds to the default threshold at 0.5 in the case of binary
+# classification. Equivalently:
+
+print(f"Predicted class of VotingClassifier: {eclf.predict(test_sample).ravel()}")

From 22b8854a9bcdd02b2b448f3a2bc173d6a9889121 Mon Sep 17 00:00:00 2001
From: ArturoAmorQ
Date: Wed, 12 Mar 2025 17:23:35 +0100
Subject: [PATCH 02/15] Update User Guide accordingly

---
 doc/modules/ensemble.rst | 30 +++---------------------------
 1 file changed, 3 insertions(+), 27 deletions(-)

diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst
index 71f91621c54af..0e08b3faf8050 100644
--- a/doc/modules/ensemble.rst
+++ b/doc/modules/ensemble.rst
@@ -1413,33 +1413,9 @@ weighted average 0.37 0.4 0.23
 ================ ========== ========== ==========
 
 Here, the predicted class label is 2, since it has the highest average probability.
 
-The following example illustrates how the decision regions may change
-when a soft :class:`VotingClassifier` is used based on a linear Support
-Vector Machine, a Decision Tree, and a K-nearest neighbor classifier::
-
-    >>> from sklearn import datasets
-    >>> from sklearn.tree import DecisionTreeClassifier
-    >>> from sklearn.neighbors import KNeighborsClassifier
-    >>> from sklearn.svm import SVC
-    >>> from itertools import product
-    >>> from sklearn.ensemble import VotingClassifier
-
-    >>> # Loading some example data
-    >>> iris = datasets.load_iris()
-    >>> X = iris.data[:, [0, 2]]
-    >>> y = iris.target
-
-    >>> # Training classifiers
-    >>> clf1 = DecisionTreeClassifier(max_depth=4)
-    >>> clf2 = KNeighborsClassifier(n_neighbors=7)
-    >>> clf3 = SVC(kernel='rbf', probability=True)
-    >>> eclf = VotingClassifier(estimators=[('dt', clf1), ('knn', clf2), ('svc', clf3)],
-    ...                         voting='soft', weights=[2, 1, 2])
-
-    >>> clf1 = clf1.fit(X, y)
-    >>> clf2 = clf2.fit(X, y)
-    >>> clf3 = clf3.fit(X, y)
-    >>> eclf = eclf.fit(X, y)
+The following figure illustrates how the decision regions may change when
+a soft :class:`VotingClassifier` is trained with weights on three linear
+models::
 
 ..
figure:: ../auto_examples/ensemble/images/sphx_glr_plot_voting_decision_regions_001.png :target: ../auto_examples/ensemble/plot_voting_decision_regions.html From 47553ac31526fd4ac62a1f174c923e99b8abefe5 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 12 Mar 2025 17:31:50 +0100 Subject: [PATCH 03/15] Add redirect from Plot class probabilities example --- doc/conf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/conf.py b/doc/conf.py index f749b188b3274..ec5af6463b7fb 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -491,6 +491,9 @@ def add_js_css_files(app, pagename, templatename, context, doctree): "auto_examples/ensemble/plot_forest_importances_faces": ( "auto_examples/ensemble/plot_forest_importances" ), + "auto_examples/ensemble/plot_voting_probas": ( + "auto_examples/ensemble/plot_voting_decision_regions" + ), "auto_examples/datasets/plot_iris_dataset": ( "auto_examples/decomposition/plot_pca_iris" ), From 28be627d11f72c7a62a8ede8ff6cac127dd0fdb1 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 12 Mar 2025 17:33:42 +0100 Subject: [PATCH 04/15] Remove Plot class probabilities example --- examples/ensemble/plot_voting_probas.py | 97 ------------------------- 1 file changed, 97 deletions(-) delete mode 100644 examples/ensemble/plot_voting_probas.py diff --git a/examples/ensemble/plot_voting_probas.py b/examples/ensemble/plot_voting_probas.py deleted file mode 100644 index 848358ca1d208..0000000000000 --- a/examples/ensemble/plot_voting_probas.py +++ /dev/null @@ -1,97 +0,0 @@ -""" -=========================================================== -Plot class probabilities calculated by the VotingClassifier -=========================================================== - -.. currentmodule:: sklearn - -Plot the class probabilities of the first sample in a toy dataset predicted by -three different classifiers and averaged by the -:class:`~ensemble.VotingClassifier`. - -First, three exemplary classifiers are initialized -(:class:`~linear_model.LogisticRegression`, :class:`~naive_bayes.GaussianNB`, -and :class:`~ensemble.RandomForestClassifier`) and used to initialize a -soft-voting :class:`~ensemble.VotingClassifier` with weights `[1, 1, 5]`, which -means that the predicted probabilities of the -:class:`~ensemble.RandomForestClassifier` count 5 times as much as the weights -of the other classifiers when the averaged probability is calculated. - -To visualize the probability weighting, we fit each classifier on the training -set and plot the predicted class probabilities for the first sample in this -example dataset. 
- -""" - -# Authors: The scikit-learn developers -# SPDX-License-Identifier: BSD-3-Clause - -import matplotlib.pyplot as plt -import numpy as np - -from sklearn.ensemble import RandomForestClassifier, VotingClassifier -from sklearn.linear_model import LogisticRegression -from sklearn.naive_bayes import GaussianNB - -clf1 = LogisticRegression(max_iter=1000, random_state=123) -clf2 = RandomForestClassifier(n_estimators=100, random_state=123) -clf3 = GaussianNB() -X = np.array([[-1.0, -1.0], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]]) -y = np.array([1, 1, 2, 2]) - -eclf = VotingClassifier( - estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], - voting="soft", - weights=[1, 1, 5], -) - -# predict class probabilities for all classifiers -probas = [c.fit(X, y).predict_proba(X) for c in (clf1, clf2, clf3, eclf)] - -# get class probabilities for the first sample in the dataset -class1_1 = [pr[0, 0] for pr in probas] -class2_1 = [pr[0, 1] for pr in probas] - - -# plotting - -N = 4 # number of groups -ind = np.arange(N) # group positions -width = 0.35 # bar width - -fig, ax = plt.subplots() - -# bars for classifier 1-3 -p1 = ax.bar(ind, np.hstack(([class1_1[:-1], [0]])), width, color="green", edgecolor="k") -p2 = ax.bar( - ind + width, - np.hstack(([class2_1[:-1], [0]])), - width, - color="lightgreen", - edgecolor="k", -) - -# bars for VotingClassifier -p3 = ax.bar(ind, [0, 0, 0, class1_1[-1]], width, color="blue", edgecolor="k") -p4 = ax.bar( - ind + width, [0, 0, 0, class2_1[-1]], width, color="steelblue", edgecolor="k" -) - -# plot annotations -plt.axvline(2.8, color="k", linestyle="dashed") -ax.set_xticks(ind + width) -ax.set_xticklabels( - [ - "LogisticRegression\nweight 1", - "GaussianNB\nweight 1", - "RandomForestClassifier\nweight 5", - "VotingClassifier\n(average probabilities)", - ], - rotation=40, - ha="right", -) -plt.ylim([0, 1]) -plt.title("Class probabilities for sample 1 by different classifiers") -plt.legend([p1[0], p2[0]], ["class 1", "class 2"], loc="upper left") -plt.tight_layout() -plt.show() From c4f46c9b95aa5d6dbefba35ddede7297f2b5cfc6 Mon Sep 17 00:00:00 2001 From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com> Date: Tue, 18 Mar 2025 10:43:55 +0100 Subject: [PATCH 05/15] Apply suggestions from code review Co-authored-by: Olivier Grisel --- examples/ensemble/plot_voting_decision_regions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py index faf2529c1bd43..25f49076a9741 100644 --- a/examples/ensemble/plot_voting_decision_regions.py +++ b/examples/ensemble/plot_voting_decision_regions.py @@ -148,7 +148,8 @@ test_sample = pd.DataFrame({"Feature #0": [-0.5], "Feature #1": [1.5]}) predict_probas = [est.predict_proba(test_sample).ravel() for est in eclf.estimators_] -print(f"Individual predicted probabilities: {predict_probas}") +for (est_name, _), est_probas in zip(eclf.estimators, predict_probas): + print(f"{est_name}'s predicted probabilities: {est_probas}") print( "Weighted average of soft-predictions: " f"{np.dot(weights, predict_probas)/np.sum(weights)}" From 85dedec5678a5eebc50ad39db4ee5e96196e7575 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Tue, 18 Mar 2025 10:55:49 +0100 Subject: [PATCH 06/15] Address comments from ogrisel --- .../ensemble/plot_voting_decision_regions.py | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/examples/ensemble/plot_voting_decision_regions.py 
b/examples/ensemble/plot_voting_decision_regions.py index 25f49076a9741..b9dedb80ff5de 100644 --- a/examples/ensemble/plot_voting_decision_regions.py +++ b/examples/ensemble/plot_voting_decision_regions.py @@ -31,12 +31,20 @@ # %% # We first generate a noisy XOR dataset, which is a binary classification task. +import matplotlib.pyplot as plt import numpy as np import pandas as pd +from matplotlib.colors import ListedColormap n_samples = 500 rng = np.random.default_rng(0) feature_names = ["Feature #0", "Feature #1"] +common_scatter_plot_params = dict( + cmap=ListedColormap(["tab:red", "tab:blue"]), + edgecolor="white", + linewidth=1, +) + xor = pd.DataFrame( np.random.RandomState(0).uniform(low=-1, high=1, size=(n_samples, 2)), columns=feature_names, @@ -49,6 +57,11 @@ X = xor[feature_names] y = target_xor.astype(np.int32) +fig, ax = plt.subplots() +ax.scatter(X["Feature #0"], X["Feature #1"], c=y, **common_scatter_plot_params) +ax.set_title("The XOR dataset") +plt.show() + # %% # We define and fit the models on the whole dataset. @@ -80,7 +93,11 @@ ) weights = [2, 1, 3] eclf = VotingClassifier( - estimators=[("constant", clf1), ("periodic", clf2), ("nystroem", clf3)], + estimators=[ + ("constant splines model", clf1), + ("periodic splines model", clf2), + ("nystroem model", clf3), + ], voting="soft", weights=weights, ) @@ -98,9 +115,6 @@ from itertools import product -import matplotlib.pyplot as plt -from matplotlib.colors import ListedColormap - from sklearn.inspection import DecisionBoundaryDisplay fig, axarr = plt.subplots(2, 2, sharex="col", sharey="row", figsize=(10, 8)) @@ -127,9 +141,7 @@ X["Feature #0"], X["Feature #1"], c=y, - cmap=ListedColormap(["tab:red", "tab:blue"]), - edgecolor="white", - linewidth=1, + **common_scatter_plot_params, ) axarr[idx[0], idx[1]].set_title(title) fig.colorbar(disp.surface_, ax=axarr[idx[0], idx[1]], label="Probability estimate") @@ -150,10 +162,14 @@ predict_probas = [est.predict_proba(test_sample).ravel() for est in eclf.estimators_] for (est_name, _), est_probas in zip(eclf.estimators, predict_probas): print(f"{est_name}'s predicted probabilities: {est_probas}") + +# %% print( "Weighted average of soft-predictions: " f"{np.dot(weights, predict_probas)/np.sum(weights)}" ) + +# %% print( "Predicted probability of VotingClassifier: " f"{eclf.predict_proba(test_sample).ravel()}" From 82b0c91ba088d275fdda0dc900ea4fbc30088cd4 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Tue, 25 Mar 2025 10:57:32 +0100 Subject: [PATCH 07/15] Add comment on thresholding --- examples/ensemble/plot_voting_decision_regions.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py index b9dedb80ff5de..2f569cc0a481c 100644 --- a/examples/ensemble/plot_voting_decision_regions.py +++ b/examples/ensemble/plot_voting_decision_regions.py @@ -191,3 +191,17 @@ # classification. Equivalently: print(f"Predicted class of VotingClassifier: {eclf.predict(test_sample).ravel()}") + +# %% +# Soft votes can be thresholded as for any other probabilistic classifier. 
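+#
+# To see what a stricter threshold changes for our test sample, we can first
+# inspect its soft vote for the positive (blue) class:
+
+print(
+    "Soft vote for the positive class: "
+    f"{eclf.predict_proba(test_sample)[:, 1]}"
+)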
+
+# %%
+from sklearn.model_selection import FixedThresholdClassifier
+
+eclf_other_threshold = FixedThresholdClassifier(
+    eclf, threshold=0.7, response_method="predict_proba"
+).fit(X, y)
+print(
+    "Predicted class of thresholded VotingClassifier: "
+    f"{eclf_other_threshold.predict(test_sample)}"
+)

From b0455e714014814049174109c2d8f49002514122 Mon Sep 17 00:00:00 2001
From: ArturoAmorQ
Date: Tue, 25 Mar 2025 12:31:57 +0100
Subject: [PATCH 08/15] Change example's title and match sphinx ref

---
 doc/modules/ensemble.rst                          | 9 +++++----
 examples/ensemble/plot_voting_decision_regions.py | 6 +++---
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst
index 80aad872fccbf..d1041fccce772 100644
--- a/doc/modules/ensemble.rst
+++ b/doc/modules/ensemble.rst
@@ -1410,10 +1410,11 @@ classifier 3 w3 * 0.3 w3 * 0.4 w3 * 0.3
 weighted average 0.37 0.4 0.23
 ================ ========== ========== ==========
 
-Here, the predicted class label is 2, since it has the highest average probability. See
-this example on :ref:`Visualising class probabilities in a Voting Classifier
-<sphx_glr_auto_examples_ensemble_plot_voting_probas.py>` for a detailed illustration of
-class probabilities averaged by soft voting.
+Here, the predicted class label is 2, since it has the highest average
+probability. See the example on
+:ref:`sphx_glr_auto_examples_ensemble_plot_voting_decision_regions.py` for a
+demonstration of how the predicted class label can be obtained from the weighted
+average of predicted probabilities.
 
 The following figure illustrates how the decision regions may change when
 a soft :class:`VotingClassifier` is trained with weights on three linear
diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index 2f569cc0a481c..6a23916b0346c 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -1,7 +1,7 @@
 """
-==================================================
-Plot the decision boundaries of a VotingClassifier
-==================================================
+=====================================================
+Visualizing class probabilities in a VotingClassifier
+=====================================================
 
 .. currentmodule:: sklearn

From 822c2e7d9461a58e497c8055ca93dbe9c4f26bc5 Mon Sep 17 00:00:00 2001
From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com>
Date: Thu, 27 Mar 2025 12:19:45 +0100
Subject: [PATCH 09/15] Address Olivier comment

Co-authored-by: Olivier Grisel
---
 doc/modules/ensemble.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst
index d1041fccce772..0e3172e79b800 100644
--- a/doc/modules/ensemble.rst
+++ b/doc/modules/ensemble.rst
@@ -1411,7 +1411,7 @@ weighted average 0.37 0.4 0.23
 ================ ========== ========== ==========
 
 Here, the predicted class label is 2, since it has the highest average
-probability. See the example on
+predicted probability. See the example on
 :ref:`sphx_glr_auto_examples_ensemble_plot_voting_decision_regions.py` for a
 demonstration of how the predicted class label can be obtained from the weighted
 average of predicted probabilities.
From 1d352a119a2868efc2fe0e68221ea7afb4c910b5 Mon Sep 17 00:00:00 2001
From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com>
Date: Thu, 27 Mar 2025 12:20:13 +0100
Subject: [PATCH 10/15] Address Olivier's comment

Co-authored-by: Olivier Grisel
---
 examples/ensemble/plot_voting_decision_regions.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index 6a23916b0346c..897ea79aeb04d 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -1,7 +1,7 @@
 """
-=====================================================
-Visualizing class probabilities in a VotingClassifier
-=====================================================
+===============================================================
+Visualizing the probabilistic predictions of a VotingClassifier
+===============================================================
 
 .. currentmodule:: sklearn

From fc73e782f0038a239d1a369bbbf2a2d39dbda994 Mon Sep 17 00:00:00 2001
From: Lucy Liu
Date: Wed, 30 Apr 2025 11:07:52 +1000
Subject: [PATCH 11/15] fix lint

---
 examples/ensemble/plot_voting_decision_regions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index 897ea79aeb04d..43faa456724b5 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -166,7 +166,7 @@
 # %%
 print(
     "Weighted average of soft-predictions: "
-    f"{np.dot(weights, predict_probas)/np.sum(weights)}"
+    f"{np.dot(weights, predict_probas) / np.sum(weights)}"
 )
 
 # %%
@@ -183,7 +183,7 @@
 
 print(
     "Class with the highest weighted average of soft-predictions: "
-    f"{np.argmax(np.dot(weights, predict_probas)/np.sum(weights))}"
+    f"{np.argmax(np.dot(weights, predict_probas) / np.sum(weights))}"
 )
 
 # %%

From 9d010327ac5096af45dbb47ccf5244c47c79463d Mon Sep 17 00:00:00 2001
From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com>
Date: Wed, 30 Apr 2025 10:07:19 +0200
Subject: [PATCH 12/15] Apply suggestions from code review

Co-authored-by: Lucy Liu
---
 examples/ensemble/plot_voting_decision_regions.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index 43faa456724b5..d70da8f1e697a 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -13,7 +13,7 @@
 extrapolation. The third classifier uses a :class:`~kernel_approximation.Nystroem`
 kernel approximation with the default "rbf" kernel.
 
-In the first part of this example these three classifiers are used to
+In the first part of this example, these three classifiers are used to
 demonstrate soft-voting using :class:`~ensemble.VotingClassifier` with a weighted
 average. We set `weights=[2, 1, 3]`, meaning the constant extrapolation spline
@@ -154,7 +154,7 @@
 # average of the individual classifiers' soft-predictions.
 #
 # In the case of binary classification such as in the present example, the
-# `predict_proba` arrays contain the probability of belonging to class 0 (here
+# term:`predict_proba` arrays contain the probability of belonging to class 0 (here
 # in red) as the first entry, and the probability of belonging to class 1 (here
 # in blue) as the second entry.
@@ -188,7 +188,8 @@
 
 # %%
 # Which corresponds to the default threshold at 0.5 in the case of binary
-# classification. Equivalently:
+# classification. This is equivalent to the output of `VotingClassifier`'s `predict`
+# method:
 
 print(f"Predicted class of VotingClassifier: {eclf.predict(test_sample).ravel()}")

From 5bbacd631f2205f5be5bdd65b5ce47a445c092c6 Mon Sep 17 00:00:00 2001
From: ArturoAmorQ
Date: Wed, 30 Apr 2025 11:46:13 +0200
Subject: [PATCH 13/15] Address comments from Lucy

---
 doc/modules/ensemble.rst                          |  4 ++--
 .../ensemble/plot_voting_decision_regions.py      | 22 +++++++++++--------
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst
index 564179ef58333..b336a25d8048d 100644
--- a/doc/modules/ensemble.rst
+++ b/doc/modules/ensemble.rst
@@ -1418,9 +1418,9 @@ average of predicted probabilities.
 
 The following figure illustrates how the decision regions may change when
 a soft :class:`VotingClassifier` is trained with weights on three linear
-models::
+models:
 
-.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_voting_decision_regions_001.png
+.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_voting_decision_regions_002.png
    :target: ../auto_examples/ensemble/plot_voting_decision_regions.html
    :align: center
    :scale: 75%

diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index d70da8f1e697a..f5ec07214d692 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -154,9 +154,9 @@
 # average of the individual classifiers' soft-predictions.
 #
 # In the case of binary classification such as in the present example, the
-# term:`predict_proba` arrays contain the probability of belonging to class 0 (here
-# in red) as the first entry, and the probability of belonging to class 1 (here
-# in blue) as the second entry.
+# term:`predict_proba` arrays contain the probability of belonging to class 0
+# (here in red) as the first entry, and the probability of belonging to class 1
+# (here in blue) as the second entry.
 
 test_sample = pd.DataFrame({"Feature #0": [-0.5], "Feature #1": [1.5]})
 predict_probas = [est.predict_proba(test_sample).ravel() for est in eclf.estimators_]
 for (est_name, _), est_probas in zip(eclf.estimators, predict_probas):
     print(f"{est_name}'s predicted probabilities: {est_probas}")
 
 # %%
+# We can see that the manual calculation of predicted probabilities above is
+# equivalent to that produced by the `VotingClassifier`:
+
 print(
     "Predicted probability of VotingClassifier: "
     f"{eclf.predict_proba(test_sample).ravel()}"
 )
@@ -182,7 +188,8 @@
 # To convert soft predictions into hard predictions when weights are provided,
 # the weighted average predicted probabilities are computed for each class.
 # The final class label is then derived from the class with the
-# highest average probability.
+# highest average probability, which corresponds to the default threshold at
+# `predict_proba=0.5` in the case of binary classification.
 
 print(
     "Class with the highest weighted average of soft-predictions: "
     f"{np.argmax(np.dot(weights, predict_probas) / np.sum(weights))}"
 )
 
 # %%
-# Which corresponds to the default threshold at 0.5 in the case of binary
-# classification. This is equivalent to the output of `VotingClassifier`'s `predict`
-# method:
+# This is equivalent to the output of `VotingClassifier`'s `predict` method:
 
 print(f"Predicted class of VotingClassifier: {eclf.predict(test_sample).ravel()}")
 
 # %%
-# Soft votes can be thresholded as for any other probabilistic classifier.
+# Soft votes can be thresholded as for any other probabilistic classifier. This
+# allows you to set a threshold probability at which the positive class will be
+# predicted, instead of simply selecting the class with the highest predicted
+# probability.
 #
 # To see what a stricter threshold changes for our test sample, we can first
 # inspect its soft vote for the positive (blue) class:
 
 print(
     "Soft vote for the positive class: "
     f"{eclf.predict_proba(test_sample)[:, 1]}"
 )
 
-# %%
 from sklearn.model_selection import FixedThresholdClassifier
 
 eclf_other_threshold = FixedThresholdClassifier(

From eb7c8f581e0bdf38880477ef3db283b7ca711d4a Mon Sep 17 00:00:00 2001
From: ArturoAmorQ
Date: Wed, 30 Apr 2025 11:58:54 +0200
Subject: [PATCH 14/15] Add comment on linear separability

---
 examples/ensemble/plot_voting_decision_regions.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index f5ec07214d692..584097fac73ff 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -63,6 +63,12 @@
 plt.show()
 
 # %%
+# Since the XOR dataset is not linearly separable, tree-based models would
+# often be preferred. However, appropriate feature engineering combined with a
+# linear model can yield effective results, with the added benefit of
+# producing better-calibrated probabilities for samples located in the
+# transition regions affected by noise.
+#
 # We define and fit the models on the whole dataset.

From ec42ff12af05548474f81435e22da21c5c853154 Mon Sep 17 00:00:00 2001
From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com>
Date: Thu, 1 May 2025 10:17:10 +0200
Subject: [PATCH 15/15] Apply suggestions from code review

Co-authored-by: Lucy Liu
---
 examples/ensemble/plot_voting_decision_regions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index 584097fac73ff..57f3f4b22b947 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -117,7 +117,7 @@
This is equivalent to the output of `VotingClassifier`'s `predict` -# method: +# This is equivalent to the output of `VotingClassifier`'s `predict` method: print(f"Predicted class of VotingClassifier: {eclf.predict(test_sample).ravel()}") # %% -# Soft votes can be thresholded as for any other probabilistic classifier. +# Soft votes can be thresholded as for any other probabilistic classifier. This +# allows you to set a threshold probability at which the positive class will be +# predicted, instead of simply selecting the class with the highest predicted +# probability. -# %% from sklearn.model_selection import FixedThresholdClassifier eclf_other_threshold = FixedThresholdClassifier( From eb7c8f581e0bdf38880477ef3db283b7ca711d4a Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 30 Apr 2025 11:58:54 +0200 Subject: [PATCH 14/15] Add comment on linear-separability --- examples/ensemble/plot_voting_decision_regions.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py index f5ec07214d692..584097fac73ff 100644 --- a/examples/ensemble/plot_voting_decision_regions.py +++ b/examples/ensemble/plot_voting_decision_regions.py @@ -63,6 +63,12 @@ plt.show() # %% +# Due to the inherent non-linear separability of the XOR dataset, tree-based +# models would often be preferred. However, appropriate feature engineering +# combined with a linear model can yield effective results, with the added +# benefit of producing better-calibrated probabilities for samples located in +# the transition regions affected by noise. +# # We define and fit the models on the whole dataset. from sklearn.ensemble import VotingClassifier From ec42ff12af05548474f81435e22da21c5c853154 Mon Sep 17 00:00:00 2001 From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com> Date: Thu, 1 May 2025 10:17:10 +0200 Subject: [PATCH 15/15] Apply suggestions from code review Co-authored-by: Lucy Liu --- examples/ensemble/plot_voting_decision_regions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py index 584097fac73ff..57f3f4b22b947 100644 --- a/examples/ensemble/plot_voting_decision_regions.py +++ b/examples/ensemble/plot_voting_decision_regions.py @@ -117,7 +117,7 @@ # Finally we use :class:`~inspection.DecisionBoundaryDisplay` to plot the # predicted probabilities. By using a diverging colormap (such as `"RdBu"`), we # can ensure that darker colors correspond to `predict_proba` close to either 0 -# or 1. +# or 1, and white corresponds to `predict_proba` of 0.5. from itertools import product @@ -160,7 +160,7 @@ # average of the individual classifiers' soft-predictions. # # In the case of binary classification such as in the present example, the -# term:`predict_proba` arrays contain the probability of belonging to class 0 +# :term:`predict_proba` arrays contain the probability of belonging to class 0 # (here in red) as the first entry, and the probability of belonging to class 1 # (here in blue) as the second entry.