diff --git a/doc/conf.py b/doc/conf.py
index aea5d52b53da4..1113d4b2c100a 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -491,6 +491,9 @@ def add_js_css_files(app, pagename, templatename, context, doctree):
     "auto_examples/ensemble/plot_forest_importances_faces": (
         "auto_examples/ensemble/plot_forest_importances"
     ),
+    "auto_examples/ensemble/plot_voting_probas": (
+        "auto_examples/ensemble/plot_voting_decision_regions"
+    ),
     "auto_examples/datasets/plot_iris_dataset": (
         "auto_examples/decomposition/plot_pca_iris"
     ),
diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst
index 35ef9f6d7bbfc..b336a25d8048d 100644
--- a/doc/modules/ensemble.rst
+++ b/doc/modules/ensemble.rst
@@ -1410,40 +1410,17 @@ classifier 3      w3 * 0.3      w3 * 0.4        w3 * 0.3
 weighted average  0.37          0.4             0.23
 ================  ==========    ==========      ==========
 
-Here, the predicted class label is 2, since it has the highest average probability. See
-this example on :ref:`Visualising class probabilities in a Voting Classifier
-<sphx_glr_auto_examples_ensemble_plot_voting_probas.py>` for a detailed illustration of
-class probabilities averaged by soft voting.
+Here, the predicted class label is 2, since it has the highest average
+predicted probability. See the example on
+:ref:`sphx_glr_auto_examples_ensemble_plot_voting_decision_regions.py` for a
+demonstration of how the predicted class label can be obtained from the weighted
+average of predicted probabilities.
 
-Also, the following example illustrates how the decision regions may change
-when a soft :class:`VotingClassifier` is used based on a linear Support
-Vector Machine, a Decision Tree, and a K-nearest neighbor classifier::
+The following figure illustrates how the decision regions may change when
+a soft :class:`VotingClassifier` is trained with weights on three linear
+models:
 
-   >>> from sklearn import datasets
-   >>> from sklearn.tree import DecisionTreeClassifier
-   >>> from sklearn.neighbors import KNeighborsClassifier
-   >>> from sklearn.svm import SVC
-   >>> from itertools import product
-   >>> from sklearn.ensemble import VotingClassifier
-
-   >>> # Loading some example data
-   >>> iris = datasets.load_iris()
-   >>> X = iris.data[:, [0, 2]]
-   >>> y = iris.target
-
-   >>> # Training classifiers
-   >>> clf1 = DecisionTreeClassifier(max_depth=4)
-   >>> clf2 = KNeighborsClassifier(n_neighbors=7)
-   >>> clf3 = SVC(kernel='rbf', probability=True)
-   >>> eclf = VotingClassifier(estimators=[('dt', clf1), ('knn', clf2), ('svc', clf3)],
-   ...                         voting='soft', weights=[2, 1, 2])
-
-   >>> clf1 = clf1.fit(X, y)
-   >>> clf2 = clf2.fit(X, y)
-   >>> clf3 = clf3.fit(X, y)
-   >>> eclf = eclf.fit(X, y)
-
-.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_voting_decision_regions_001.png
+.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_voting_decision_regions_002.png
     :target: ../auto_examples/ensemble/plot_voting_decision_regions.html
     :align: center
     :scale: 75%
diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index d40d831fb911f..57f3f4b22b947 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -1,55 +1,111 @@
 """
-==================================================
-Plot the decision boundaries of a VotingClassifier
-==================================================
+===============================================================
+Visualizing the probabilistic predictions of a VotingClassifier
+===============================================================
 
 .. currentmodule:: sklearn
 
-Plot the decision boundaries of a :class:`~ensemble.VotingClassifier` for two
-features of the Iris dataset.
+Plot the class probabilities predicted on a toy dataset by three different
+classifiers and averaged by the :class:`~ensemble.VotingClassifier`.
 
-Plot the class probabilities of the first sample in a toy dataset predicted by
-three different classifiers and averaged by the
-:class:`~ensemble.VotingClassifier`.
+First, three linear classifiers are initialized. Two are spline models with
+interaction terms, one using constant extrapolation and the other using periodic
+extrapolation. The third is a logistic regression fitted on features from a
+:class:`~kernel_approximation.Nystroem` approximation of the default "rbf" kernel.
 
-First, three exemplary classifiers are initialized
-(:class:`~tree.DecisionTreeClassifier`,
-:class:`~neighbors.KNeighborsClassifier`, and :class:`~svm.SVC`) and used to
-initialize a soft-voting :class:`~ensemble.VotingClassifier` with weights `[2,
-1, 2]`, which means that the predicted probabilities of the
-:class:`~tree.DecisionTreeClassifier` and :class:`~svm.SVC` each count 2 times
-as much as the weights of the :class:`~neighbors.KNeighborsClassifier`
-classifier when the averaged probability is calculated.
+In the first part of this example, these three classifiers are used to
+demonstrate soft-voting using :class:`~ensemble.VotingClassifier` with weighted
+average. We set `weights=[2, 1, 3]`, meaning the constant extrapolation spline
+model's predictions are weighted twice as much as the periodic spline model's,
+and the Nystroem model's predictions are weighted three times as much as the
+periodic spline model's.
+
+The second part demonstrates how soft predictions can be converted into hard
+predictions.
 
 """
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-from itertools import product
+# %%
+# We first generate a noisy XOR dataset, which is a binary classification task.
 
 import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from matplotlib.colors import ListedColormap
+
+n_samples = 500
+rng = np.random.default_rng(0)
+feature_names = ["Feature #0", "Feature #1"]
+common_scatter_plot_params = dict(
+    cmap=ListedColormap(["tab:red", "tab:blue"]),
+    edgecolor="white",
+    linewidth=1,
+)
+
+xor = pd.DataFrame(
+    np.random.RandomState(0).uniform(low=-1, high=1, size=(n_samples, 2)),
+    columns=feature_names,
+)
+noise = rng.normal(loc=0, scale=0.1, size=(n_samples, 2))
+target_xor = np.logical_xor(
+    xor["Feature #0"] + noise[:, 0] > 0, xor["Feature #1"] + noise[:, 1] > 0
+)
+
+X = xor[feature_names]
+y = target_xor.astype(np.int32)
+
+fig, ax = plt.subplots()
+ax.scatter(X["Feature #0"], X["Feature #1"], c=y, **common_scatter_plot_params)
+ax.set_title("The XOR dataset")
+plt.show()
+
+# %%
+# Because the XOR dataset is inherently not linearly separable, tree-based
+# models would often be preferred. However, appropriate feature engineering
+# combined with a linear model can yield effective results, with the added
+# benefit of producing better-calibrated probabilities for samples located in
+# the transition regions affected by noise.
+#
+# We define and fit the models on the whole dataset.
 
-from sklearn import datasets
 from sklearn.ensemble import VotingClassifier
-from sklearn.inspection import DecisionBoundaryDisplay
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.svm import SVC
-from sklearn.tree import DecisionTreeClassifier
-
-# Loading some example data
-iris = datasets.load_iris()
-X = iris.data[:, [0, 2]]
-y = iris.target
-
-# Training classifiers
-clf1 = DecisionTreeClassifier(max_depth=4)
-clf2 = KNeighborsClassifier(n_neighbors=7)
-clf3 = SVC(gamma=0.1, kernel="rbf", probability=True)
+from sklearn.kernel_approximation import Nystroem
+from sklearn.linear_model import LogisticRegression
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import PolynomialFeatures, SplineTransformer, StandardScaler
+
+clf1 = make_pipeline(
+    SplineTransformer(degree=2, n_knots=2),
+    PolynomialFeatures(interaction_only=True),
+    LogisticRegression(C=10),
+)
+clf2 = make_pipeline(
+    SplineTransformer(
+        degree=2,
+        n_knots=4,
+        extrapolation="periodic",
+        include_bias=True,
+    ),
+    PolynomialFeatures(interaction_only=True),
+    LogisticRegression(C=10),
+)
+clf3 = make_pipeline(
+    StandardScaler(),
+    Nystroem(gamma=2, random_state=0),
+    LogisticRegression(C=10),
+)
+weights = [2, 1, 3]
 eclf = VotingClassifier(
-    estimators=[("dt", clf1), ("knn", clf2), ("svc", clf3)],
+    estimators=[
+        ("constant splines model", clf1),
+        ("periodic splines model", clf2),
+        ("nystroem model", clf3),
+    ],
     voting="soft",
-    weights=[2, 1, 2],
+    weights=weights,
 )
 
 clf1.fit(X, y)
@@ -57,17 +113,106 @@
 clf3.fit(X, y)
 eclf.fit(X, y)
 
-# Plotting decision regions
-f, axarr = plt.subplots(2, 2, sharex="col", sharey="row", figsize=(10, 8))
-for idx, clf, tt in zip(
+# %%
+# Finally we use :class:`~inspection.DecisionBoundaryDisplay` to plot the
+# predicted probabilities. By using a diverging colormap (such as `"RdBu"`), we
+# can ensure that darker colors correspond to `predict_proba` close to either 0
+# or 1, and white corresponds to `predict_proba` of 0.5.
+
+from itertools import product
+
+from sklearn.inspection import DecisionBoundaryDisplay
+
+fig, axarr = plt.subplots(2, 2, sharex="col", sharey="row", figsize=(10, 8))
+for idx, clf, title in zip(
     product([0, 1], [0, 1]),
     [clf1, clf2, clf3, eclf],
-    ["Decision Tree (depth=4)", "KNN (k=7)", "Kernel SVM", "Soft Voting"],
+    [
+        "Splines with\nconstant extrapolation",
+        "Splines with\nperiodic extrapolation",
+        "RBF Nystroem",
+        "Soft Voting",
+    ],
 ):
-    DecisionBoundaryDisplay.from_estimator(
-        clf, X, alpha=0.4, ax=axarr[idx[0], idx[1]], response_method="predict"
+    disp = DecisionBoundaryDisplay.from_estimator(
+        clf,
+        X,
+        response_method="predict_proba",
+        plot_method="pcolormesh",
+        cmap="RdBu",
+        alpha=0.8,
+        ax=axarr[idx[0], idx[1]],
+    )
+    axarr[idx[0], idx[1]].scatter(
+        X["Feature #0"],
+        X["Feature #1"],
+        c=y,
+        **common_scatter_plot_params,
     )
-    axarr[idx[0], idx[1]].scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k")
-    axarr[idx[0], idx[1]].set_title(tt)
+    axarr[idx[0], idx[1]].set_title(title)
+    fig.colorbar(disp.surface_, ax=axarr[idx[0], idx[1]], label="Probability estimate")
 
 plt.show()
+
+# %%
+# As a sanity check, we can verify for a given sample that the probability
+# predicted by the :class:`~ensemble.VotingClassifier` is indeed the weighted
+# average of the individual classifiers' soft-predictions.
+#
+# In the case of binary classification such as in the present example, the
+# :term:`predict_proba` arrays contain the probability of belonging to class 0
+# (here in red) as the first entry, and the probability of belonging to class 1
+# (here in blue) as the second entry.
+
+test_sample = pd.DataFrame({"Feature #0": [-0.5], "Feature #1": [1.5]})
+predict_probas = [est.predict_proba(test_sample).ravel() for est in eclf.estimators_]
+for (est_name, _), est_probas in zip(eclf.estimators, predict_probas):
+    print(f"{est_name}'s predicted probabilities: {est_probas}")
+
+# %%
+print(
+    "Weighted average of soft-predictions: "
+    f"{np.dot(weights, predict_probas) / np.sum(weights)}"
+)
+
+# %%
+# We can see that the manual calculation of predicted probabilities above
+# matches the output of the `VotingClassifier`:
+
+print(
+    "Predicted probability of VotingClassifier: "
+    f"{eclf.predict_proba(test_sample).ravel()}"
+)
+
+# %%
+# To convert soft predictions into hard predictions when weights are provided,
+# the weighted average of the predicted probabilities is computed for each
+# class. The final class label is then the class with the highest weighted
+# average probability, which corresponds to the default threshold at
+# `predict_proba=0.5` in the case of binary classification.
+
+print(
+    "Class with the highest weighted average of soft-predictions: "
+    f"{np.argmax(np.dot(weights, predict_probas) / np.sum(weights))}"
+)
+
+# %%
+# This is equivalent to the output of `VotingClassifier`'s `predict` method:
+
+print(f"Predicted class of VotingClassifier: {eclf.predict(test_sample).ravel()}")
+
+# %%
+# Soft votes can be thresholded as for any other probabilistic classifier. This
+# allows you to set a threshold probability at which the positive class will be
+# predicted, instead of simply selecting the class with the highest predicted
+# probability.
+
+from sklearn.model_selection import FixedThresholdClassifier
+
+eclf_other_threshold = FixedThresholdClassifier(
+    eclf, threshold=0.7, response_method="predict_proba"
+).fit(X, y)
+print(
+    "Predicted class of thresholded VotingClassifier: "
+    f"{eclf_other_threshold.predict(test_sample)}"
+)
diff --git a/examples/ensemble/plot_voting_probas.py b/examples/ensemble/plot_voting_probas.py
deleted file mode 100644
index 848358ca1d208..0000000000000
--- a/examples/ensemble/plot_voting_probas.py
+++ /dev/null
@@ -1,97 +0,0 @@
-"""
-===========================================================
-Plot class probabilities calculated by the VotingClassifier
-===========================================================
-
-.. currentmodule:: sklearn
-
-Plot the class probabilities of the first sample in a toy dataset predicted by
-three different classifiers and averaged by the
-:class:`~ensemble.VotingClassifier`.
-
-First, three exemplary classifiers are initialized
-(:class:`~linear_model.LogisticRegression`, :class:`~naive_bayes.GaussianNB`,
-and :class:`~ensemble.RandomForestClassifier`) and used to initialize a
-soft-voting :class:`~ensemble.VotingClassifier` with weights `[1, 1, 5]`, which
-means that the predicted probabilities of the
-:class:`~ensemble.RandomForestClassifier` count 5 times as much as the weights
-of the other classifiers when the averaged probability is calculated.
-
-To visualize the probability weighting, we fit each classifier on the training
-set and plot the predicted class probabilities for the first sample in this
-example dataset.
-
-"""
-
-# Authors: The scikit-learn developers
-# SPDX-License-Identifier: BSD-3-Clause
-
-import matplotlib.pyplot as plt
-import numpy as np
-
-from sklearn.ensemble import RandomForestClassifier, VotingClassifier
-from sklearn.linear_model import LogisticRegression
-from sklearn.naive_bayes import GaussianNB
-
-clf1 = LogisticRegression(max_iter=1000, random_state=123)
-clf2 = RandomForestClassifier(n_estimators=100, random_state=123)
-clf3 = GaussianNB()
-X = np.array([[-1.0, -1.0], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
-y = np.array([1, 1, 2, 2])
-
-eclf = VotingClassifier(
-    estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)],
-    voting="soft",
-    weights=[1, 1, 5],
-)
-
-# predict class probabilities for all classifiers
-probas = [c.fit(X, y).predict_proba(X) for c in (clf1, clf2, clf3, eclf)]
-
-# get class probabilities for the first sample in the dataset
-class1_1 = [pr[0, 0] for pr in probas]
-class2_1 = [pr[0, 1] for pr in probas]
-
-
-# plotting
-
-N = 4  # number of groups
-ind = np.arange(N)  # group positions
-width = 0.35  # bar width
-
-fig, ax = plt.subplots()
-
-# bars for classifier 1-3
-p1 = ax.bar(ind, np.hstack(([class1_1[:-1], [0]])), width, color="green", edgecolor="k")
-p2 = ax.bar(
-    ind + width,
-    np.hstack(([class2_1[:-1], [0]])),
-    width,
-    color="lightgreen",
-    edgecolor="k",
-)
-
-# bars for VotingClassifier
-p3 = ax.bar(ind, [0, 0, 0, class1_1[-1]], width, color="blue", edgecolor="k")
-p4 = ax.bar(
-    ind + width, [0, 0, 0, class2_1[-1]], width, color="steelblue", edgecolor="k"
-)
-
-# plot annotations
-plt.axvline(2.8, color="k", linestyle="dashed")
-ax.set_xticks(ind + width)
-ax.set_xticklabels(
-    [
-        "LogisticRegression\nweight 1",
-        "GaussianNB\nweight 1",
-        "RandomForestClassifier\nweight 5",
-        "VotingClassifier\n(average probabilities)",
-    ],
-    rotation=40,
-    ha="right",
-)
-plt.ylim([0, 1])
-plt.title("Class probabilities for sample 1 by different classifiers")
-plt.legend([p1[0], p2[0]], ["class 1", "class 2"], loc="upper left")
-plt.tight_layout()
-plt.show()