From 41f9e9fb62783f66819bba897f3dfd5e26a0b0c3 Mon Sep 17 00:00:00 2001
From: ArturoAmorQ
Date: Wed, 12 Mar 2025 17:18:29 +0100
Subject: [PATCH 01/15] DOC Rework VotingClassifier decision boundaries
 example

---
 .../ensemble/plot_voting_decision_regions.py | 187 ++++++++++++++----
 1 file changed, 145 insertions(+), 42 deletions(-)

diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index d40d831fb911f..faf2529c1bd43 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -5,51 +5,84 @@
 
 .. currentmodule:: sklearn
 
-Plot the decision boundaries of a :class:`~ensemble.VotingClassifier` for two
-features of the Iris dataset.
-
-Plot the class probabilities of the first sample in a toy dataset predicted by
-three different classifiers and averaged by the
-:class:`~ensemble.VotingClassifier`.
-
-First, three exemplary classifiers are initialized
-(:class:`~tree.DecisionTreeClassifier`,
-:class:`~neighbors.KNeighborsClassifier`, and :class:`~svm.SVC`) and used to
-initialize a soft-voting :class:`~ensemble.VotingClassifier` with weights `[2,
-1, 2]`, which means that the predicted probabilities of the
-:class:`~tree.DecisionTreeClassifier` and :class:`~svm.SVC` each count 2 times
-as much as the weights of the :class:`~neighbors.KNeighborsClassifier`
-classifier when the averaged probability is calculated.
+Plot the class probabilities predicted on a toy dataset by three different
+classifiers and averaged by the :class:`~ensemble.VotingClassifier`.
+
+First, three linear classifiers are initialized. Two are spline models with
+interaction terms, one using constant extrapolation and the other using periodic
+extrapolation. The third classifier uses a :class:`~kernel_approximation.Nystroem`
+kernel approximation with the default "rbf" kernel.
+
+In the first part of this example these three classifiers are used to
+demonstrate soft-voting using :class:`~ensemble.VotingClassifier` with a weighted
+average. We set `weights=[2, 1, 3]`, meaning the constant extrapolation spline
+model's predictions are weighted twice as much as the periodic spline model's,
+and the Nystroem model's predictions are weighted three times as much as the
+periodic spline model's.
+
+The second part demonstrates how soft predictions can be converted into hard
+predictions.
 
 """
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-from itertools import product
+# %%
+# We first generate a noisy XOR dataset, which is a binary classification task.
 
-import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+n_samples = 500
+rng = np.random.default_rng(0)
+feature_names = ["Feature #0", "Feature #1"]
+xor = pd.DataFrame(
+    np.random.RandomState(0).uniform(low=-1, high=1, size=(n_samples, 2)),
+    columns=feature_names,
+)
+noise = rng.normal(loc=0, scale=0.1, size=(n_samples, 2))
+target_xor = np.logical_xor(
+    xor["Feature #0"] + noise[:, 0] > 0, xor["Feature #1"] + noise[:, 1] > 0
+)
+
+X = xor[feature_names]
+y = target_xor.astype(np.int32)
+
+# %%
+# We define and fit the models on the whole dataset.
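+#
+# With the weights `[2, 1, 3]` used below, the soft-voting prediction for a
+# sample is the weighted average of its members' probabilities,
+# `(2 * p_constant + 1 * p_periodic + 3 * p_nystroem) / (2 + 1 + 3)`, where
+# `p_constant`, `p_periodic` and `p_nystroem` are the probabilities predicted
+# by the individual classifiers. We verify this computation numerically at the
+# end of this example.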
-from sklearn import datasets from sklearn.ensemble import VotingClassifier -from sklearn.inspection import DecisionBoundaryDisplay -from sklearn.neighbors import KNeighborsClassifier -from sklearn.svm import SVC -from sklearn.tree import DecisionTreeClassifier - -# Loading some example data -iris = datasets.load_iris() -X = iris.data[:, [0, 2]] -y = iris.target - -# Training classifiers -clf1 = DecisionTreeClassifier(max_depth=4) -clf2 = KNeighborsClassifier(n_neighbors=7) -clf3 = SVC(gamma=0.1, kernel="rbf", probability=True) +from sklearn.kernel_approximation import Nystroem +from sklearn.linear_model import LogisticRegression +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import PolynomialFeatures, SplineTransformer, StandardScaler + +clf1 = make_pipeline( + SplineTransformer(degree=2, n_knots=2), + PolynomialFeatures(interaction_only=True), + LogisticRegression(C=10), +) +clf2 = make_pipeline( + SplineTransformer( + degree=2, + n_knots=4, + extrapolation="periodic", + include_bias=True, + ), + PolynomialFeatures(interaction_only=True), + LogisticRegression(C=10), +) +clf3 = make_pipeline( + StandardScaler(), + Nystroem(gamma=2, random_state=0), + LogisticRegression(C=10), +) +weights = [2, 1, 3] eclf = VotingClassifier( - estimators=[("dt", clf1), ("knn", clf2), ("svc", clf3)], + estimators=[("constant", clf1), ("periodic", clf2), ("nystroem", clf3)], voting="soft", - weights=[2, 1, 2], + weights=weights, ) clf1.fit(X, y) @@ -57,17 +90,87 @@ clf3.fit(X, y) eclf.fit(X, y) -# Plotting decision regions -f, axarr = plt.subplots(2, 2, sharex="col", sharey="row", figsize=(10, 8)) -for idx, clf, tt in zip( +# %% +# Finally we use :class:`~inspection.DecisionBoundaryDisplay` to plot the +# predicted probabilities. By using a diverging colormap (such as `"RdBu"`), we +# can ensure that darker colors correspond to `predict_proba` close to either 0 +# or 1. + +from itertools import product + +import matplotlib.pyplot as plt +from matplotlib.colors import ListedColormap + +from sklearn.inspection import DecisionBoundaryDisplay + +fig, axarr = plt.subplots(2, 2, sharex="col", sharey="row", figsize=(10, 8)) +for idx, clf, title in zip( product([0, 1], [0, 1]), [clf1, clf2, clf3, eclf], - ["Decision Tree (depth=4)", "KNN (k=7)", "Kernel SVM", "Soft Voting"], + [ + "Splines with\nconstant extrapolation", + "Splines with\nperiodic extrapolation", + "RBF Nystroem", + "Soft Voting", + ], ): - DecisionBoundaryDisplay.from_estimator( - clf, X, alpha=0.4, ax=axarr[idx[0], idx[1]], response_method="predict" + disp = DecisionBoundaryDisplay.from_estimator( + clf, + X, + response_method="predict_proba", + plot_method="pcolormesh", + cmap="RdBu", + alpha=0.8, + ax=axarr[idx[0], idx[1]], ) - axarr[idx[0], idx[1]].scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor="k") - axarr[idx[0], idx[1]].set_title(tt) + axarr[idx[0], idx[1]].scatter( + X["Feature #0"], + X["Feature #1"], + c=y, + cmap=ListedColormap(["tab:red", "tab:blue"]), + edgecolor="white", + linewidth=1, + ) + axarr[idx[0], idx[1]].set_title(title) + fig.colorbar(disp.surface_, ax=axarr[idx[0], idx[1]], label="Probability estimate") plt.show() + +# %% +# As a sanity check, we can verify for a given sample that the probability +# predicted by the :class:`~ensemble.VotingClassifier` is indeed the weighted +# average of the individual classifiers' soft-predictions. 
+#
+# In the case of binary classification such as in the present example, the
+# `predict_proba` arrays contain the probability of belonging to class 0 (here
+# in red) as the first entry, and the probability of belonging to class 1 (here
+# in blue) as the second entry.
+
+test_sample = pd.DataFrame({"Feature #0": [-0.5], "Feature #1": [1.5]})
+predict_probas = [est.predict_proba(test_sample).ravel() for est in eclf.estimators_]
+print(f"Individual predicted probabilities: {predict_probas}")
+print(
+    "Weighted average of soft-predictions: "
+    f"{np.dot(weights, predict_probas)/np.sum(weights)}"
+)
+print(
+    "Predicted probability of VotingClassifier: "
+    f"{eclf.predict_proba(test_sample).ravel()}"
+)
+
+# %%
+# To convert soft predictions into hard predictions when weights are provided,
+# the weighted average predicted probabilities are computed for each class.
+# The final class label is then derived from the class with the
+# highest average probability.
+
+print(
+    "Class with the highest weighted average of soft-predictions: "
+    f"{np.argmax(np.dot(weights, predict_probas)/np.sum(weights))}"
+)
+
+# %%
+# Which corresponds to the default threshold at 0.5 in the case of binary
+# classification. Equivalently:
+
+print(f"Predicted class of VotingClassifier: {eclf.predict(test_sample).ravel()}")

From 22b8854a9bcdd02b2b448f3a2bc173d6a9889121 Mon Sep 17 00:00:00 2001
From: ArturoAmorQ
Date: Wed, 12 Mar 2025 17:23:35 +0100
Subject: [PATCH 02/15] Update User Guide accordingly

---
 doc/modules/ensemble.rst | 30 +++---------------------------
 1 file changed, 3 insertions(+), 27 deletions(-)

diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst
index 71f91621c54af..0e08b3faf8050 100644
--- a/doc/modules/ensemble.rst
+++ b/doc/modules/ensemble.rst
@@ -1413,33 +1413,9 @@ weighted average 0.37 0.4 0.23
 ================ ========== ========== ==========
 
 Here, the predicted class label is 2, since it has the highest average probability.
 
-The following example illustrates how the decision regions may change
-when a soft :class:`VotingClassifier` is used based on a linear Support
-Vector Machine, a Decision Tree, and a K-nearest neighbor classifier::
-
-    >>> from sklearn import datasets
-    >>> from sklearn.tree import DecisionTreeClassifier
-    >>> from sklearn.neighbors import KNeighborsClassifier
-    >>> from sklearn.svm import SVC
-    >>> from itertools import product
-    >>> from sklearn.ensemble import VotingClassifier
-
-    >>> # Loading some example data
-    >>> iris = datasets.load_iris()
-    >>> X = iris.data[:, [0, 2]]
-    >>> y = iris.target
-
-    >>> # Training classifiers
-    >>> clf1 = DecisionTreeClassifier(max_depth=4)
-    >>> clf2 = KNeighborsClassifier(n_neighbors=7)
-    >>> clf3 = SVC(kernel='rbf', probability=True)
-    >>> eclf = VotingClassifier(estimators=[('dt', clf1), ('knn', clf2), ('svc', clf3)],
-    ...                         voting='soft', weights=[2, 1, 2])
-
-    >>> clf1 = clf1.fit(X, y)
-    >>> clf2 = clf2.fit(X, y)
-    >>> clf3 = clf3.fit(X, y)
-    >>> eclf = eclf.fit(X, y)
+The following figure illustrates how the decision regions may change when
+a soft :class:`VotingClassifier` is trained with weights on three linear
+models::
 
 ..
figure:: ../auto_examples/ensemble/images/sphx_glr_plot_voting_decision_regions_001.png :target: ../auto_examples/ensemble/plot_voting_decision_regions.html From 47553ac31526fd4ac62a1f174c923e99b8abefe5 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 12 Mar 2025 17:31:50 +0100 Subject: [PATCH 03/15] Add redirect from Plot class probabilities example --- doc/conf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/conf.py b/doc/conf.py index f749b188b3274..ec5af6463b7fb 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -491,6 +491,9 @@ def add_js_css_files(app, pagename, templatename, context, doctree): "auto_examples/ensemble/plot_forest_importances_faces": ( "auto_examples/ensemble/plot_forest_importances" ), + "auto_examples/ensemble/plot_voting_probas": ( + "auto_examples/ensemble/plot_voting_decision_regions" + ), "auto_examples/datasets/plot_iris_dataset": ( "auto_examples/decomposition/plot_pca_iris" ), From 28be627d11f72c7a62a8ede8ff6cac127dd0fdb1 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 12 Mar 2025 17:33:42 +0100 Subject: [PATCH 04/15] Remove Plot class probabilities example --- examples/ensemble/plot_voting_probas.py | 97 ------------------------- 1 file changed, 97 deletions(-) delete mode 100644 examples/ensemble/plot_voting_probas.py diff --git a/examples/ensemble/plot_voting_probas.py b/examples/ensemble/plot_voting_probas.py deleted file mode 100644 index 848358ca1d208..0000000000000 --- a/examples/ensemble/plot_voting_probas.py +++ /dev/null @@ -1,97 +0,0 @@ -""" -=========================================================== -Plot class probabilities calculated by the VotingClassifier -=========================================================== - -.. currentmodule:: sklearn - -Plot the class probabilities of the first sample in a toy dataset predicted by -three different classifiers and averaged by the -:class:`~ensemble.VotingClassifier`. - -First, three exemplary classifiers are initialized -(:class:`~linear_model.LogisticRegression`, :class:`~naive_bayes.GaussianNB`, -and :class:`~ensemble.RandomForestClassifier`) and used to initialize a -soft-voting :class:`~ensemble.VotingClassifier` with weights `[1, 1, 5]`, which -means that the predicted probabilities of the -:class:`~ensemble.RandomForestClassifier` count 5 times as much as the weights -of the other classifiers when the averaged probability is calculated. - -To visualize the probability weighting, we fit each classifier on the training -set and plot the predicted class probabilities for the first sample in this -example dataset. 
- -""" - -# Authors: The scikit-learn developers -# SPDX-License-Identifier: BSD-3-Clause - -import matplotlib.pyplot as plt -import numpy as np - -from sklearn.ensemble import RandomForestClassifier, VotingClassifier -from sklearn.linear_model import LogisticRegression -from sklearn.naive_bayes import GaussianNB - -clf1 = LogisticRegression(max_iter=1000, random_state=123) -clf2 = RandomForestClassifier(n_estimators=100, random_state=123) -clf3 = GaussianNB() -X = np.array([[-1.0, -1.0], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]]) -y = np.array([1, 1, 2, 2]) - -eclf = VotingClassifier( - estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], - voting="soft", - weights=[1, 1, 5], -) - -# predict class probabilities for all classifiers -probas = [c.fit(X, y).predict_proba(X) for c in (clf1, clf2, clf3, eclf)] - -# get class probabilities for the first sample in the dataset -class1_1 = [pr[0, 0] for pr in probas] -class2_1 = [pr[0, 1] for pr in probas] - - -# plotting - -N = 4 # number of groups -ind = np.arange(N) # group positions -width = 0.35 # bar width - -fig, ax = plt.subplots() - -# bars for classifier 1-3 -p1 = ax.bar(ind, np.hstack(([class1_1[:-1], [0]])), width, color="green", edgecolor="k") -p2 = ax.bar( - ind + width, - np.hstack(([class2_1[:-1], [0]])), - width, - color="lightgreen", - edgecolor="k", -) - -# bars for VotingClassifier -p3 = ax.bar(ind, [0, 0, 0, class1_1[-1]], width, color="blue", edgecolor="k") -p4 = ax.bar( - ind + width, [0, 0, 0, class2_1[-1]], width, color="steelblue", edgecolor="k" -) - -# plot annotations -plt.axvline(2.8, color="k", linestyle="dashed") -ax.set_xticks(ind + width) -ax.set_xticklabels( - [ - "LogisticRegression\nweight 1", - "GaussianNB\nweight 1", - "RandomForestClassifier\nweight 5", - "VotingClassifier\n(average probabilities)", - ], - rotation=40, - ha="right", -) -plt.ylim([0, 1]) -plt.title("Class probabilities for sample 1 by different classifiers") -plt.legend([p1[0], p2[0]], ["class 1", "class 2"], loc="upper left") -plt.tight_layout() -plt.show() From c4f46c9b95aa5d6dbefba35ddede7297f2b5cfc6 Mon Sep 17 00:00:00 2001 From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com> Date: Tue, 18 Mar 2025 10:43:55 +0100 Subject: [PATCH 05/15] Apply suggestions from code review Co-authored-by: Olivier Grisel --- examples/ensemble/plot_voting_decision_regions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py index faf2529c1bd43..25f49076a9741 100644 --- a/examples/ensemble/plot_voting_decision_regions.py +++ b/examples/ensemble/plot_voting_decision_regions.py @@ -148,7 +148,8 @@ test_sample = pd.DataFrame({"Feature #0": [-0.5], "Feature #1": [1.5]}) predict_probas = [est.predict_proba(test_sample).ravel() for est in eclf.estimators_] -print(f"Individual predicted probabilities: {predict_probas}") +for (est_name, _), est_probas in zip(eclf.estimators, predict_probas): + print(f"{est_name}'s predicted probabilities: {est_probas}") print( "Weighted average of soft-predictions: " f"{np.dot(weights, predict_probas)/np.sum(weights)}" From 85dedec5678a5eebc50ad39db4ee5e96196e7575 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Tue, 18 Mar 2025 10:55:49 +0100 Subject: [PATCH 06/15] Address comments from ogrisel --- .../ensemble/plot_voting_decision_regions.py | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/examples/ensemble/plot_voting_decision_regions.py 
b/examples/ensemble/plot_voting_decision_regions.py index 25f49076a9741..b9dedb80ff5de 100644 --- a/examples/ensemble/plot_voting_decision_regions.py +++ b/examples/ensemble/plot_voting_decision_regions.py @@ -31,12 +31,20 @@ # %% # We first generate a noisy XOR dataset, which is a binary classification task. +import matplotlib.pyplot as plt import numpy as np import pandas as pd +from matplotlib.colors import ListedColormap n_samples = 500 rng = np.random.default_rng(0) feature_names = ["Feature #0", "Feature #1"] +common_scatter_plot_params = dict( + cmap=ListedColormap(["tab:red", "tab:blue"]), + edgecolor="white", + linewidth=1, +) + xor = pd.DataFrame( np.random.RandomState(0).uniform(low=-1, high=1, size=(n_samples, 2)), columns=feature_names, @@ -49,6 +57,11 @@ X = xor[feature_names] y = target_xor.astype(np.int32) +fig, ax = plt.subplots() +ax.scatter(X["Feature #0"], X["Feature #1"], c=y, **common_scatter_plot_params) +ax.set_title("The XOR dataset") +plt.show() + # %% # We define and fit the models on the whole dataset. @@ -80,7 +93,11 @@ ) weights = [2, 1, 3] eclf = VotingClassifier( - estimators=[("constant", clf1), ("periodic", clf2), ("nystroem", clf3)], + estimators=[ + ("constant splines model", clf1), + ("periodic splines model", clf2), + ("nystroem model", clf3), + ], voting="soft", weights=weights, ) @@ -98,9 +115,6 @@ from itertools import product -import matplotlib.pyplot as plt -from matplotlib.colors import ListedColormap - from sklearn.inspection import DecisionBoundaryDisplay fig, axarr = plt.subplots(2, 2, sharex="col", sharey="row", figsize=(10, 8)) @@ -127,9 +141,7 @@ X["Feature #0"], X["Feature #1"], c=y, - cmap=ListedColormap(["tab:red", "tab:blue"]), - edgecolor="white", - linewidth=1, + **common_scatter_plot_params, ) axarr[idx[0], idx[1]].set_title(title) fig.colorbar(disp.surface_, ax=axarr[idx[0], idx[1]], label="Probability estimate") @@ -150,10 +162,14 @@ predict_probas = [est.predict_proba(test_sample).ravel() for est in eclf.estimators_] for (est_name, _), est_probas in zip(eclf.estimators, predict_probas): print(f"{est_name}'s predicted probabilities: {est_probas}") + +# %% print( "Weighted average of soft-predictions: " f"{np.dot(weights, predict_probas)/np.sum(weights)}" ) + +# %% print( "Predicted probability of VotingClassifier: " f"{eclf.predict_proba(test_sample).ravel()}" From 82b0c91ba088d275fdda0dc900ea4fbc30088cd4 Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Tue, 25 Mar 2025 10:57:32 +0100 Subject: [PATCH 07/15] Add comment on thresholding --- examples/ensemble/plot_voting_decision_regions.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py index b9dedb80ff5de..2f569cc0a481c 100644 --- a/examples/ensemble/plot_voting_decision_regions.py +++ b/examples/ensemble/plot_voting_decision_regions.py @@ -191,3 +191,17 @@ # classification. Equivalently: print(f"Predicted class of VotingClassifier: {eclf.predict(test_sample).ravel()}") + +# %% +# Soft votes can be thresholded as for any other probabilistic classifier. 
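+#
+# To see what a stricter threshold changes for our test sample, we can first
+# inspect its soft vote for the positive (blue) class:
+
+print(
+    "Soft vote for the positive class: "
+    f"{eclf.predict_proba(test_sample)[:, 1]}"
+)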
+
+# %%
+from sklearn.model_selection import FixedThresholdClassifier
+
+eclf_other_threshold = FixedThresholdClassifier(
+    eclf, threshold=0.7, response_method="predict_proba"
+).fit(X, y)
+print(
+    "Predicted class of thresholded VotingClassifier: "
+    f"{eclf_other_threshold.predict(test_sample)}"
+)

From b0455e714014814049174109c2d8f49002514122 Mon Sep 17 00:00:00 2001
From: ArturoAmorQ
Date: Tue, 25 Mar 2025 12:31:57 +0100
Subject: [PATCH 08/15] Change example's title and match sphinx ref

---
 doc/modules/ensemble.rst                          | 9 +++++----
 examples/ensemble/plot_voting_decision_regions.py | 6 +++---
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst
index 80aad872fccbf..d1041fccce772 100644
--- a/doc/modules/ensemble.rst
+++ b/doc/modules/ensemble.rst
@@ -1410,10 +1410,11 @@ classifier 3 w3 * 0.3 w3 * 0.4 w3 * 0.3
 weighted average 0.37 0.4 0.23
 ================ ========== ========== ==========
 
-Here, the predicted class label is 2, since it has the highest average probability. See
-this example on :ref:`Visualising class probabilities in a Voting Classifier
-<sphx_glr_auto_examples_ensemble_plot_voting_probas.py>` for a detailed illustration of
-class probabilities averaged by soft voting.
+Here, the predicted class label is 2, since it has the highest average
+probability. See the example on
+:ref:`sphx_glr_auto_examples_ensemble_plot_voting_decision_regions.py` for a
+demonstration of how the predicted class label can be obtained from the weighted
+average of predicted probabilities.
 
 The following figure illustrates how the decision regions may change when
 a soft :class:`VotingClassifier` is trained with weights on three linear
diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index 2f569cc0a481c..6a23916b0346c 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -1,7 +1,7 @@
 """
-==================================================
-Plot the decision boundaries of a VotingClassifier
-==================================================
+=====================================================
+Visualizing class probabilities in a VotingClassifier
+=====================================================
 
 .. currentmodule:: sklearn

From 822c2e7d9461a58e497c8055ca93dbe9c4f26bc5 Mon Sep 17 00:00:00 2001
From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com>
Date: Thu, 27 Mar 2025 12:19:45 +0100
Subject: [PATCH 09/15] Address Olivier comment

Co-authored-by: Olivier Grisel
---
 doc/modules/ensemble.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst
index d1041fccce772..0e3172e79b800 100644
--- a/doc/modules/ensemble.rst
+++ b/doc/modules/ensemble.rst
@@ -1411,7 +1411,7 @@ weighted average 0.37 0.4 0.23
 ================ ========== ========== ==========
 
 Here, the predicted class label is 2, since it has the highest average
-probability. See the example on
+predicted probability. See the example on
 :ref:`sphx_glr_auto_examples_ensemble_plot_voting_decision_regions.py` for a
 demonstration of how the predicted class label can be obtained from the weighted
 average of predicted probabilities.
From 1d352a119a2868efc2fe0e68221ea7afb4c910b5 Mon Sep 17 00:00:00 2001
From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com>
Date: Thu, 27 Mar 2025 12:20:13 +0100
Subject: [PATCH 10/15] Address Olivier's comment

Co-authored-by: Olivier Grisel
---
 examples/ensemble/plot_voting_decision_regions.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index 6a23916b0346c..897ea79aeb04d 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -1,7 +1,7 @@
 """
-=====================================================
-Visualizing class probabilities in a VotingClassifier
-=====================================================
+===============================================================
+Visualizing the probabilistic predictions of a VotingClassifier
+===============================================================
 
 .. currentmodule:: sklearn

From fc73e782f0038a239d1a369bbbf2a2d39dbda994 Mon Sep 17 00:00:00 2001
From: Lucy Liu
Date: Wed, 30 Apr 2025 11:07:52 +1000
Subject: [PATCH 11/15] fix lint

---
 examples/ensemble/plot_voting_decision_regions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index 897ea79aeb04d..43faa456724b5 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -166,7 +166,7 @@
 # %%
 print(
     "Weighted average of soft-predictions: "
-    f"{np.dot(weights, predict_probas)/np.sum(weights)}"
+    f"{np.dot(weights, predict_probas) / np.sum(weights)}"
 )
 
 # %%
@@ -183,7 +183,7 @@
 
 print(
     "Class with the highest weighted average of soft-predictions: "
-    f"{np.argmax(np.dot(weights, predict_probas)/np.sum(weights))}"
+    f"{np.argmax(np.dot(weights, predict_probas) / np.sum(weights))}"
 )
 
 # %%

From 9d010327ac5096af45dbb47ccf5244c47c79463d Mon Sep 17 00:00:00 2001
From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com>
Date: Wed, 30 Apr 2025 10:07:19 +0200
Subject: [PATCH 12/15] Apply suggestions from code review

Co-authored-by: Lucy Liu
---
 examples/ensemble/plot_voting_decision_regions.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index 43faa456724b5..d70da8f1e697a 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -13,7 +13,7 @@
 extrapolation. The third classifier uses a :class:`~kernel_approximation.Nystroem`
 kernel approximation with the default "rbf" kernel.
 
-In the first part of this example these three classifiers are used to
+In the first part of this example, these three classifiers are used to
 demonstrate soft-voting using :class:`~ensemble.VotingClassifier` with a weighted
 average. We set `weights=[2, 1, 3]`, meaning the constant extrapolation spline
@@ -154,7 +154,7 @@
 # average of the individual classifiers' soft-predictions.
 #
 # In the case of binary classification such as in the present example, the
-# `predict_proba` arrays contain the probability of belonging to class 0 (here
+# term:`predict_proba` arrays contain the probability of belonging to class 0 (here
 # in red) as the first entry, and the probability of belonging to class 1 (here
 # in blue) as the second entry.
@@ -188,7 +188,8 @@
 
 # %%
 # Which corresponds to the default threshold at 0.5 in the case of binary
-# classification. Equivalently:
+# classification. This is equivalent to the output of `VotingClassifier`'s `predict`
+# method:
 
 print(f"Predicted class of VotingClassifier: {eclf.predict(test_sample).ravel()}")

From 5bbacd631f2205f5be5bdd65b5ce47a445c092c6 Mon Sep 17 00:00:00 2001
From: ArturoAmorQ
Date: Wed, 30 Apr 2025 11:46:13 +0200
Subject: [PATCH 13/15] Address comments from Lucy

---
 doc/modules/ensemble.rst                          |  4 ++--
 .../ensemble/plot_voting_decision_regions.py      | 22 +++++++++++--------
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst
index 564179ef58333..b336a25d8048d 100644
--- a/doc/modules/ensemble.rst
+++ b/doc/modules/ensemble.rst
@@ -1418,9 +1418,9 @@ average of predicted probabilities.
 
 The following figure illustrates how the decision regions may change when
 a soft :class:`VotingClassifier` is trained with weights on three linear
-models::
+models:
 
-.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_voting_decision_regions_001.png
+.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_voting_decision_regions_002.png
    :target: ../auto_examples/ensemble/plot_voting_decision_regions.html
    :align: center
    :scale: 75%

diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index d70da8f1e697a..f5ec07214d692 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -154,9 +154,9 @@
 # average of the individual classifiers' soft-predictions.
 #
 # In the case of binary classification such as in the present example, the
-# term:`predict_proba` arrays contain the probability of belonging to class 0 (here
-# in red) as the first entry, and the probability of belonging to class 1 (here
-# in blue) as the second entry.
+# term:`predict_proba` arrays contain the probability of belonging to class 0
+# (here in red) as the first entry, and the probability of belonging to class 1
+# (here in blue) as the second entry.
 
 test_sample = pd.DataFrame({"Feature #0": [-0.5], "Feature #1": [1.5]})
 predict_probas = [est.predict_proba(test_sample).ravel() for est in eclf.estimators_]
 for (est_name, _), est_probas in zip(eclf.estimators, predict_probas):
     print(f"{est_name}'s predicted probabilities: {est_probas}")
 
 # %%
+# We can see that the manual calculation of predicted probabilities above is
+# equivalent to that produced by the `VotingClassifier`:
+
 print(
     "Predicted probability of VotingClassifier: "
     f"{eclf.predict_proba(test_sample).ravel()}"
 )
@@ -182,7 +188,8 @@
 # To convert soft predictions into hard predictions when weights are provided,
 # the weighted average predicted probabilities are computed for each class.
 # The final class label is then derived from the class with the
-# highest average probability.
+# highest average probability, which corresponds to the default threshold at
+# `predict_proba=0.5` in the case of binary classification.
 
 print(
     "Class with the highest weighted average of soft-predictions: "
     f"{np.argmax(np.dot(weights, predict_probas) / np.sum(weights))}"
 )
 
 # %%
-# Which corresponds to the default threshold at 0.5 in the case of binary
-# classification. This is equivalent to the output of `VotingClassifier`'s `predict`
-# method:
+# This is equivalent to the output of `VotingClassifier`'s `predict` method:
 
 print(f"Predicted class of VotingClassifier: {eclf.predict(test_sample).ravel()}")
 
 # %%
-# Soft votes can be thresholded as for any other probabilistic classifier.
+# Soft votes can be thresholded as for any other probabilistic classifier. This
+# allows you to set a threshold probability at which the positive class will be
+# predicted, instead of simply selecting the class with the highest predicted
+# probability.
 #
 # To see what a stricter threshold changes for our test sample, we can first
 # inspect its soft vote for the positive (blue) class:
 
 print(
     "Soft vote for the positive class: "
     f"{eclf.predict_proba(test_sample)[:, 1]}"
 )
 
-# %%
 from sklearn.model_selection import FixedThresholdClassifier
 
 eclf_other_threshold = FixedThresholdClassifier(

From eb7c8f581e0bdf38880477ef3db283b7ca711d4a Mon Sep 17 00:00:00 2001
From: ArturoAmorQ
Date: Wed, 30 Apr 2025 11:58:54 +0200
Subject: [PATCH 14/15] Add comment on linear separability

---
 examples/ensemble/plot_voting_decision_regions.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index f5ec07214d692..584097fac73ff 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -63,6 +63,12 @@
 plt.show()
 
 # %%
+# Since the XOR dataset is not linearly separable, tree-based models would
+# often be preferred. However, appropriate feature engineering combined with a
+# linear model can yield effective results, with the added benefit of
+# producing better-calibrated probabilities for samples located in the
+# transition regions affected by noise.
+#
 # We define and fit the models on the whole dataset.

From ec42ff12af05548474f81435e22da21c5c853154 Mon Sep 17 00:00:00 2001
From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com>
Date: Thu, 1 May 2025 10:17:10 +0200
Subject: [PATCH 15/15] Apply suggestions from code review

Co-authored-by: Lucy Liu
---
 examples/ensemble/plot_voting_decision_regions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py
index 584097fac73ff..57f3f4b22b947 100644
--- a/examples/ensemble/plot_voting_decision_regions.py
+++ b/examples/ensemble/plot_voting_decision_regions.py
@@ -117,7 +117,7 @@
This is equivalent to the output of `VotingClassifier`'s `predict` -# method: +# This is equivalent to the output of `VotingClassifier`'s `predict` method: print(f"Predicted class of VotingClassifier: {eclf.predict(test_sample).ravel()}") # %% -# Soft votes can be thresholded as for any other probabilistic classifier. +# Soft votes can be thresholded as for any other probabilistic classifier. This +# allows you to set a threshold probability at which the positive class will be +# predicted, instead of simply selecting the class with the highest predicted +# probability. -# %% from sklearn.model_selection import FixedThresholdClassifier eclf_other_threshold = FixedThresholdClassifier( From eb7c8f581e0bdf38880477ef3db283b7ca711d4a Mon Sep 17 00:00:00 2001 From: ArturoAmorQ Date: Wed, 30 Apr 2025 11:58:54 +0200 Subject: [PATCH 14/15] Add comment on linear-separability --- examples/ensemble/plot_voting_decision_regions.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py index f5ec07214d692..584097fac73ff 100644 --- a/examples/ensemble/plot_voting_decision_regions.py +++ b/examples/ensemble/plot_voting_decision_regions.py @@ -63,6 +63,12 @@ plt.show() # %% +# Due to the inherent non-linear separability of the XOR dataset, tree-based +# models would often be preferred. However, appropriate feature engineering +# combined with a linear model can yield effective results, with the added +# benefit of producing better-calibrated probabilities for samples located in +# the transition regions affected by noise. +# # We define and fit the models on the whole dataset. from sklearn.ensemble import VotingClassifier From ec42ff12af05548474f81435e22da21c5c853154 Mon Sep 17 00:00:00 2001 From: Arturo Amor <86408019+ArturoAmorQ@users.noreply.github.com> Date: Thu, 1 May 2025 10:17:10 +0200 Subject: [PATCH 15/15] Apply suggestions from code review Co-authored-by: Lucy Liu --- examples/ensemble/plot_voting_decision_regions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py index 584097fac73ff..57f3f4b22b947 100644 --- a/examples/ensemble/plot_voting_decision_regions.py +++ b/examples/ensemble/plot_voting_decision_regions.py @@ -117,7 +117,7 @@ # Finally we use :class:`~inspection.DecisionBoundaryDisplay` to plot the # predicted probabilities. By using a diverging colormap (such as `"RdBu"`), we # can ensure that darker colors correspond to `predict_proba` close to either 0 -# or 1. +# or 1, and white corresponds to `predict_proba` of 0.5. from itertools import product @@ -160,7 +160,7 @@ # average of the individual classifiers' soft-predictions. # # In the case of binary classification such as in the present example, the -# term:`predict_proba` arrays contain the probability of belonging to class 0 +# :term:`predict_proba` arrays contain the probability of belonging to class 0 # (here in red) as the first entry, and the probability of belonging to class 1 # (here in blue) as the second entry.