From 330fad1b241df2fcc9e1b58806d25ebf88e046c6 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Sun, 29 Sep 2024 20:38:48 +0200
Subject: [PATCH 1/3] DOC remove redundant example multiclass logistic regression

---
 doc/conf.py                                 |  3 ++
 examples/linear_model/plot_iris_logistic.py | 52 ---------------------
 2 files changed, 3 insertions(+), 52 deletions(-)
 delete mode 100644 examples/linear_model/plot_iris_logistic.py

diff --git a/doc/conf.py b/doc/conf.py
index 9ab1966b70e73..e14205bb0ba0f 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -475,6 +475,9 @@ def add_js_css_files(app, pagename, templatename, context, doctree):
     "auto_examples/linear_model/plot_lasso_coordinate_descent_path.py": (
         "auto_examples/linear_model/plot_lasso_lasso_lars_elasticnet_path.py"
     ),
+    "auto_examples/linear_model/plot_iris_logistic": (
+        "auto_examples/linear_model/plot_logistic_multinomial"
+    ),
 }
 html_context["redirects"] = redirects
 for old_link in redirects:
diff --git a/examples/linear_model/plot_iris_logistic.py b/examples/linear_model/plot_iris_logistic.py
deleted file mode 100644
index 481312c94c789..0000000000000
--- a/examples/linear_model/plot_iris_logistic.py
+++ /dev/null
@@ -1,52 +0,0 @@
-"""
-=========================================================
-Logistic Regression 3-class Classifier
-=========================================================
-
-Show below is a logistic-regression classifiers decision boundaries on the
-first two dimensions (sepal length and width) of the `iris
-<https://en.wikipedia.org/wiki/Iris_flower_data_set>`_ dataset. The datapoints
-are colored according to their labels.
-
-"""
-
-# Authors: The scikit-learn developers
-# SPDX-License-Identifier: BSD-3-Clause
-
-import matplotlib.pyplot as plt
-
-from sklearn import datasets
-from sklearn.inspection import DecisionBoundaryDisplay
-from sklearn.linear_model import LogisticRegression
-
-# import some data to play with
-iris = datasets.load_iris()
-X = iris.data[:, :2]  # we only take the first two features.
-Y = iris.target
-
-# Create an instance of Logistic Regression Classifier and fit the data.
-logreg = LogisticRegression(C=1e5)
-logreg.fit(X, Y)
-
-_, ax = plt.subplots(figsize=(4, 3))
-DecisionBoundaryDisplay.from_estimator(
-    logreg,
-    X,
-    cmap=plt.cm.Paired,
-    ax=ax,
-    response_method="predict",
-    plot_method="pcolormesh",
-    shading="auto",
-    xlabel="Sepal length",
-    ylabel="Sepal width",
-    eps=0.5,
-)
-
-# Plot also the training points
-plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors="k", cmap=plt.cm.Paired)
-
-
-plt.xticks(())
-plt.yticks(())
-
-plt.show()

From 30dda90641f1c109e2c61e0e57c145fe05292190 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Sun, 29 Sep 2024 23:01:23 +0200
Subject: [PATCH 2/3] improve multiclass example

---
 .../linear_model/plot_logistic_multinomial.py | 199 ++++++++++++++----
 1 file changed, 156 insertions(+), 43 deletions(-)

diff --git a/examples/linear_model/plot_logistic_multinomial.py b/examples/linear_model/plot_logistic_multinomial.py
index ca9f1717fe346..60679999a59cf 100644
--- a/examples/linear_model/plot_logistic_multinomial.py
+++ b/examples/linear_model/plot_logistic_multinomial.py
@@ -1,70 +1,183 @@
 """
-====================================================
-Plot multinomial and One-vs-Rest Logistic Regression
-====================================================
+======================================================================
+Decision Boundaries of Multinomial and One-vs-Rest Logistic Regression
+======================================================================
 
-Plot decision surface of multinomial and One-vs-Rest Logistic Regression.
-The hyperplanes corresponding to the three One-vs-Rest (OVR) classifiers
-are represented by the dashed lines.
+This example compares decision boundaries of multinomial and one-vs-rest
+logistic regression on a 2D dataset with three classes.
 
+We compare the decision boundaries of both methods, which is equivalent to
+calling the `predict` method. In addition, we plot the hyperplanes that
+correspond to the lines where the probability estimate for a class is 0.5.
 """
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
+# %%
+# Dataset Generation
+# ------------------
+#
+# We generate a synthetic dataset using :func:`~sklearn.datasets.make_blobs`.
+# The dataset consists of 1,000 samples from three different classes,
+# centered around [-5, 0], [0, 1.5], and [5, -1]. After generation, we apply a linear
+# transformation to introduce some correlation between features and make the problem
+# more challenging. This results in a 2D dataset with three overlapping classes,
+# suitable for demonstrating the differences between multinomial and one-vs-rest
+# logistic regression.
 import matplotlib.pyplot as plt
 import numpy as np
 
 from sklearn.datasets import make_blobs
-from sklearn.inspection import DecisionBoundaryDisplay
-from sklearn.linear_model import LogisticRegression
-from sklearn.multiclass import OneVsRestClassifier
 
-# make 3-class dataset for classification
 centers = [[-5, 0], [0, 1.5], [5, -1]]
-X, y = make_blobs(n_samples=1000, centers=centers, random_state=40)
+X, y = make_blobs(n_samples=1_000, centers=centers, random_state=40)
 transformation = [[0.4, 0.2], [-0.4, 1.2]]
 X = np.dot(X, transformation)
 
-for multi_class in ("multinomial", "ovr"):
-    clf = LogisticRegression(solver="sag", max_iter=100, random_state=42)
-    if multi_class == "ovr":
-        clf = OneVsRestClassifier(clf)
-    clf.fit(X, y)
+fig, ax = plt.subplots(figsize=(6, 4))
+
+scatter = ax.scatter(X[:, 0], X[:, 1], c=y, edgecolor="black")
+ax.set(title="Synthetic Dataset", xlabel="Feature 1", ylabel="Feature 2")
+_ = ax.legend(*scatter.legend_elements(), title="Classes")
+
+
+# %%
+# Classifier Training
+# -------------------
+#
+# We train two different logistic regression classifiers: multinomial and one-vs-rest.
+# The multinomial classifier handles all classes simultaneously, while the one-vs-rest
+# approach trains a binary classifier for each class against all others.
+from sklearn.linear_model import LogisticRegression
+from sklearn.multiclass import OneVsRestClassifier
+
+logistic_regression_multinomial = LogisticRegression().fit(X, y)
+logistic_regression_ovr = OneVsRestClassifier(LogisticRegression()).fit(X, y)
+
+accuracy_multinomial = logistic_regression_multinomial.score(X, y)
+accuracy_ovr = logistic_regression_ovr.score(X, y)
 
-    # print the training scores
-    print("training score : %.3f (%s)" % (clf.score(X, y), multi_class))
+# %%
+# Decision Boundaries Visualization
+# ---------------------------------
+#
+# Let's visualize the decision boundaries of both models, as provided by the
+# `predict` method of the classifiers.
+from sklearn.inspection import DecisionBoundaryDisplay
+
+fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5), sharex=True, sharey=True)
 
-    _, ax = plt.subplots()
+for model, title, ax in [
+    (
+        logistic_regression_multinomial,
+        f"Multinomial Logistic Regression\n(Accuracy: {accuracy_multinomial:.3f})",
+        ax1,
+    ),
+    (
+        logistic_regression_ovr,
+        f"One-vs-Rest Logistic Regression\n(Accuracy: {accuracy_ovr:.3f})",
+        ax2,
+    ),
+]:
     DecisionBoundaryDisplay.from_estimator(
-        clf, X, response_method="predict", cmap=plt.cm.Paired, ax=ax
+        model,
+        X,
+        ax=ax,
+        response_method="predict",
+        alpha=0.8,
     )
-    plt.title("Decision surface of LogisticRegression (%s)" % multi_class)
-    plt.axis("tight")
-
-    # Plot also the training points
-    colors = "bry"
-    for i, color in zip(clf.classes_, colors):
-        idx = np.where(y == i)
-        plt.scatter(X[idx, 0], X[idx, 1], c=color, edgecolor="black", s=20)
-
-    # Plot the three one-against-all classifiers
-    xmin, xmax = plt.xlim()
-    ymin, ymax = plt.ylim()
-    if multi_class == "ovr":
-        coef = np.concatenate([est.coef_ for est in clf.estimators_])
-        intercept = np.concatenate([est.intercept_ for est in clf.estimators_])
+    scatter = ax.scatter(X[:, 0], X[:, 1], c=y, edgecolor="k")
+    legend = ax.legend(*scatter.legend_elements(), title="Classes")
+    ax.add_artist(legend)
+    ax.set_title(title)
+
+
+# %%
+# We see that the decision boundaries are different. This difference stems from their
+# approaches:
+#
+# - Multinomial logistic regression considers all classes simultaneously during
+#   optimization.
+# - One-vs-rest logistic regression fits each class independently against all others.
+#
+# These distinct strategies can lead to varying decision boundaries, especially in
+# complex multi-class problems.
+#
+# Hyperplanes Visualization
+# --------------------------
+#
+# We also visualize the hyperplanes that correspond to the lines where the
+# probability estimate for a class is 0.5.
+def plot_hyperplanes(classifier, X, ax):
+    xmin, xmax = X[:, 0].min(), X[:, 0].max()
+    ymin, ymax = X[:, 1].min(), X[:, 1].max()
+    ax.set(xlim=(xmin, xmax), ylim=(ymin, ymax))
+
+    if isinstance(classifier, OneVsRestClassifier):
+        coef = np.concatenate([est.coef_ for est in classifier.estimators_])
+        intercept = np.concatenate([est.intercept_ for est in classifier.estimators_])
     else:
-        coef = clf.coef_
-        intercept = clf.intercept_
+        coef = classifier.coef_
+        intercept = classifier.intercept_
 
-    def plot_hyperplane(c, color):
-        def line(x0):
-            return (-(x0 * coef[c, 0]) - intercept[c]) / coef[c, 1]
+    for i in range(coef.shape[0]):
+        w = coef[i]
+        a = -w[0] / w[1]
+        xx = np.linspace(xmin, xmax)
+        yy = a * xx - (intercept[i]) / w[1]
+        ax.plot(xx, yy, "--", linewidth=3, label=f"Class {i}")
 
-        plt.plot([xmin, xmax], [line(xmin), line(xmax)], ls="--", color=color)
+    return ax.get_legend_handles_labels()
 
-    for i, color in zip(clf.classes_, colors):
-        plot_hyperplane(i, color)
+
+# %%
+fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5), sharex=True, sharey=True)
+
+for model, title, ax in [
+    (
+        logistic_regression_multinomial,
+        "Multinomial Logistic Regression Hyperplanes",
+        ax1,
+    ),
+    (logistic_regression_ovr, "One-vs-Rest Logistic Regression Hyperplanes", ax2),
+]:
+    hyperplane_handles, hyperplane_labels = plot_hyperplanes(model, X, ax)
+    scatter = ax.scatter(X[:, 0], X[:, 1], c=y, edgecolor="k")
+    scatter_handles, scatter_labels = scatter.legend_elements()
+
+    all_handles = hyperplane_handles + scatter_handles
+    all_labels = hyperplane_labels + scatter_labels
+
+    ax.legend(all_handles, all_labels, title="Classes")
+    ax.set_title(title)
 
 plt.show()
+
+# %%
+# While the hyperplanes for classes 0 and 2 are quite similar between the two methods,
+# we observe that the hyperplane for class 1 is notably different. This difference stems
+# from the fundamental approaches of one-vs-rest and multinomial logistic regression:
+#
+# For one-vs-rest logistic regression:
+#
+# - Each hyperplane is determined independently by considering one class against all
+#   others.
+# - For class 1, the hyperplane represents the decision boundary that best separates
+#   class 1 from the combined classes 0 and 2.
+# - This binary approach can lead to simpler decision boundaries but may not capture
+#   complex relationships between all classes simultaneously.
+#
+# For multinomial logistic regression:
+#
+# - All hyperplanes are determined simultaneously, considering the relationships between
+#   all classes at once.
+# - Each hyperplane represents the decision boundary where the probability of one class
+#   becomes higher than the others, based on the overall probability distribution.
+# - This approach can capture more nuanced relationships between classes, potentially
+#   leading to more accurate classification in multi-class problems.
+#
+# The difference in hyperplanes, especially for class 1, highlights how these methods
+# can produce different decision boundaries despite similar overall accuracy. The choice
+# between one-vs-rest and multinomial logistic regression can depend on the specific
+# dataset and the nature of the classification problem.

From ea0a96db7b8dfcbb1a5a7b09af4ed59421a335c2 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 16 Oct 2024 11:16:55 +0200
Subject: [PATCH 3/3] reformulate recommendations

---
 .../linear_model/plot_logistic_multinomial.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/examples/linear_model/plot_logistic_multinomial.py b/examples/linear_model/plot_logistic_multinomial.py
index 60679999a59cf..c12229c81c7f1 100644
--- a/examples/linear_model/plot_logistic_multinomial.py
+++ b/examples/linear_model/plot_logistic_multinomial.py
@@ -167,17 +167,27 @@ def plot_hyperplanes(classifier, X, ax):
 #   class 1 from the combined classes 0 and 2.
 # - This binary approach can lead to simpler decision boundaries but may not capture
 #   complex relationships between all classes simultaneously.
+# - There is no meaningful interpretation of the conditional class probabilities.
 #
 # For multinomial logistic regression:
 #
 # - All hyperplanes are determined simultaneously, considering the relationships between
 #   all classes at once.
+# - The loss minimized by the model is a proper scoring rule, which means that the model
+#   is optimized to estimate the conditional class probabilities that are, therefore,
+#   meaningful.
 # - Each hyperplane represents the decision boundary where the probability of one class
 #   becomes higher than the others, based on the overall probability distribution.
 # - This approach can capture more nuanced relationships between classes, potentially
 #   leading to more accurate classification in multi-class problems.
 #
 # The difference in hyperplanes, especially for class 1, highlights how these methods
-# can produce different decision boundaries despite similar overall accuracy. The choice
-# between one-vs-rest and multinomial logistic regression can depend on the specific
-# dataset and the nature of the classification problem.
+# can produce different decision boundaries despite similar overall accuracy.
+#
+# In practice, using multinomial logistic regression is recommended since it minimizes a
+# well-formulated loss function, leading to better-calibrated class probabilities and
+# thus more interpretable results. When it comes to decision boundaries, one should
+# formulate a utility function to transform the class probabilities into a meaningful
+# quantity for the problem at hand. One-vs-rest allows for different decision boundaries
+# but does not allow for fine-grained control over the trade-off between the classes as
+# a utility function would.
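
As a side note to the recommendation above, the following minimal sketch (separate from the patches themselves; variable names are illustrative) shows what the probability discussion means in code: the multinomial model estimates the conditional class probabilities jointly through a single softmax model, whereas `OneVsRestClassifier` fits one binary `LogisticRegression` per class and renormalizes the independent estimates so that each row sums to one.

    import numpy as np
    from sklearn.datasets import make_blobs
    from sklearn.linear_model import LogisticRegression
    from sklearn.multiclass import OneVsRestClassifier

    # Same synthetic dataset as in the reworked example.
    X, y = make_blobs(
        n_samples=1_000, centers=[[-5, 0], [0, 1.5], [5, -1]], random_state=40
    )
    X = np.dot(X, [[0.4, 0.2], [-0.4, 1.2]])

    multinomial = LogisticRegression().fit(X, y)
    ovr = OneVsRestClassifier(LogisticRegression()).fit(X, y)

    # Both arrays have shape (n_samples, 3) with rows summing to 1, but only the
    # multinomial ones are the direct output of a jointly optimized model; the
    # one-vs-rest values come from independent binary fits that are renormalized.
    print(multinomial.predict_proba(X[:5]).round(3))
    print(ovr.predict_proba(X[:5]).round(3))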