From 78ecbe641c8c505f3cbb4c45e4db3ffd1709a760 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Sun, 29 Sep 2024 23:34:06 +0200
Subject: [PATCH 1/4] DOC remove example OLS with 3D plot

---
 doc/conf.py                          |   1 +
 examples/linear_model/plot_ols.py    | 113 +++++++++++++++------------
 examples/linear_model/plot_ols_3d.py |  83 --------------------
 3 files changed, 66 insertions(+), 131 deletions(-)
 delete mode 100644 examples/linear_model/plot_ols_3d.py

diff --git a/doc/conf.py b/doc/conf.py
index 9ab1966b70e73..903ea36b4dd18 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -475,6 +475,7 @@ def add_js_css_files(app, pagename, templatename, context, doctree):
     "auto_examples/linear_model/plot_lasso_coordinate_descent_path.py": (
         "auto_examples/linear_model/plot_lasso_lasso_lars_elasticnet_path.py"
     ),
+    "auto_examples/linear_model/plot_ols_3d": ("auto_examples/linear_model/plot_ols"),
 }
 html_context["redirects"] = redirects
 for old_link in redirects:
diff --git a/examples/linear_model/plot_ols.py b/examples/linear_model/plot_ols.py
index 8aaa35ed8d899..c19d97ad045c9 100644
--- a/examples/linear_model/plot_ols.py
+++ b/examples/linear_model/plot_ols.py
@@ -1,63 +1,80 @@
 """
-=========================================================
-Linear Regression Example
-=========================================================
-The example below uses only the first feature of the `diabetes` dataset,
-in order to illustrate the data points within the two-dimensional plot.
-The straight line can be seen in the plot, showing how linear regression
-attempts to draw a straight line that will best minimize the
-residual sum of squares between the observed responses in the dataset,
-and the responses predicted by the linear approximation.
-
-The coefficients, residual sum of squares and the coefficient of
-determination are also calculated.
+=====================================
+Simple Ordinary Least Squares Example
+=====================================
+
+This example shows how to use the simplest ordinary least squares (OLS) model
+called :class:`~sklearn.linear_model.LinearRegression` in scikit-learn.
+
+For this purpose, we use a single feature from the diabetes dataset and try to
+predict the diabetes progression using this linear model. We therefore load the
+diabetes dataset and split it into training and test sets.
+
+Then, we fit the model on the training set and evaluate its performance on the test
+set and finally visualize the results on the test set.
 """
 
 # Authors: The scikit-learn developers
 # SPDX-License-Identifier: BSD-3-Clause
 
-import matplotlib.pyplot as plt
-import numpy as np
-
-from sklearn import datasets, linear_model
+# %%
+# Data Loading and Preparation
+# ----------------------------
+#
+# Load the diabetes dataset. For simplicity, we only keep a single feature in the data.
+# Then, we split the data and target into training and test sets.
+from sklearn.datasets import load_diabetes
+from sklearn.model_selection import train_test_split
+
+X, y = load_diabetes(return_X_y=True)
+X = X[:, [2]]  # Use only one feature
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=20, shuffle=False)
+
+# %%
+# Linear regression model
+# -----------------------
+#
+# We create a linear regression model and fit it on the training data.
+from sklearn.linear_model import LinearRegression
+
+regressor = LinearRegression().fit(X_train, y_train)
+
+# %%
+# Model evaluation
+# ----------------
+#
+# We evaluate the model's performance on the test set using the mean squared error
+# and the coefficient of determination.
 from sklearn.metrics import mean_squared_error, r2_score
 
-# Load the diabetes dataset
-diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)
-
-# Use only one feature
-diabetes_X = diabetes_X[:, np.newaxis, 2]
-
-# Split the data into training/testing sets
-diabetes_X_train = diabetes_X[:-20]
-diabetes_X_test = diabetes_X[-20:]
+y_pred = regressor.predict(X_test)
 
-# Split the targets into training/testing sets
-diabetes_y_train = diabetes_y[:-20]
-diabetes_y_test = diabetes_y[-20:]
+print(f"Mean squared error: {mean_squared_error(y_test, y_pred):.2f}")
+print(f"Coefficient of determination: {r2_score(y_test, y_pred):.2f}")
 
-# Create linear regression object
-regr = linear_model.LinearRegression()
-
-# Train the model using the training sets
-regr.fit(diabetes_X_train, diabetes_y_train)
-
-# Make predictions using the testing set
-diabetes_y_pred = regr.predict(diabetes_X_test)
-
-# The coefficients
-print("Coefficients: \n", regr.coef_)
-# The mean squared error
-print("Mean squared error: %.2f" % mean_squared_error(diabetes_y_test, diabetes_y_pred))
-# The coefficient of determination: 1 is perfect prediction
-print("Coefficient of determination: %.2f" % r2_score(diabetes_y_test, diabetes_y_pred))
+# %%
+# Plotting the results
+# --------------------
+#
+# Finally, we visualize the results on the test set.
 import matplotlib.pyplot as plt
 
-# Plot outputs
-plt.scatter(diabetes_X_test, diabetes_y_test, color="black")
-plt.plot(diabetes_X_test, diabetes_y_pred, color="blue", linewidth=3)
+fig, ax = plt.subplots()
 
-plt.xticks(())
-plt.yticks(())
+ax.scatter(X_test, y_test, label="Data points")
+ax.plot(X_test, y_pred, linewidth=3, color="tab:orange", label="Model predictions")
+ax.set(xlabel="Feature", ylabel="Target", title="Linear Regression")
+ax.legend()
 
 plt.show()
+
+# %%
+# Conclusion
+# ----------
+#
+# This example shows how to use the simplest linear model called
+# :class:`~sklearn.linear_model.LinearRegression` in scikit-learn. For this purpose, we
+# use a single feature from the diabetes dataset and try to predict the diabetes
+# progression using this linear model. We therefore load the diabetes dataset and split
+# it into training and test sets.
+#
diff --git a/examples/linear_model/plot_ols_3d.py b/examples/linear_model/plot_ols_3d.py
deleted file mode 100644
index cd848f659e8d8..0000000000000
--- a/examples/linear_model/plot_ols_3d.py
+++ /dev/null
@@ -1,83 +0,0 @@
-"""
-=========================================================
-Sparsity Example: Fitting only features 1 and 2
-=========================================================
-
-Features 1 and 2 of the diabetes-dataset are fitted and
-plotted below. It illustrates that although feature 2
-has a strong coefficient on the full model, it does not
-give us much regarding `y` when compared to just feature 1.
-"""
-
-# Authors: The scikit-learn developers
-# SPDX-License-Identifier: BSD-3-Clause
-
-# %%
-# First we load the diabetes dataset.
-
-import numpy as np
-
-from sklearn import datasets
-
-X, y = datasets.load_diabetes(return_X_y=True)
-indices = (0, 1)
-
-X_train = X[:-20, indices]
-X_test = X[-20:, indices]
-y_train = y[:-20]
-y_test = y[-20:]
-
-# %%
-# Next we fit a linear regression model.
-
-from sklearn import linear_model
-
-ols = linear_model.LinearRegression()
-_ = ols.fit(X_train, y_train)
-
-
-# %%
-# Finally we plot the figure from three different views.
-
-import matplotlib.pyplot as plt
-
-# unused but required import for doing 3d projections with matplotlib < 3.2
-import mpl_toolkits.mplot3d  # noqa: F401
-
-
-def plot_figs(fig_num, elev, azim, X_train, clf):
-    fig = plt.figure(fig_num, figsize=(4, 3))
-    plt.clf()
-    ax = fig.add_subplot(111, projection="3d", elev=elev, azim=azim)
-
-    ax.scatter(X_train[:, 0], X_train[:, 1], y_train, c="k", marker="+")
-    ax.plot_surface(
-        np.array([[-0.1, -0.1], [0.15, 0.15]]),
-        np.array([[-0.1, 0.15], [-0.1, 0.15]]),
-        clf.predict(
-            np.array([[-0.1, -0.1, 0.15, 0.15], [-0.1, 0.15, -0.1, 0.15]]).T
-        ).reshape((2, 2)),
-        alpha=0.5,
-    )
-    ax.set_xlabel("X_1")
-    ax.set_ylabel("X_2")
-    ax.set_zlabel("Y")
-    ax.xaxis.set_ticklabels([])
-    ax.yaxis.set_ticklabels([])
-    ax.zaxis.set_ticklabels([])
-
-
-# Generate the three different figures from different views
-elev = 43.5
-azim = -110
-plot_figs(1, elev, azim, X_train, ols)
-
-elev = -0.5
-azim = 0
-plot_figs(2, elev, azim, X_train, ols)
-
-elev = -0.5
-azim = 90
-plot_figs(3, elev, azim, X_train, ols)
-
-plt.show()

From 4477e50e0cb711db12e435487057dfc59a5f0e43 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 16 Oct 2024 11:00:19 +0200
Subject: [PATCH 2/4] improve wording conclusion

---
 examples/linear_model/plot_ols.py | 45 ++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 16 deletions(-)

diff --git a/examples/linear_model/plot_ols.py b/examples/linear_model/plot_ols.py
index c19d97ad045c9..65968ae90a7b3 100644
--- a/examples/linear_model/plot_ols.py
+++ b/examples/linear_model/plot_ols.py
@@ -1,9 +1,9 @@
 """
-=====================================
-Simple Ordinary Least Squares Example
-=====================================
+==============================
+Ordinary Least Squares Example
+==============================
 
-This example shows how to use the simplest ordinary least squares (OLS) model
+This example shows how to use the ordinary least squares (OLS) model
 called :class:`~sklearn.linear_model.LinearRegression` in scikit-learn.
 
 For this purpose, we use a single feature from the diabetes dataset and try to
@@ -34,7 +34,9 @@
 # Linear regression model
 # -----------------------
 #
-# We create a linear regression model and fit it on the training data.
+# We create a linear regression model and fit it on the training data. Note that by
+# default, an intercept is added to the model. We can control this behavior by setting
+# the `fit_intercept` parameter.
 from sklearn.linear_model import LinearRegression
 
 regressor = LinearRegression().fit(X_train, y_train)
@@ -56,15 +58,22 @@
 # Plotting the results
 # --------------------
 #
-# Finally, we visualize the results on the test set.
+# Finally, we visualize the results on the train and test data.
 import matplotlib.pyplot as plt
 
-fig, ax = plt.subplots()
+fig, ax = plt.subplots(ncols=2, figsize=(10, 5), sharex=True, sharey=True)
 
-ax.scatter(X_test, y_test, label="Data points")
-ax.plot(X_test, y_pred, linewidth=3, color="tab:orange", label="Model predictions")
-ax.set(xlabel="Feature", ylabel="Target", title="Linear Regression")
-ax.legend()
+ax[0].scatter(X_train, y_train, label="Train data points")
+ax[0].plot(X_train, regressor.predict(X_train), linewidth=3, color="tab:orange", label="Model predictions")
+ax[0].set(xlabel="Feature", ylabel="Target", title="Train set")
+ax[0].legend()
+
+ax[1].scatter(X_test, y_test, label="Test data points")
+ax[1].plot(X_test, y_pred, linewidth=3, color="tab:orange", label="Model predictions")
+ax[1].set(xlabel="Feature", ylabel="Target", title="Test set")
+ax[1].legend()
+
+fig.suptitle("Linear Regression")
 plt.show()
 
 # %%
 # Conclusion
 # ----------
 #
-# This example shows how to use the simplest linear model called
-# :class:`~sklearn.linear_model.LinearRegression` in scikit-learn. For this purpose, we
-# use a single feature from the diabetes dataset and try to predict the diabetes
-# progression using this linear model. We therefore load the diabetes dataset and split
-# it into training and test sets.
+# The trained model corresponds to the estimator that minimizes the mean squared error
+# between the predicted and the true target values on the training data. This means that
+# there is no other model that fits the training data better in terms of squared error.
+# We therefore obtain an estimator of the conditional mean of the target given the
+# data.
 #
+# Note that in higher dimensions, minimizing only the squared error might lead to
+# overfitting. Therefore, regularization techniques are commonly used to prevent this
+# issue, such as those implemented in :class:`~sklearn.linear_model.Ridge` or
+# :class:`~sklearn.linear_model.Lasso`.

From 7f6bf9479c9092b34b42f6c7244dd69867758d39 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 16 Oct 2024 11:03:33 +0200
Subject: [PATCH 3/4] lint

---
 examples/linear_model/plot_ols.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/examples/linear_model/plot_ols.py b/examples/linear_model/plot_ols.py
index 65968ae90a7b3..b4cef3aaae918 100644
--- a/examples/linear_model/plot_ols.py
+++ b/examples/linear_model/plot_ols.py
@@ -64,7 +64,13 @@
 fig, ax = plt.subplots(ncols=2, figsize=(10, 5), sharex=True, sharey=True)
 
 ax[0].scatter(X_train, y_train, label="Train data points")
-ax[0].plot(X_train, regressor.predict(X_train), linewidth=3, color="tab:orange", label="Model predictions")
+ax[0].plot(
+    X_train,
+    regressor.predict(X_train),
+    linewidth=3,
+    color="tab:orange",
+    label="Model predictions",
+)
 ax[0].set(xlabel="Feature", ylabel="Target", title="Train set")
 ax[0].legend()

From d82277eabfa25909b6588be9848196a1c8ab42b7 Mon Sep 17 00:00:00 2001
From: Guillaume Lemaitre
Date: Wed, 16 Oct 2024 16:25:30 +0200
Subject: [PATCH 4/4] remove statement

---
 examples/linear_model/plot_ols.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/examples/linear_model/plot_ols.py b/examples/linear_model/plot_ols.py
index b4cef3aaae918..aeb8e986459fc 100644
--- a/examples/linear_model/plot_ols.py
+++ b/examples/linear_model/plot_ols.py
@@ -88,10 +88,8 @@
 # ----------
 #
 # The trained model corresponds to the estimator that minimizes the mean squared error
-# between the predicted and the true target values on the training data. This means that
-# there is no other model that fits the training data better in terms of squared error.
-# We therefore obtain an estimator of the conditional mean of the target given the
-# data.
+# between the predicted and the true target values on the training data. We therefore
+# obtain an estimator of the conditional mean of the target given the data.
 #
 # Note that in higher dimensions, minimizing only the squared error might lead to
 # overfitting. Therefore, regularization techniques are commonly used to prevent this
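
A note on the conclusion reworded in PATCH 4: the claim that the fitted model
minimizes the mean squared error on the training data can be checked numerically,
because OLS has a closed-form solution (the normal equations). The sketch below is
not part of the patches; it only re-creates the `X_train`/`y_train` split from the
example and compares `LinearRegression` against `numpy.linalg.lstsq`:

import numpy as np

from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Re-create the data exactly as in the patched example.
X, y = load_diabetes(return_X_y=True)
X = X[:, [2]]  # Use only one feature
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=20, shuffle=False)

regressor = LinearRegression().fit(X_train, y_train)

# Closed-form OLS: solve min_w ||A @ w - y||^2, where A appends a column of
# ones so that the last entry of w plays the role of the intercept.
A = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
w, *_ = np.linalg.lstsq(A, y_train, rcond=None)

assert np.allclose(regressor.coef_, w[:-1])
assert np.allclose(regressor.intercept_, w[-1])

Any other coefficient/intercept pair has, by construction, a larger training mean
squared error, which is the sense in which no other linear model fits the training
data better.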
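
For the regularization remark, a minimal sketch of the
:class:`~sklearn.linear_model.Ridge` alternative mentioned in the conclusion: it
exposes the same fit/predict API, with an `alpha` parameter controlling the penalty
strength (the value below is an arbitrary choice for illustration, not a tuned one).
Continuing from the variables defined in the sketch above:

from sklearn.linear_model import Ridge

# Ridge minimizes ||y - X @ w||^2 + alpha * ||w||^2; the penalty shrinks the
# coefficients toward zero, and alpha=0 recovers ordinary least squares.
ridge = Ridge(alpha=1.0).fit(X_train, y_train)

print(f"OLS coefficient:   {regressor.coef_[0]:.2f}")
print(f"Ridge coefficient: {ridge.coef_[0]:.2f}")  # shrunk toward zero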