Merged
41 commits
78e2155
DEP penalty in LogisticRegression
lorentzenchr Nov 5, 2025
3986510
DOC add whatsnew entry
lorentzenchr Nov 6, 2025
37eeb63
DOC/FIX develop.rst
lorentzenchr Nov 6, 2025
69b11b5
TST take care of warnings in tests
lorentzenchr Nov 7, 2025
1cc08fb
CLN adapt deprecation of penalty throughout codebase
lorentzenchr Nov 7, 2025
b0b243a
Merge branch 'main' into dep_logistic_penalty
lorentzenchr Nov 7, 2025
f85d664
CLN docstring
lorentzenchr Nov 7, 2025
dbbdb07
MNT add deprecation warning for l1_ratio=None and review comments
lorentzenchr Nov 10, 2025
eab936a
DEP change default of l1_ratios from "warn" to None, deprecate None a…
lorentzenchr Nov 10, 2025
c745f77
DOC docstring of l1_ratio and table of solver compatibility
lorentzenchr Nov 17, 2025
b3b50fa
CLN review comments
lorentzenchr Nov 17, 2025
f0f181a
Merge branch 'main' into dep_logistic_penalty
lorentzenchr Nov 21, 2025
06f6d18
TST fix warnings in tests
lorentzenchr Nov 21, 2025
2fe94f6
Merge branch 'main' into dep_logistic_penalty
lorentzenchr Nov 21, 2025
cacb942
CLN address review comments
lorentzenchr Nov 24, 2025
ff7df3c
CLN nitpick
lorentzenchr Nov 24, 2025
2043b52
trigger CD
lorentzenchr Nov 24, 2025
8eaa810
MNT l1_ratios="warn"
lorentzenchr Nov 24, 2025
85a325f
CLN fix warnings in tests
lorentzenchr Nov 25, 2025
9f63e57
[azure parallel] sprinkle with a few more l1_ratios
lesteve Nov 25, 2025
f988165
[azure parallel] one more l1_ratios
lesteve Nov 25, 2025
527de21
This does not seem necessary
lesteve Nov 25, 2025
b002c22
[azure parallel] tweak changelog
lesteve Nov 25, 2025
e9ba34f
[azure parallel] an attempt at tweaking the changelog
lesteve Nov 25, 2025
8fcb264
[azure parallel] yet another changelog tweak
lesteve Nov 25, 2025
5991522
Revert "This does not seem necessary"
lesteve Nov 25, 2025
7a71a56
[azure parallel] tweak
lesteve Nov 25, 2025
370823f
[azure parallel] keep tweaking
lesteve Nov 25, 2025
7e28999
tweak
lesteve Nov 25, 2025
f079c5d
tweaks in test
lesteve Nov 25, 2025
36533a8
tweak warning wording
lesteve Nov 25, 2025
32c26e2
Tweak wording of l1_ratios deprecation + default change
lesteve Nov 25, 2025
ffc7c97
[azure parallel] trigger CI
lesteve Nov 25, 2025
0ed5277
[azure parallel] tweak changelog again
lesteve Nov 25, 2025
aea4cef
Update sklearn/svm/_bounds.py
jeremiedbb Nov 25, 2025
3646004
Update sklearn/linear_model/_logistic.py
jeremiedbb Nov 25, 2025
297b1cf
Apply suggestion from @jeremiedbb
jeremiedbb Nov 25, 2025
bdec717
lint
jeremiedbb Nov 25, 2025
49ed659
warn for inconsistent penalty and l1_ratio values
jeremiedbb Nov 25, 2025
983d6a5
lint
jeremiedbb Nov 25, 2025
689dbee
ignore other warning in test
jeremiedbb Nov 25, 2025
4 changes: 2 additions & 2 deletions asv_benchmarks/benchmarks/linear_model.py
@@ -47,11 +47,11 @@ def make_data(self, params):
def make_estimator(self, params):
    representation, solver, n_jobs = params

-    penalty = "l2" if solver == "lbfgs" else "l1"
+    l1_ratio = 0 if solver == "lbfgs" else 1

    estimator = LogisticRegression(
        solver=solver,
-        penalty=penalty,
+        l1_ratio=l1_ratio,
        tol=0.01,
        n_jobs=n_jobs,
        random_state=0,
4 changes: 3 additions & 1 deletion benchmarks/bench_saga.py
@@ -66,10 +66,12 @@ def fit_single(
times = [0]

if penalty == "l2":
+    l1_ratio = 0
    alpha = 1.0 / (C * n_samples)
    beta = 0
    lightning_penalty = None
else:
+    l1_ratio = 1
    alpha = 0.0
    beta = 1.0 / (C * n_samples)
    lightning_penalty = "l1"
@@ -97,7 +99,7 @@ def fit_single(
lr = LogisticRegression(
    solver=solver,
    C=C,
-    penalty=penalty,
+    l1_ratio=l1_ratio,
    fit_intercept=False,
    tol=0,
    max_iter=this_max_iter,
4 changes: 2 additions & 2 deletions doc/developers/develop.rst
@@ -381,10 +381,10 @@ The parameter `deep` controls whether or not the parameters of the
subestimator__dual -> False
subestimator__fit_intercept -> True
subestimator__intercept_scaling -> 1
-subestimator__l1_ratio -> None
+subestimator__l1_ratio -> 0.0
subestimator__max_iter -> 100
subestimator__n_jobs -> None
-subestimator__penalty -> l2
+subestimator__penalty -> deprecated
subestimator__random_state -> None
subestimator__solver -> lbfgs
subestimator__tol -> 0.0001
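For orientation, a minimal sketch of the kind of `get_params(deep=True)` call that produces a listing like the one above (the toy meta-estimator here is illustrative, not necessarily the one used in develop.rst):

```python
from sklearn.base import BaseEstimator, MetaEstimatorMixin
from sklearn.linear_model import LogisticRegression


class MyMeta(MetaEstimatorMixin, BaseEstimator):
    """Toy meta-estimator exposing its subestimator's params with a prefix."""

    def __init__(self, subestimator=None):
        self.subestimator = subestimator


meta = MyMeta(subestimator=LogisticRegression())
for name, value in sorted(meta.get_params(deep=True).items()):
    # e.g. "subestimator__l1_ratio -> 0.0" under this PR's new default
    print(f"{name} -> {value}")
```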
67 changes: 33 additions & 34 deletions doc/modules/linear_model.rst
@@ -999,20 +999,20 @@ specific training sample (the vector :math:`s` is formed by element-wise
multiplication of the class weights and sample weights),
and the sum :math:`S = \sum_{i=1}^n s_i`.

-We currently provide four choices for the regularization term :math:`r(w)` via
-the `penalty` argument:
-
-+----------------+-------------------------------------------------+
-| penalty | :math:`r(w)` |
-+================+=================================================+
-| `None` | :math:`0` |
-+----------------+-------------------------------------------------+
-| :math:`\ell_1` | :math:`\|w\|_1` |
-+----------------+-------------------------------------------------+
-| :math:`\ell_2` | :math:`\frac{1}{2}\|w\|_2^2 = \frac{1}{2}w^T w` |
-+----------------+-------------------------------------------------+
-| `ElasticNet` | :math:`\frac{1 - \rho}{2}w^T w + \rho \|w\|_1` |
-+----------------+-------------------------------------------------+
+We currently provide four choices for the regularization or penalty term :math:`r(w)`
+via the arguments `C` and `l1_ratio`:
+
++-------------------------------+-------------------------------------------------+
+| penalty | :math:`r(w)` |
++===============================+=================================================+
+| none (`C=np.inf`) | :math:`0` |
++-------------------------------+-------------------------------------------------+
+| :math:`\ell_1` (`l1_ratio=1`) | :math:`\|w\|_1` |
++-------------------------------+-------------------------------------------------+
+| :math:`\ell_2` (`l1_ratio=0`) | :math:`\frac{1}{2}\|w\|_2^2 = \frac{1}{2}w^T w` |
++-------------------------------+-------------------------------------------------+
+| ElasticNet (`0<l1_ratio<1`) | :math:`\frac{1 - \rho}{2}w^T w + \rho \|w\|_1` |
++-------------------------------+-------------------------------------------------+
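As a concrete illustration, a short sketch of how the four rows above are selected through `C` and `l1_ratio` (written against the API after this deprecation; solver choices follow the compatibility table further down):

```python
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=200, n_features=10, random_state=0)

# no penalty: r(w) = 0
LogisticRegression(C=np.inf).fit(X, y)
# l2 (the default): r(w) = ||w||_2^2 / 2
LogisticRegression(l1_ratio=0).fit(X, y)
# l1: r(w) = ||w||_1, needs a solver that handles the non-smooth term
LogisticRegression(l1_ratio=1, solver="saga", max_iter=5000).fit(X, y)
# ElasticNet: a blend of both; only "saga" supports 0 < l1_ratio < 1
LogisticRegression(l1_ratio=0.5, solver="saga", max_iter=5000).fit(X, y)
```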

For ElasticNet, :math:`\rho` (which corresponds to the `l1_ratio` parameter)
controls the strength of :math:`\ell_1` regularization vs. :math:`\ell_2`
@@ -1063,21 +1063,20 @@ logistic regression, see also `log-linear model
Again, :math:`s_{ik}` are the weights assigned by the user (multiplication of sample
weights and class weights) with their sum :math:`S = \sum_{i=1}^n \sum_{k=0}^{K-1} s_{ik}`.

-We currently provide four choices
-for the regularization term :math:`r(W)` via the `penalty` argument, where :math:`m`
-is the number of features:
-
-+----------------+----------------------------------------------------------------------------------+
-| penalty | :math:`r(W)` |
-+================+==================================================================================+
-| `None` | :math:`0` |
-+----------------+----------------------------------------------------------------------------------+
-| :math:`\ell_1` | :math:`\|W\|_{1,1} = \sum_{i=1}^m\sum_{j=1}^{K}|W_{i,j}|` |
-+----------------+----------------------------------------------------------------------------------+
-| :math:`\ell_2` | :math:`\frac{1}{2}\|W\|_F^2 = \frac{1}{2}\sum_{i=1}^m\sum_{j=1}^{K} W_{i,j}^2` |
-+----------------+----------------------------------------------------------------------------------+
-| `ElasticNet` | :math:`\frac{1 - \rho}{2}\|W\|_F^2 + \rho \|W\|_{1,1}` |
-+----------------+----------------------------------------------------------------------------------+
+We currently provide four choices for the regularization or penalty term :math:`r(W)`
+via the arguments `C` and `l1_ratio`, where :math:`m` is the number of features:
+
++-------------------------------+----------------------------------------------------------------------------------+
+| penalty | :math:`r(W)` |
++===============================+==================================================================================+
+| none (`C=np.inf`) | :math:`0` |
++-------------------------------+----------------------------------------------------------------------------------+
+| :math:`\ell_1` (`l1_ratio=1`) | :math:`\|W\|_{1,1} = \sum_{i=1}^m\sum_{j=1}^{K}|W_{i,j}|` |
++-------------------------------+----------------------------------------------------------------------------------+
+| :math:`\ell_2` (`l1_ratio=0`) | :math:`\frac{1}{2}\|W\|_F^2 = \frac{1}{2}\sum_{i=1}^m\sum_{j=1}^{K} W_{i,j}^2` |
++-------------------------------+----------------------------------------------------------------------------------+
+| ElasticNet (`0<l1_ratio<1`) | :math:`\frac{1 - \rho}{2}\|W\|_F^2 + \rho \|W\|_{1,1}` |
++-------------------------------+----------------------------------------------------------------------------------+
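For the multinomial table, the ElasticNet row can be checked directly; a tiny NumPy sketch of :math:`r(W)` (function name and data are illustrative):

```python
import numpy as np


def elasticnet_penalty(W, rho):
    """r(W) = (1 - rho)/2 * ||W||_F^2 + rho * ||W||_{1,1}, with rho = l1_ratio."""
    frobenius_half = 0.5 * np.sum(W**2)  # reduces to the l2 row when rho=0
    entrywise_l1 = np.sum(np.abs(W))     # reduces to the l1 row when rho=1
    return (1 - rho) * frobenius_half + rho * entrywise_l1


W = np.array([[0.5, -1.0], [0.0, 2.0]])
print(elasticnet_penalty(W, rho=0.5))  # 0.5 * 2.625 + 0.5 * 3.5 = 3.0625
```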

.. _logistic_regression_solvers:

@@ -1100,7 +1099,7 @@ The following table summarizes the penalties and multinomial multiclass supported
+------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+
| Elastic-Net (L1 + L2) | no | no | no | no | no | yes |
+------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+
-| No penalty ('none') | yes | no | yes | yes | yes | yes |
+| No penalty | yes | no | yes | yes | yes | yes |
+------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+
| **Multiclass support** | |
+------------------------------+-------------+-----------------+-----------------+-----------------------+-----------+------------+
@@ -1164,10 +1163,10 @@ zero, is likely to be an underfit, bad model and you are advised to set
than other solvers for large datasets, when both the number of samples and the
number of features are large.

* The "saga" solver [7]_ is a variant of "sag" that also supports the
non-smooth `penalty="l1"`. This is therefore the solver of choice for sparse
multinomial logistic regression. It is also the only solver that supports
`penalty="elasticnet"`.
* The "saga" solver [7]_ is a variant of "sag" that also supports the non-smooth
:math:`\ell_1` penalty (`l1_ratio=1`). This is therefore the solver of choice for
sparse multinomial logistic regression. It is also the only solver that supports
Elastic-Net (`0 < l1_ratio < 1`).
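As a quick illustration of that constraint, a sketch assuming the new `l1_ratio` API from this PR:

```python
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)

# Elastic-Net requires solver="saga"
LogisticRegression(solver="saga", l1_ratio=0.3, max_iter=5000).fit(X, y)

# lbfgs only handles the smooth l2 term, so this combination is rejected:
# LogisticRegression(solver="lbfgs", l1_ratio=0.3).fit(X, y)  # raises ValueError
```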

* The "lbfgs" is an optimization algorithm that approximates the
Broyden–Fletcher–Goldfarb–Shanno algorithm [8]_, which belongs to
27 changes: 27 additions & 0 deletions doc/whats_new/upcoming_changes/sklearn.linear_model/32659.api.rst
@@ -0,0 +1,27 @@
- Parameter `penalty` of :class:`linear_model.LogisticRegression` and
:class:`linear_model.LogisticRegressionCV` is deprecated and will be removed in
version 1.10. The equivalent behaviour can be obtained as follows:

- for :class:`linear_model.LogisticRegression`

- use `l1_ratio=0` instead of `penalty="l2"`
- use `l1_ratio=1` instead of `penalty="l1"`
- use `0<l1_ratio<1` instead of `penalty="elasticnet"`
- use `C=np.inf` instead of `penalty=None`

- for :class:`linear_model.LogisticRegressionCV`

- use `l1_ratios=(0,)` instead of `penalty="l2"`
- use `l1_ratios=(1,)` instead of `penalty="l1"`
- the equivalent of `penalty=None` is to have `np.inf` as an element of the `Cs` parameter

For :class:`linear_model.LogisticRegression`, the default value of `l1_ratio`
has changed from `None` to `0.0`. Setting `l1_ratio=None` is deprecated and
will raise an error in version 1.10.

For :class:`linear_model.LogisticRegressionCV`, the default value of `l1_ratios`
has changed from `None` to `"warn"`. It will be changed to `(0,)` in version
1.10. Setting `l1_ratios=None` is deprecated and will raise an error in
version 1.10.

By :user:`Christian Lorentzen <lorentzenchr>`.
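A before/after sketch of the migration this entry describes (parameter spellings as given above; the `Cs` grid is illustrative):

```python
import numpy as np
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV

# LogisticRegression(penalty="l1", solver="saga")    # deprecated
LogisticRegression(l1_ratio=1, solver="saga")

# LogisticRegression(penalty=None)                   # deprecated
LogisticRegression(C=np.inf)

# LogisticRegressionCV(penalty="l1", solver="saga")  # deprecated
LogisticRegressionCV(l1_ratios=(1,), solver="saga")

# No-penalty candidate for the CV estimator: add np.inf to the Cs grid
LogisticRegressionCV(Cs=[0.01, 0.1, 1.0, np.inf])
```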
8 changes: 3 additions & 5 deletions examples/linear_model/plot_logistic_l1_l2_sparsity.py
@@ -39,11 +39,9 @@
# Set regularization parameter
for i, (C, axes_row) in enumerate(zip((1, 0.1, 0.01), axes)):
    # Increase tolerance for short training time
-    clf_l1_LR = LogisticRegression(C=C, penalty="l1", tol=0.01, solver="saga")
-    clf_l2_LR = LogisticRegression(C=C, penalty="l2", tol=0.01, solver="saga")
-    clf_en_LR = LogisticRegression(
-        C=C, penalty="elasticnet", solver="saga", l1_ratio=l1_ratio, tol=0.01
-    )
+    clf_l1_LR = LogisticRegression(C=C, l1_ratio=1, tol=0.01, solver="saga")
+    clf_l2_LR = LogisticRegression(C=C, l1_ratio=0, tol=0.01, solver="saga")
+    clf_en_LR = LogisticRegression(C=C, l1_ratio=l1_ratio, tol=0.01, solver="saga")
    clf_l1_LR.fit(X, y)
    clf_l2_LR.fit(X, y)
    clf_en_LR.fit(X, y)
2 changes: 1 addition & 1 deletion examples/linear_model/plot_logistic_path.py
@@ -65,7 +65,7 @@
clf = make_pipeline(
    StandardScaler(),
    LogisticRegression(
-        penalty="l1",
+        l1_ratio=1,
        solver="liblinear",
        tol=1e-6,
        max_iter=int(1e6),
@@ -79,8 +79,8 @@
    % (model_params["name"], solver, this_max_iter)
)
clf = LogisticRegression(
+    l1_ratio=1,
    solver=solver,
-    penalty="l1",
    max_iter=this_max_iter,
    random_state=42,
)
@@ -53,7 +53,7 @@
X_test = scaler.transform(X_test)

# Turn up tolerance for faster convergence
-clf = LogisticRegression(C=50.0 / train_samples, penalty="l1", solver="saga", tol=0.1)
+clf = LogisticRegression(C=50.0 / train_samples, l1_ratio=1, solver="saga", tol=0.1)
clf.fit(X_train, y_train)
sparsity = np.mean(clf.coef_ == 0) * 100
score = clf.score(X_test, y_test)
2 changes: 1 addition & 1 deletion examples/miscellaneous/plot_estimator_representation.py
@@ -24,7 +24,7 @@
# values when displayed as a string. This reduces the visual noise and makes it
# easier to spot what the differences are when comparing instances.

-lr = LogisticRegression(penalty="l1")
+lr = LogisticRegression(l1_ratio=1)
print(lr)

# %%
9 changes: 9 additions & 0 deletions sklearn/feature_selection/_from_model.py
@@ -40,11 +40,20 @@ def _calculate_threshold(estimator, importances, threshold):
is_elasticnetcv_l1_penalized = est_name == "ElasticNetCV" and (
    hasattr(estimator, "l1_ratio_") and np.isclose(estimator.l1_ratio_, 1.0)
)
+is_logreg_l1_penalized = est_name == "LogisticRegression" and (
+    hasattr(estimator, "l1_ratio") and np.isclose(estimator.l1_ratio, 1.0)
+)
+is_logregcv_l1_penalized = est_name == "LogisticRegressionCV" and (
+    hasattr(estimator, "l1_ratio_")
+    and np.all(np.isclose(estimator.l1_ratio_, 1.0))
+)
if (
    is_l1_penalized
    or is_lasso
    or is_elasticnet_l1_penalized
    or is_elasticnetcv_l1_penalized
+    or is_logreg_l1_penalized
+    or is_logregcv_l1_penalized
):
    # the natural default threshold is 0 when l1 penalty was used
    threshold = 1e-5
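A sketch of what this addition changes downstream: with an l1-penalized `LogisticRegression` (in the new `l1_ratio` spelling), `SelectFromModel` defaults to the near-zero threshold of `1e-5` instead of the mean importance (data here is illustrative):

```python
from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression

X, y = make_classification(
    n_samples=300, n_features=20, n_informative=4, random_state=0
)

# l1_ratio=1 is a pure l1 penalty, so coefficients can be exactly zero;
# the default threshold then keeps every feature with |coef| > 1e-5.
selector = SelectFromModel(
    LogisticRegression(l1_ratio=1, solver="saga", C=0.1, max_iter=5000)
)
selector.fit(X, y)
print(selector.get_support().sum(), "features selected")
```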