diff --git a/doc/whats_new/upcoming_changes/sklearn.linear_model/31888.api.rst b/doc/whats_new/upcoming_changes/sklearn.linear_model/31888.api.rst new file mode 100644 index 0000000000000..a1ac21999bb09 --- /dev/null +++ b/doc/whats_new/upcoming_changes/sklearn.linear_model/31888.api.rst @@ -0,0 +1,4 @@ +- :class:`sklearn.linear_model.LogisticRegression` now raises an error when + the liblinear solver is used and input X contains values larger than 1e30; + the liblinear solver freezes otherwise. + By :user:`Shruti Nath <snath-xoc>`. diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py index f921f473da835..c803bdc0ba72d 100644 --- a/sklearn/linear_model/_logistic.py +++ b/sklearn/linear_model/_logistic.py @@ -1295,6 +1295,12 @@ def fit(self, X, y, sample_weight=None): multi_class = _check_multi_class(multi_class, solver, len(self.classes_)) if solver == "liblinear": + if np.max(X) > 1e30: + raise ValueError( + "Using the 'liblinear' solver while X contains a maximum " + "value > 1e30 results in a frozen fit. Please choose another " + "solver or rescale the input X." + ) if len(self.classes_) > 2: warnings.warn( "Using the 'liblinear' solver for multiclass classification is " diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 6b08be5a95a0d..440c5d4fdbbb0 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -2319,6 +2319,23 @@ def test_large_sparse_matrix(solver, global_random_seed, csr_container): LogisticRegression(solver=solver).fit(X, y) +def test_liblinear_with_large_values(): + # Liblinear freezes when X.max() ~ 1e100, see issue #7486. + # We preemptively raise an error when X.max() > 1e30. + + # dense single-column X whose maximum (1e100) exceeds the 1e30 threshold + X = np.array([0, 1e100]).reshape(-1, 1) + y = np.array([0, 1]) + + msg = ( + "Using the 'liblinear' solver while X contains a maximum " + "value > 1e30 results in a frozen fit. 
Please choose another " + "solver or rescale the input X." + ) + with pytest.raises(ValueError, match=msg): + LogisticRegression(solver="liblinear").fit(X, y) + + def test_single_feature_newton_cg(): # Test that Newton-CG works with a single feature and intercept. # Non-regression test for issue #23605.