Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/whats_new/v1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,11 @@ Changelog
- |Enhancement| Validate user-supplied gram matrix passed to linear models
via the `precompute` argument. :pr:`19004` by :user:`Adam Midvidy <amidvidy>`.

- |API| Raise a warning in :class:`linear_model.RANSACRegressor` that from
version 1.2, `min_samples` needs to be set explicitly for models other than
:class:`linear_model.LinearRegression`.
:pr:`19390` by :user:`Shao Yang Hong <hongshaoyang>`.

- |Fix| :meth:`ElasticNet.fit` no longer modifies `sample_weight` in place.
:pr:`19055` by `Thomas Fan`_.

Expand Down
23 changes: 20 additions & 3 deletions sklearn/linear_model/_ransac.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,20 @@ class RANSACRegressor(
min_samples : int (>= 1) or float ([0, 1]), default=None
Minimum number of samples chosen randomly from original data. Treated
as an absolute number of samples for `min_samples >= 1`, treated as a
relative number `ceil(min_samples * X.shape[0]`) for
relative number `ceil(min_samples * X.shape[0])` for
`min_samples < 1`. This is typically chosen as the minimal number of
samples necessary to estimate the given `base_estimator`. By default a
``sklearn.linear_model.LinearRegression()`` estimator is assumed and
`min_samples` is chosen as ``X.shape[1] + 1``.
`min_samples` is chosen as ``X.shape[1] + 1``. This parameter is highly
dependent upon the model, so if a `base_estimator` other than
:class:`~sklearn.linear_model.LinearRegression` is used, the user is
encouraged to provide a value.

.. deprecated :: 1.0
Not setting `min_samples` explicitly will raise an error in version
1.2 for models other than
:class:`~sklearn.linear_model.LinearRegression`. To keep the old
default behavior, set `min_samples=X.shape[1] + 1` explicitly.

residual_threshold : float, default=None
Maximum residual for a data sample to be classified as an inlier.
Expand Down Expand Up @@ -289,7 +298,15 @@ def fit(self, X, y, sample_weight=None):
base_estimator = LinearRegression()

if self.min_samples is None:
# assume linear model by default
if not isinstance(base_estimator, LinearRegression):
# FIXME: in 1.2, turn this warning into an error
warnings.warn(
"From version 1.2, `min_samples` needs to be explicitely "
"set otherwise an error will be raised. To keep the "
"current behavior, you need to set `min_samples` to "
f"`X.shape[1] + 1 that is {X.shape[1] + 1}",
FutureWarning,
)
min_samples = X.shape[1] + 1
elif 0 < self.min_samples < 1:
min_samples = np.ceil(self.min_samples * X.shape[0])
Expand Down
15 changes: 12 additions & 3 deletions sklearn/linear_model/tests/test_ransac.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from sklearn.utils import check_random_state
from sklearn.utils._testing import assert_allclose
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.linear_model import LinearRegression, RANSACRegressor, Ridge
from sklearn.linear_model import OrthogonalMatchingPursuit
from sklearn.linear_model._ransac import _dynamic_max_trials
from sklearn.exceptions import ConvergenceWarning
Expand Down Expand Up @@ -358,6 +358,10 @@ def test_ransac_min_n_samples():
ransac_estimator7 = RANSACRegressor(
base_estimator, min_samples=X.shape[0] + 1, residual_threshold=5, random_state=0
)
# GH #19390
ransac_estimator8 = RANSACRegressor(
Ridge(), min_samples=None, residual_threshold=5, random_state=0
)

ransac_estimator1.fit(X, y)
ransac_estimator2.fit(X, y)
Expand All @@ -383,6 +387,10 @@ def test_ransac_min_n_samples():
with pytest.raises(ValueError):
ransac_estimator7.fit(X, y)

err_msg = "From version 1.2, `min_samples` needs to be explicitely set"
with pytest.warns(FutureWarning, match=err_msg):
ransac_estimator8.fit(X, y)


def test_ransac_multi_dimensional_targets():

Expand Down Expand Up @@ -574,9 +582,10 @@ def test_ransac_fit_sample_weight():
# check that if base_estimator.fit doesn't support
# sample_weight, raises error
base_estimator = OrthogonalMatchingPursuit()
ransac_estimator = RANSACRegressor(base_estimator)
ransac_estimator = RANSACRegressor(base_estimator, min_samples=10)

with pytest.raises(ValueError):
err_msg = f"{base_estimator.__class__.__name__} does not support sample_weight."
with pytest.raises(ValueError, match=err_msg):
ransac_estimator.fit(X, y, weights)


Expand Down
10 changes: 9 additions & 1 deletion sklearn/utils/estimator_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@
from ._testing import raises
from . import is_scalar_nan

from ..linear_model import LinearRegression
from ..linear_model import LogisticRegression
from ..linear_model import RANSACRegressor
from ..linear_model import Ridge

from ..base import (
Expand Down Expand Up @@ -354,7 +356,13 @@ def _construct_instance(Estimator):
required_parameters = getattr(Estimator, "_required_parameters", [])
if len(required_parameters):
if required_parameters in (["estimator"], ["base_estimator"]):
if issubclass(Estimator, RegressorMixin):
# `RANSACRegressor` will raise an error with any model other
# than `LinearRegression` if we don't fix the `min_samples` parameter.
# For the common tests, we can enforce using `LinearRegression`, which
# is the default estimator in `RANSACRegressor`, instead of `Ridge`.
if issubclass(Estimator, RANSACRegressor):
estimator = Estimator(LinearRegression())
elif issubclass(Estimator, RegressorMixin):
estimator = Estimator(Ridge())
else:
estimator = Estimator(LogisticRegression(C=1))
Expand Down