ENH Add metadata routing for RANSACRegressor #28261


Merged (39 commits, Feb 26, 2024)
Commits (39)
5286d47
unfinished metadata implementation for ransacregressor
StefanieSenger Jan 25, 2024
35ec69b
fixes
StefanieSenger Jan 25, 2024
23fd9bf
not quite but almost working fix
StefanieSenger Jan 25, 2024
eaaab39
fix for legacy build of routed_params
StefanieSenger Jan 25, 2024
807fcc5
fix
StefanieSenger Jan 25, 2024
a9116cc
ConsumingRegressor for LinearRegression because the latter doesnt con…
StefanieSenger Jan 25, 2024
6be1762
adjust test to set multiple metadata requests and meaningful errormes…
StefanieSenger Jan 26, 2024
8e492a4
fix missing requests test for multiple routings
StefanieSenger Jan 28, 2024
8fc3bd3
Apply suggestions from code review
StefanieSenger Jan 30, 2024
1308bcc
improvements after review
StefanieSenger Jan 30, 2024
75f1e4f
Merge branch 'metadata_RANSACRegressor' of github.com:StefanieSenger/…
StefanieSenger Jan 30, 2024
1935838
more improvements triggered by review
StefanieSenger Jan 30, 2024
928d99c
route metadata from score to predict
StefanieSenger Jan 30, 2024
37070ec
minor things
StefanieSenger Jan 31, 2024
9a96eb8
prepared ConsumingRegressor for test in test_ransac
StefanieSenger Feb 1, 2024
d308d94
fit_params should not be routed to sub_estimator.predict
StefanieSenger Feb 1, 2024
5bea176
excluded estimator.predict from tests
StefanieSenger Feb 5, 2024
f66761d
Update sklearn/linear_model/_ransac.py
StefanieSenger Feb 5, 2024
7d8c156
catch FutureWarning
StefanieSenger Feb 5, 2024
341ded5
adjusted tests to ConsumingRegressor now uses own score method
StefanieSenger Feb 5, 2024
bcd9ab5
add comment for deprecation of positional argument
StefanieSenger Feb 13, 2024
7be5cce
Merge branch 'main' into metadata_RANSACRegressor
StefanieSenger Feb 15, 2024
3145bc4
repair test and revert merge conflict errors
StefanieSenger Feb 15, 2024
423f6dd
bypass FutureWarnings
StefanieSenger Feb 16, 2024
2edff90
bypass FutureWarning
StefanieSenger Feb 16, 2024
f0bcdd4
correct passing of ConsumingRegressor
StefanieSenger Feb 16, 2024
2c73de4
raise to prevent silent bug
StefanieSenger Feb 16, 2024
fa1d712
changes after review
StefanieSenger Feb 20, 2024
c3bb0c1
Merge branch 'main' into metadata_RANSACRegressor
StefanieSenger Feb 20, 2024
4f45a6b
typo
StefanieSenger Feb 20, 2024
d3ac747
Add method_mapping to the common test definitions of the metaestimators
adrinjalali Feb 20, 2024
508cc85
without re-raising error message after review
StefanieSenger Feb 22, 2024
8ed5e86
Merge branch 'main' into metadata_RANSACRegressor
StefanieSenger Feb 22, 2024
9d1e310
fixed test for Bagging*
StefanieSenger Feb 22, 2024
ff274de
Merge branch 'main' into metadata_RANSACRegressor
StefanieSenger Feb 23, 2024
2b1c4e8
shorter variable name
StefanieSenger Feb 23, 2024
cc7a054
Update sklearn/tests/metadata_routing_common.py
StefanieSenger Feb 26, 2024
175c496
Update sklearn/tests/test_metaestimators_metadata_routing.py
OmarManzoor Feb 26, 2024
3af9455
Update sklearn/tests/test_metaestimators_metadata_routing.py
OmarManzoor Feb 26, 2024
2 changes: 2 additions & 0 deletions doc/metadata_routing.rst
@@ -287,6 +287,7 @@ Meta-estimators and functions supporting metadata routing:
- :class:`sklearn.linear_model.LogisticRegressionCV`
- :class:`sklearn.linear_model.MultiTaskElasticNetCV`
- :class:`sklearn.linear_model.MultiTaskLassoCV`
- :class:`sklearn.linear_model.RANSACRegressor`
- :class:`sklearn.model_selection.GridSearchCV`
- :class:`sklearn.model_selection.HalvingGridSearchCV`
- :class:`sklearn.model_selection.HalvingRandomSearchCV`
@@ -315,6 +316,7 @@ Meta-estimators and tools not supporting metadata routing yet:
- :class:`sklearn.feature_selection.RFE`
- :class:`sklearn.feature_selection.RFECV`
- :class:`sklearn.feature_selection.SequentialFeatureSelector`
- :class:`sklearn.impute.IterativeImputer`
- :class:`sklearn.linear_model.RANSACRegressor`
- :class:`sklearn.linear_model.RidgeClassifierCV`
- :class:`sklearn.linear_model.RidgeCV`
4 changes: 2 additions & 2 deletions doc/modules/linear_model.rst
@@ -1536,10 +1536,10 @@ Each iteration performs the following steps:

1. Select ``min_samples`` random samples from the original data and check
whether the set of data is valid (see ``is_data_valid``).
2. Fit a model to the random subset (``base_estimator.fit``) and check
2. Fit a model to the random subset (``estimator.fit``) and check
whether the estimated model is valid (see ``is_model_valid``).
3. Classify all data as inliers or outliers by calculating the residuals
to the estimated model (``base_estimator.predict(X) - y``) - all data
to the estimated model (``estimator.predict(X) - y``) - all data
samples with absolute residuals smaller than or equal to the
``residual_threshold`` are considered as inliers.
4. Save fitted model as best model if number of inlier samples is
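A minimal editorial sketch of the inlier classification in step 3 above (illustrative names, not scikit-learn's actual implementation):

import numpy as np

def classify_inliers(estimator, X, y, residual_threshold):
    # absolute residuals of the fitted model on all samples
    residuals = np.abs(estimator.predict(X) - y)
    # inlier mask: True where the residual is within the threshold
    return residuals <= residual_threshold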
12 changes: 12 additions & 0 deletions doc/whats_new/v1.5.rst
@@ -48,6 +48,18 @@ more details.
via their `fit` methods.
:pr:`28432` by :user:`Adam Li <adam2392>` and :user:`Benjamin Bossan <BenjaminBossan>`.

Metadata Routing
----------------

The following models now support metadata routing in one or more of their
methods. Refer to the :ref:`Metadata Routing User Guide <metadata_routing>` for
more details.

- |Feature| :class:`linear_model.RANSACRegressor` now supports metadata routing
  in its ``fit``, ``score`` and ``predict`` methods and routes metadata to its
  underlying estimator's ``fit``, ``score`` and ``predict`` methods.
:pr:`28261` by :user:`Stefanie Senger <StefanieSenger>`.
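As a usage illustration (editorial sketch, not part of the changelog): with routing enabled, metadata such as ``sample_weight`` is only forwarded if the sub-estimator explicitly requests it.

import numpy as np
from sklearn import set_config
from sklearn.linear_model import LinearRegression, RANSACRegressor

set_config(enable_metadata_routing=True)  # global flag; sklearn.config_context also works

rng = np.random.RandomState(0)
X = rng.normal(size=(100, 1))
y = 3 * X.ravel() + rng.normal(scale=0.1, size=100)
sample_weight = np.ones(100)

# The sub-estimator must request the metadata for every method that should receive it;
# RANSAC's fit routes to the sub-estimator's fit and score, hence both requests below.
estimator = (
    LinearRegression()
    .set_fit_request(sample_weight=True)
    .set_score_request(sample_weight=True)
)
ransac = RANSACRegressor(estimator=estimator, random_state=0)
ransac.fit(X, y, sample_weight=sample_weight)    # routed to estimator.fit and estimator.score
ransac.score(X, y, sample_weight=sample_weight)  # routed to estimator.score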

- |Feature| :class:`ensemble.VotingClassifier` and
:class:`ensemble.VotingRegressor` now support metadata routing and pass
``**fit_params`` to the underlying estimators via their `fit` methods.
157 changes: 130 additions & 27 deletions sklearn/linear_model/_ransac.py
@@ -17,6 +17,7 @@
)
from ..exceptions import ConvergenceWarning
from ..utils import check_consistent_length, check_random_state
from ..utils._bunch import Bunch
from ..utils._param_validation import (
HasMethods,
Interval,
@@ -25,11 +26,20 @@
StrOptions,
)
from ..utils.metadata_routing import (
_raise_for_unsupported_routing,
_RoutingNotSupportedMixin,
MetadataRouter,
MethodMapping,
_raise_for_params,
_routing_enabled,
process_routing,
)
from ..utils.random import sample_without_replacement
from ..utils.validation import _check_sample_weight, check_is_fitted, has_fit_parameter
from ..utils.validation import (
_check_method_params,
_check_sample_weight,
_deprecate_positional_args,
check_is_fitted,
has_fit_parameter,
)
from ._base import LinearRegression

_EPSILON = np.spacing(1)
@@ -70,7 +80,6 @@ def _dynamic_max_trials(n_inliers, n_samples, min_samples, probability):


class RANSACRegressor(
_RoutingNotSupportedMixin,
MetaEstimatorMixin,
RegressorMixin,
MultiOutputMixin,
@@ -306,7 +315,11 @@ def __init__(
# RansacRegressor.estimator is not validated yet
prefer_skip_nested_validation=False
)
def fit(self, X, y, sample_weight=None):
# TODO(1.7): remove `sample_weight` from the signature after deprecation
# cycle; for backwards compatibility: pop it from `fit_params` before the
# `_raise_for_params` check and reinsert it after the check
@_deprecate_positional_args(version="1.7")
def fit(self, X, y, *, sample_weight=None, **fit_params):
"""Fit estimator using RANSAC algorithm.

Parameters
@@ -324,6 +337,17 @@ def fit(self, X, y, sample_weight=None):

.. versionadded:: 0.18

**fit_params : dict
Parameters routed to the `fit` method of the sub-estimator via the
metadata routing API.

.. versionadded:: 1.5

Only available if
`sklearn.set_config(enable_metadata_routing=True)` is set. See
:ref:`Metadata Routing User Guide <metadata_routing>` for more
details.

Returns
-------
self : object
@@ -336,10 +360,10 @@ def fit(self, X, y, sample_weight=None):
`is_data_valid` and `is_model_valid` return False for all
`max_trials` randomly chosen sub-samples.
"""
_raise_for_unsupported_routing(self, "fit", sample_weight=sample_weight)
# Need to validate separately here. We can't pass multi_output=True
# because that would allow y to be csr. Delay expensive finiteness
# check to the estimator's own input validation.
_raise_for_params(fit_params, self, "fit")
check_X_params = dict(accept_sparse="csr", force_all_finite=False)
check_y_params = dict(ensure_2d=False)
X, y = self._validate_data(
@@ -404,12 +428,22 @@ def fit(self, X, y, sample_weight=None):
estimator_name = type(estimator).__name__
if sample_weight is not None and not estimator_fit_has_sample_weight:
raise ValueError(
"%s does not support sample_weight. Samples"
"%s does not support sample_weight. Sample"
" weights are only used for the calibration"
" itself." % estimator_name
)

if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X)
fit_params["sample_weight"] = sample_weight

if _routing_enabled():
routed_params = process_routing(self, "fit", **fit_params)
else:
routed_params = Bunch()
routed_params.estimator = Bunch(fit={}, predict={}, score={})
if sample_weight is not None:
sample_weight = _check_sample_weight(sample_weight, X)
routed_params.estimator.fit = {"sample_weight": sample_weight}
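# [Editorial sketch, not part of this diff] In both branches, `routed_params.estimator`
# ends up as a Bunch holding one dict of keyword arguments per sub-estimator method,
# roughly (assuming only a requested `sample_weight` was passed):
#     routed_params.estimator.fit   == {"sample_weight": sample_weight}
#     routed_params.estimator.score == {}  # requested score metadata would appear here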

n_inliers_best = 1
score_best = -np.inf
@@ -451,13 +485,13 @@ def fit(self, X, y, sample_weight=None):
self.n_skips_invalid_data_ += 1
continue

# cut `fit_params` down to `subset_idxs`
fit_params_subset = _check_method_params(
X, params=routed_params.estimator.fit, indices=subset_idxs
)

# fit model for current random sample set
if sample_weight is None:
estimator.fit(X_subset, y_subset)
else:
estimator.fit(
X_subset, y_subset, sample_weight=sample_weight[subset_idxs]
)
estimator.fit(X_subset, y_subset, **fit_params_subset)

# check if estimated model is valid
if self.is_model_valid is not None and not self.is_model_valid(
@@ -484,8 +518,17 @@ def fit(self, X, y, sample_weight=None):
X_inlier_subset = X[inlier_idxs_subset]
y_inlier_subset = y[inlier_idxs_subset]

# cut `score_params` down to `inlier_idxs_subset`
score_params_inlier_subset = _check_method_params(
X, params=routed_params.estimator.score, indices=inlier_idxs_subset
)

# score of inlier data set
score_subset = estimator.score(X_inlier_subset, y_inlier_subset)
score_subset = estimator.score(
X_inlier_subset,
y_inlier_subset,
**score_params_inlier_subset,
)

# same number of inliers but worse score -> skip current random
# sample
@@ -549,20 +592,17 @@ def fit(self, X, y, sample_weight=None):
)

# estimate final model using all inliers
if sample_weight is None:
estimator.fit(X_inlier_best, y_inlier_best)
else:
estimator.fit(
X_inlier_best,
y_inlier_best,
sample_weight=sample_weight[inlier_best_idxs_subset],
)
fit_params_best_idxs_subset = _check_method_params(
X, params=routed_params.estimator.fit, indices=inlier_best_idxs_subset
)

estimator.fit(X_inlier_best, y_inlier_best, **fit_params_best_idxs_subset)

self.estimator_ = estimator
self.inlier_mask_ = inlier_mask_best
return self
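# [Editorial note, not part of this diff] `_check_method_params` (used above) passes
# scalars through unchanged and slices any array-like metadata aligned with `X` down to
# the given row indices, which is how routed metadata follows the random subsets, e.g.
# (illustrative values):
#     _check_method_params(X, params={"sample_weight": w}, indices=[0, 2, 4])
#     # -> {"sample_weight": w[[0, 2, 4]]}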

def predict(self, X):
def predict(self, X, **params):
"""Predict using the estimated model.

This is a wrapper for `estimator_.predict(X)`.
@@ -572,6 +612,17 @@ def predict(self, X):
X : {array-like or sparse matrix} of shape (n_samples, n_features)
Input data.

**params : dict
Parameters routed to the `predict` method of the sub-estimator via
the metadata routing API.

.. versionadded:: 1.5

Only available if
`sklearn.set_config(enable_metadata_routing=True)` is set. See
:ref:`Metadata Routing User Guide <metadata_routing>` for more
details.

Returns
-------
y : array, shape = [n_samples] or [n_samples, n_targets]
@@ -584,9 +635,19 @@ def predict(self, X):
accept_sparse=True,
reset=False,
)
return self.estimator_.predict(X)

def score(self, X, y):
_raise_for_params(params, self, "predict")

if _routing_enabled():
predict_params = process_routing(self, "predict", **params).estimator[
"predict"
]
else:
predict_params = {}

return self.estimator_.predict(X, **predict_params)
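# [Editorial note, not part of this diff] Metadata passed to `RANSACRegressor.predict` is
# only forwarded when the sub-estimator explicitly requested it for its own `predict`,
# e.g. `sub_estimator.set_predict_request(metadata=True)` (the keyword must be a
# parameter of that `predict` method).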

def score(self, X, y, **params):
"""Return the score of the prediction.

This is a wrapper for `estimator_.score(X, y)`.
@@ -599,6 +660,17 @@ def score(self, X, y):
y : array-like of shape (n_samples,) or (n_samples, n_targets)
Target values.

**params : dict
Parameters routed to the `score` method of the sub-estimator via
the metadata routing API.

.. versionadded:: 1.5

Only available if
`sklearn.set_config(enable_metadata_routing=True)` is set. See
:ref:`Metadata Routing User Guide <metadata_routing>` for more
details.

Returns
-------
z : float
@@ -611,7 +683,38 @@ def score(self, X, y):
accept_sparse=True,
reset=False,
)
return self.estimator_.score(X, y)

_raise_for_params(params, self, "score")
if _routing_enabled():
score_params = process_routing(self, "score", **params).estimator["score"]
else:
score_params = {}

return self.estimator_.score(X, y, **score_params)

def get_metadata_routing(self):
"""Get metadata routing of this object.

Please check :ref:`User Guide <metadata_routing>` on how the routing
mechanism works.

.. versionadded:: 1.5

Returns
-------
routing : MetadataRouter
A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
routing information.
"""
router = MetadataRouter(owner=self.__class__.__name__).add(
estimator=self.estimator,
method_mapping=MethodMapping()
.add(caller="fit", callee="fit")
.add(caller="fit", callee="score")
.add(caller="score", callee="score")
.add(caller="predict", callee="predict"),
)
return router
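# [Editorial note, not part of this diff] The `fit` -> `score` entry exists because `fit`
# internally calls `estimator.score` on the inlier subsets, so metadata requested by the
# sub-estimator's `score` must also be routable from `RANSACRegressor.fit`.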

def _more_tags(self):
return {
8 changes: 4 additions & 4 deletions sklearn/linear_model/tests/test_ransac.py
@@ -461,7 +461,7 @@ def test_ransac_fit_sample_weight():
ransac_estimator = RANSACRegressor(random_state=0)
n_samples = y.shape[0]
weights = np.ones(n_samples)
ransac_estimator.fit(X, y, weights)
ransac_estimator.fit(X, y, sample_weight=weights)
# sanity check
assert ransac_estimator.inlier_mask_.shape[0] == n_samples

@@ -498,7 +498,7 @@ def test_ransac_fit_sample_weight():
sample_weight = np.append(sample_weight, outlier_weight)
X_ = np.append(X_, outlier_X, axis=0)
y_ = np.append(y_, outlier_y)
ransac_estimator.fit(X_, y_, sample_weight)
ransac_estimator.fit(X_, y_, sample_weight=sample_weight)

assert_allclose(ransac_estimator.estimator_.coef_, ref_coef_)

@@ -509,15 +509,15 @@ def test_ransac_fit_sample_weight():

err_msg = f"{estimator.__class__.__name__} does not support sample_weight."
with pytest.raises(ValueError, match=err_msg):
ransac_estimator.fit(X, y, weights)
ransac_estimator.fit(X, y, sample_weight=weights)


def test_ransac_final_model_fit_sample_weight():
X, y = make_regression(n_samples=1000, random_state=10)
rng = check_random_state(42)
sample_weight = rng.randint(1, 4, size=y.shape[0])
sample_weight = sample_weight / sample_weight.sum()
ransac = RANSACRegressor(estimator=LinearRegression(), random_state=0)
ransac = RANSACRegressor(random_state=0)
ransac.fit(X, y, sample_weight=sample_weight)

final_model = LinearRegression()
24 changes: 17 additions & 7 deletions sklearn/tests/metadata_routing_common.py
@@ -162,14 +162,17 @@ def fit(self, X, y, sample_weight="default", metadata="default"):
)
return self

def predict(self, X, sample_weight="default", metadata="default"):
pass # pragma: no cover
def predict(self, X, y=None, sample_weight="default", metadata="default"):
record_metadata_not_default(
self, "predict", sample_weight=sample_weight, metadata=metadata
)
return np.zeros(shape=(len(X),))

# when needed, uncomment the implementation
# record_metadata_not_default(
# self, "predict", sample_weight=sample_weight, metadata=metadata
# )
# return np.zeros(shape=(len(X),))
def score(self, X, y, sample_weight="default", metadata="default"):
record_metadata_not_default(
self, "score", sample_weight=sample_weight, metadata=metadata
)
return 1


class NonConsumingClassifier(ClassifierMixin, BaseEstimator):
@@ -278,6 +281,13 @@ def decision_function(self, X, sample_weight="default", metadata="default"):
)
return np.zeros(shape=(len(X),))

# uncomment when needed
# def score(self, X, y, sample_weight="default", metadata="default"):
# record_metadata_not_default(
# self, "score", sample_weight=sample_weight, metadata=metadata
# )
# return 1


class ConsumingTransformer(TransformerMixin, BaseEstimator):
"""A transformer which accepts metadata on fit and transform.
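For reference, an end-to-end editorial sketch (not part of the PR) that exercises the new routing through the ``ConsumingRegressor`` test helper diffed above; the requests and data are illustrative:

import numpy as np
from sklearn import config_context
from sklearn.linear_model import RANSACRegressor
from sklearn.tests.metadata_routing_common import ConsumingRegressor

X = np.array([[0.0], [1.0], [2.0], [3.0]])
y = np.array([0.0, 1.0, 2.0, 3.0])
metadata = np.arange(len(y))

with config_context(enable_metadata_routing=True):
    sub = (
        ConsumingRegressor()
        .set_fit_request(metadata=True)
        .set_score_request(metadata=True)
        .set_predict_request(metadata=True)
    )
    ransac = RANSACRegressor(estimator=sub, random_state=0)
    ransac.fit(X, y, metadata=metadata)   # metadata reaches ConsumingRegressor.fit and .score
    ransac.predict(X, metadata=metadata)  # metadata reaches ConsumingRegressor.predict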