diff --git a/sklearn/linear_model/ransac.py b/sklearn/linear_model/ransac.py index 12c45b26aa567..db60ddb90b378 100644 --- a/sklearn/linear_model/ransac.py +++ b/sklearn/linear_model/ransac.py @@ -11,7 +11,7 @@ from ..utils.random import sample_without_replacement from ..utils.validation import check_is_fitted from .base import LinearRegression - +from ..utils.validation import has_fit_parameter _EPSILON = np.spacing(1) @@ -177,7 +177,7 @@ def __init__(self, base_estimator=None, min_samples=None, self.residual_metric = residual_metric self.random_state = random_state - def fit(self, X, y): + def fit(self, X, y, sample_weight=None): """Fit estimator using RANSAC algorithm. Parameters @@ -188,6 +188,11 @@ def fit(self, X, y): y : array-like, shape = [n_samples] or [n_samples, n_targets] Target values. + sample_weight : array-like, shape = [n_samples] + Individual weights for each sample. + Raises an error if sample_weight is passed and the + base_estimator's fit method does not support it. + Raises ------ ValueError @@ -243,6 +248,17 @@ def fit(self, X, y): except ValueError: pass + estimator_fit_has_sample_weight = has_fit_parameter(base_estimator, + "sample_weight") + estimator_name = type(base_estimator).__name__ + if (sample_weight is not None and not + estimator_fit_has_sample_weight): + raise ValueError("%s does not support sample_weight. Pass" + " sample_weight=None or use a base_estimator" + " whose fit method accepts it." 
% estimator_name) + if sample_weight is not None: + sample_weight = np.asarray(sample_weight) + n_inliers_best = 0 score_best = np.inf inlier_mask_best = None @@ -269,7 +285,11 @@ def fit(self, X, y): continue # fit model for current random sample set - base_estimator.fit(X_subset, y_subset) + if sample_weight is None: + base_estimator.fit(X_subset, y_subset) + else: + base_estimator.fit(X_subset, y_subset, + sample_weight=sample_weight[subset_idxs]) # check if estimated model is valid if (self.is_model_valid is not None and not diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py index b646653acbe76..1a5f73c666292 100644 --- a/sklearn/linear_model/tests/test_ransac.py +++ b/sklearn/linear_model/tests/test_ransac.py @@ -1,11 +1,9 @@ import numpy as np -from numpy.testing import assert_equal, assert_raises -from numpy.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_raises_regexp +from numpy.testing import assert_equal, assert_raises, assert_array_equal, assert_array_almost_equal +from sklearn.utils.testing import assert_raises_regexp, assert_almost_equal, assert_less from scipy import sparse - -from sklearn.utils.testing import assert_less -from sklearn.linear_model import LinearRegression, RANSACRegressor +from sklearn.utils import check_random_state +from sklearn.linear_model import LinearRegression, RANSACRegressor, Lasso from sklearn.linear_model.ransac import _dynamic_max_trials @@ -353,3 +351,49 @@ def test_ransac_dynamic_max_trials(): ransac_estimator = RANSACRegressor(base_estimator, min_samples=2, stop_probability=1.1) assert_raises(ValueError, ransac_estimator.fit, X, y) + + +def test_ransac_fit_sample_weight(): + ransac_estimator = RANSACRegressor(random_state=0) + n_samples = y.shape[0] + weights = np.ones(n_samples) + ransac_estimator.fit(X, y, weights) + # sanity check + assert_equal(ransac_estimator.inlier_mask_.shape[0], n_samples) + + ref_inlier_mask = 
np.ones_like(ransac_estimator.inlier_mask_ + ).astype(np.bool_) + ref_inlier_mask[outliers] = False + # check that mask is correct + assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) + + # check that fit(X) = fit([X1, X2, X3], sample_weight=[n1, n2, n3]) where + # X = X1 repeated n1 times, X2 repeated n2 times and so forth + random_state = check_random_state(0) + X_ = random_state.randint(0, 200, [10, 1]) + y_ = np.ndarray.flatten(0.2 * X_ + 2) + sample_weight = random_state.randint(0, 10, 10) + outlier_X = random_state.randint(0, 1000, [1, 1]) + outlier_weight = random_state.randint(0, 10, 1) + outlier_y = random_state.randint(-1000, 0, 1) + + X_flat = np.append(np.repeat(X_, sample_weight, axis=0), + np.repeat(outlier_X, outlier_weight, axis=0), axis=0) + y_flat = np.ndarray.flatten(np.append(np.repeat(y_, sample_weight, axis=0), + np.repeat(outlier_y, outlier_weight, axis=0), + axis=0)) + ransac_estimator.fit(X_flat, y_flat) + ref_coef_ = ransac_estimator.estimator_.coef_ + + sample_weight = np.append(sample_weight, outlier_weight) + X_ = np.append(X_, outlier_X, axis=0) + y_ = np.append(y_, outlier_y) + ransac_estimator.fit(X_, y_, sample_weight) + + assert_almost_equal(ransac_estimator.estimator_.coef_, ref_coef_) + + # check that if base_estimator.fit doesn't support + # sample_weight, raises error + base_estimator = Lasso() + ransac_estimator = RANSACRegressor(base_estimator) + assert_raises(ValueError, ransac_estimator.fit, X, y, weights)