Thanks to visit codestin.com
Credit goes to github.com

Skip to content

[MRG+1] Added sample_weight parameter to ransac.fit #6140

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions sklearn/linear_model/ransac.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from ..utils.random import sample_without_replacement
from ..utils.validation import check_is_fitted
from .base import LinearRegression

from ..utils.validation import has_fit_parameter

_EPSILON = np.spacing(1)

Expand Down Expand Up @@ -177,7 +177,7 @@ def __init__(self, base_estimator=None, min_samples=None,
self.residual_metric = residual_metric
self.random_state = random_state

def fit(self, X, y):
def fit(self, X, y, sample_weight=None):
"""Fit estimator using RANSAC algorithm.

Parameters
Expand All @@ -188,6 +188,11 @@ def fit(self, X, y):
y : array-like, shape = [n_samples] or [n_samples, n_targets]
Target values.

sample_weight : array-like, shape = [n_samples]
    Individual weights for each sample.
    Raises error if sample_weight is passed and the base_estimator's
    fit method does not support it.

Raises
------
ValueError
Expand Down Expand Up @@ -243,6 +248,17 @@ def fit(self, X, y):
except ValueError:
pass

estimator_fit_has_sample_weight = has_fit_parameter(base_estimator,
"sample_weight")
estimator_name = type(base_estimator).__name__
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can reuse has_fit_parameter from sklearn.utils.validation

if (sample_weight is not None and not
estimator_fit_has_sample_weight):
raise ValueError("%s does not support sample_weight. Sample"
                 " weights are only used for the fit"
                 " itself." % estimator_name)
if sample_weight is not None:
sample_weight = np.asarray(sample_weight)

n_inliers_best = 0
score_best = np.inf
inlier_mask_best = None
Expand All @@ -269,7 +285,11 @@ def fit(self, X, y):
continue

# fit model for current random sample set
base_estimator.fit(X_subset, y_subset)
if sample_weight is None:
base_estimator.fit(X_subset, y_subset)
else:
base_estimator.fit(X_subset, y_subset,
sample_weight=sample_weight[subset_idxs])

# check if estimated model is valid
if (self.is_model_valid is not None and not
Expand Down
56 changes: 50 additions & 6 deletions sklearn/linear_model/tests/test_ransac.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import numpy as np
from numpy.testing import assert_equal, assert_raises
from numpy.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_raises_regexp
from numpy.testing import assert_equal, assert_raises, assert_array_equal, assert_array_almost_equal
from sklearn.utils.testing import assert_raises_regexp, assert_almost_equal, assert_less
from scipy import sparse

from sklearn.utils.testing import assert_less
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.utils import check_random_state
from sklearn.linear_model import LinearRegression, RANSACRegressor, Lasso
from sklearn.linear_model.ransac import _dynamic_max_trials


Expand Down Expand Up @@ -353,3 +351,49 @@ def test_ransac_dynamic_max_trials():
ransac_estimator = RANSACRegressor(base_estimator, min_samples=2,
stop_probability=1.1)
assert_raises(ValueError, ransac_estimator.fit, X, y)


def test_ransac_fit_sample_weight():
    """Check that RANSACRegressor.fit honors the sample_weight parameter.

    Three properties are verified:
    1. Passing uniform weights gives the same inlier mask as the
       unweighted fit on the module-level (X, y, outliers) fixture.
    2. Fitting with integer sample weights is equivalent to fitting on a
       dataset where each sample is repeated `weight` times.
    3. A ValueError is raised when the base estimator's fit method does
       not accept sample_weight (e.g. Lasso).
    """
    ransac_estimator = RANSACRegressor(random_state=0)
    n_samples = y.shape[0]
    weights = np.ones(n_samples)
    ransac_estimator.fit(X, y, weights)
    # sanity check: one inlier-mask entry per sample
    assert_equal(ransac_estimator.inlier_mask_.shape[0], n_samples)

    # uniform weights must reproduce the unweighted inlier mask:
    # every sample is an inlier except the known outliers
    ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_
                                   ).astype(np.bool_)
    ref_inlier_mask[outliers] = False
    assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)

    # check that fit(X) = fit([X1, X2, X3], sample_weight=[n1, n2, n3])
    # where X = X1 repeated n1 times, X2 repeated n2 times and so forth
    random_state = check_random_state(0)
    X_ = random_state.randint(0, 200, [10, 1])
    # noiseless linear relation, flattened to a 1-D target
    y_ = np.ravel(0.2 * X_ + 2)
    sample_weight = random_state.randint(0, 10, 10)
    outlier_X = random_state.randint(0, 1000, [1, 1])
    outlier_weight = random_state.randint(0, 10, 1)
    outlier_y = random_state.randint(-1000, 0, 1)

    # reference fit: materialize the weights by repeating each sample
    X_flat = np.append(np.repeat(X_, sample_weight, axis=0),
                       np.repeat(outlier_X, outlier_weight, axis=0), axis=0)
    y_flat = np.ravel(np.append(np.repeat(y_, sample_weight, axis=0),
                                np.repeat(outlier_y, outlier_weight, axis=0),
                                axis=0))
    ransac_estimator.fit(X_flat, y_flat)
    ref_coef_ = ransac_estimator.estimator_.coef_

    # weighted fit on the compact dataset must recover the same coefficients
    sample_weight = np.append(sample_weight, outlier_weight)
    X_ = np.append(X_, outlier_X, axis=0)
    y_ = np.append(y_, outlier_y)
    ransac_estimator.fit(X_, y_, sample_weight)

    assert_almost_equal(ransac_estimator.estimator_.coef_, ref_coef_)

    # check that if base_estimator.fit doesn't support
    # sample_weight, raises error
    base_estimator = Lasso()
    ransac_estimator = RANSACRegressor(base_estimator)
    assert_raises(ValueError, ransac_estimator.fit, X, y, weights)