From 4ae4726e41d055e00835d1764f2eabd2412d348d Mon Sep 17 00:00:00 2001 From: imaculate Date: Fri, 8 Jan 2016 14:47:08 +0200 Subject: [PATCH 1/7] Added sample_weight parameter to ransac.fit --- sklearn/linear_model/ransac.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/ransac.py b/sklearn/linear_model/ransac.py index 12c45b26aa567..c796db6df1456 100644 --- a/sklearn/linear_model/ransac.py +++ b/sklearn/linear_model/ransac.py @@ -11,7 +11,8 @@ from ..utils.random import sample_without_replacement from ..utils.validation import check_is_fitted from .base import LinearRegression - +from ..utils.fixes import signature +import warnings _EPSILON = np.spacing(1) @@ -177,7 +178,7 @@ def __init__(self, base_estimator=None, min_samples=None, self.residual_metric = residual_metric self.random_state = random_state - def fit(self, X, y): + def fit(self, X, y, sample_weight = None): """Fit estimator using RANSAC algorithm. Parameters @@ -243,6 +244,17 @@ def fit(self, X, y): except ValueError: pass + fit_parameters = signature(base_estimator.fit).parameters + estimator_name = type(base_estimator).__name__ + if (sample_weight is not None + and "sample_weight" not in fit_parameters): + warnings.warn("%s does not support sample_weight. Samples" + " weights are only used for the calibration" + " itself." 
% estimator_name) + base_estimator_sample_weight = None + else: + base_estimator_sample_weight = sample_weight + n_inliers_best = 0 score_best = np.inf inlier_mask_best = None @@ -269,7 +281,10 @@ def fit(self, X, y): continue # fit model for current random sample set - base_estimator.fit(X_subset, y_subset) + if base_estimator_sample_weight is None: + base_estimator.fit(X_subset, y_subset) + else: + base_estimator.fit(X_subset, y_subset, sample_weight= base_estimator_sample_weight[subset_idxs]) # check if estimated model is valid if (self.is_model_valid is not None and not From 033d4f45794422f8acf797f71d036b9518bac45b Mon Sep 17 00:00:00 2001 From: imaculate Date: Sat, 9 Jan 2016 11:17:30 +0200 Subject: [PATCH 2/7] revised the method fit for RANSACRegressor to use utils.validation and raise an exception when sample_weight is passed to base_estimators that don't support it. --- sklearn/linear_model/ransac.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/sklearn/linear_model/ransac.py b/sklearn/linear_model/ransac.py index c796db6df1456..ab644f67dbc03 100644 --- a/sklearn/linear_model/ransac.py +++ b/sklearn/linear_model/ransac.py @@ -11,8 +11,7 @@ from ..utils.random import sample_without_replacement from ..utils.validation import check_is_fitted from .base import LinearRegression -from ..utils.fixes import signature -import warnings +from ..utils import validation _EPSILON = np.spacing(1) @@ -196,6 +195,10 @@ def fit(self, X, y, sample_weight = None): `is_data_valid` and `is_model_valid` return False for all `max_trials` randomly chosen sub-samples.
+ TypeError + If sample_weight is passed and the base estimator fit method + does not support it + """ X = check_array(X, accept_sparse='csr') y = check_array(y, ensure_2d=False) @@ -244,11 +247,11 @@ def fit(self, X, y, sample_weight = None): except ValueError: pass - fit_parameters = signature(base_estimator.fit).parameters + estimator_fit_has_sample_weight = validation.has_fit_parameters(base_estimator, "sample_weight") estimator_name = type(base_estimator).__name__ if (sample_weight is not None - and "sample_weight" not in fit_parameters): - warnings.warn("%s does not support sample_weight. Samples" + and not estimator_fit_has_sample_weight ): + raise TypeError("%s does not support sample_weight. Samples" " weights are only used for the calibration" " itself." % estimator_name) base_estimator_sample_weight = None From 49fc556f1158c0bb7c8ce691e90f1820928cee9c Mon Sep 17 00:00:00 2001 From: imaculate Date: Sat, 9 Jan 2016 15:23:09 +0200 Subject: [PATCH 3/7] more code refactoring --- sklearn/linear_model/ransac.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/ransac.py b/sklearn/linear_model/ransac.py index ab644f67dbc03..add7325865a47 100644 --- a/sklearn/linear_model/ransac.py +++ b/sklearn/linear_model/ransac.py @@ -11,7 +11,7 @@ from ..utils.random import sample_without_replacement from ..utils.validation import check_is_fitted from .base import LinearRegression -from ..utils import validation +from ..utils.validation import has_fit_parameter _EPSILON = np.spacing(1) @@ -247,7 +247,7 @@ def fit(self, X, y, sample_weight = None): except ValueError: pass - estimator_fit_has_sample_weight = validation.has_fit_parameters(base_estimator, "sample_weight") + estimator_fit_has_sample_weight = has_fit_parameter(base_estimator, "sample_weight") estimator_name = type(base_estimator).__name__ if (sample_weight is not None and not estimator_fit_has_sample_weight ): From 3ef5088b585109ebc0d19c8dd681953964721d68 Mon Sep 17 
00:00:00 2001 From: imaculate Date: Sat, 9 Jan 2016 16:39:18 +0200 Subject: [PATCH 4/7] added tests for the feature --- sklearn/linear_model/tests/test_ransac.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py index b646653acbe76..726f2942ffbe7 100644 --- a/sklearn/linear_model/tests/test_ransac.py +++ b/sklearn/linear_model/tests/test_ransac.py @@ -5,7 +5,7 @@ from scipy import sparse from sklearn.utils.testing import assert_less -from sklearn.linear_model import LinearRegression, RANSACRegressor +from sklearn.linear_model import LinearRegression, RANSACRegressor,Lasso from sklearn.linear_model.ransac import _dynamic_max_trials @@ -353,3 +353,14 @@ def test_ransac_dynamic_max_trials(): ransac_estimator = RANSACRegressor(base_estimator, min_samples=2, stop_probability=1.1) assert_raises(ValueError, ransac_estimator.fit, X, y) + +def test_ransac_fit_sample_weight(): + ransac_estimator = RANSACRegressor() + n_samples = y.shape[0] + weights = np.ones(n_samples) + ransac_estimator.fit(X, y, weights) + assert_equal(ransac_estimator.inlier_mask_.shape[0], n_samples) #sanity check + + base_estimator = Lasso() #check that if base_estimator.fit doesn't support sample_weight raises error + ransac_estimator = RANSACRegressor(base_estimator) + assert_raises(TypeError,ransac_estimator.fit,X,y, weights ) \ No newline at end of file From 466ab4db30a230b9658cf63a33ca58c9afec9166 Mon Sep 17 00:00:00 2001 From: imaculate Date: Mon, 11 Jan 2016 22:57:39 +0200 Subject: [PATCH 5/7] Modified the tests --- sklearn/linear_model/ransac.py | 10 +++--- sklearn/linear_model/tests/test_ransac.py | 39 +++++++++++++++++++---- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/sklearn/linear_model/ransac.py b/sklearn/linear_model/ransac.py index add7325865a47..659e642aec969 100644 --- a/sklearn/linear_model/ransac.py +++ b/sklearn/linear_model/ransac.py @@ -188,6 
+188,10 @@ def fit(self, X, y, sample_weight = None): y : array-like, shape = [n_samples] or [n_samples, n_targets] Target values. + sample_weight: numpy array of shape [n_samples] + Individual weights for each sample + raises error if sample_weight is passed and base_estimator fit method does not support it. + Raises ------ ValueError @@ -195,10 +199,6 @@ def fit(self, X, y, sample_weight = None): `is_data_valid` and `is_model_valid` return False for all `max_trials` randomly chosen sub-samples. - TypeError - If sample_weight is passed and the base estimator fit method - does not support it - """ X = check_array(X, accept_sparse='csr') y = check_array(y, ensure_2d=False) @@ -251,7 +251,7 @@ def fit(self, X, y, sample_weight = None): estimator_name = type(base_estimator).__name__ if (sample_weight is not None and not estimator_fit_has_sample_weight ): - raise TypeError("%s does not support sample_weight. Samples" + raise ValueError("%s does not support sample_weight. Samples" " weights are only used for the calibration" " itself." 
% estimator_name) base_estimator_sample_weight = None diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py index 726f2942ffbe7..73e01660c00f1 100644 --- a/sklearn/linear_model/tests/test_ransac.py +++ b/sklearn/linear_model/tests/test_ransac.py @@ -1,10 +1,8 @@ import numpy as np -from numpy.testing import assert_equal, assert_raises -from numpy.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_raises_regexp +from numpy.testing import assert_equal, assert_raises, assert_array_equal,assert_array_almost_equal +from sklearn.utils.testing import assert_raises_regexp, assert_almost_equal, assert_less from scipy import sparse - -from sklearn.utils.testing import assert_less +from sklearn.utils import check_random_state from sklearn.linear_model import LinearRegression, RANSACRegressor,Lasso from sklearn.linear_model.ransac import _dynamic_max_trials @@ -355,12 +353,39 @@ def test_ransac_dynamic_max_trials(): assert_raises(ValueError, ransac_estimator.fit, X, y) def test_ransac_fit_sample_weight(): - ransac_estimator = RANSACRegressor() + ransac_estimator = RANSACRegressor(random_state=0) n_samples = y.shape[0] weights = np.ones(n_samples) ransac_estimator.fit(X, y, weights) assert_equal(ransac_estimator.inlier_mask_.shape[0], n_samples) #sanity check + ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_ + ).astype(np.bool_) + ref_inlier_mask[outliers] = False + assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)#check that mask is correct + + #Now check that fit(X) = fit([X1, X2, X3],sample_weight = [n1, n2, n3]) where X = X1 repeated n1 times, X2 repeated n2 times and so forth + random_state = check_random_state(0) + X_ = random_state.randint(0, 200, [10,1]) + y_ = np.ndarray.flatten(0.2 *X_ +2) + sample_weight = random_state.randint(0, 10, 10) + outlier_X = random_state.randint(0,1000,[1,1]) + outlier_weight = random_state.randint(0,10,1 ) + outlier_y = 
random_state.randint(-1000,0,1) + + X_flat = np.append(np.repeat(X_, sample_weight, axis=0), np.repeat(outlier_X,outlier_weight, axis=0), axis=0) + y_flat = np.ndarray.flatten(np.append(np.repeat(y_, sample_weight, axis=0), np.repeat(outlier_y,outlier_weight, axis=0), axis=0)) + + ransac_estimator.fit(X_flat, y_flat) + ref_coef_ = ransac_estimator.estimator_.coef_ + + sample_weight = np.append(sample_weight, outlier_weight) + X_ = np.append(X_,outlier_X, axis = 0) + y_ = np.append(y_,outlier_y) + ransac_estimator.fit(X_, y_, sample_weight) + + assert_almost_equal(ransac_estimator.estimator_.coef_, ref_coef_) + base_estimator = Lasso() #check that if base_estimator.fit doesn't support sample_weight raises error ransac_estimator = RANSACRegressor(base_estimator) - assert_raises(TypeError,ransac_estimator.fit,X,y, weights ) \ No newline at end of file + assert_raises(ValueError,ransac_estimator.fit,X,y, weights ) From b63cba4918e195d30bd129f25c775cd99be91742 Mon Sep 17 00:00:00 2001 From: imaculate Date: Thu, 14 Jan 2016 10:16:48 +0200 Subject: [PATCH 6/7] Refactored code and fixed pep8 violations --- sklearn/linear_model/ransac.py | 30 +++++++++-------- sklearn/linear_model/tests/test_ransac.py | 40 ++++++++++++++--------- 2 files changed, 40 insertions(+), 30 deletions(-) diff --git a/sklearn/linear_model/ransac.py b/sklearn/linear_model/ransac.py index 659e642aec969..db60ddb90b378 100644 --- a/sklearn/linear_model/ransac.py +++ b/sklearn/linear_model/ransac.py @@ -177,7 +177,7 @@ def __init__(self, base_estimator=None, min_samples=None, self.residual_metric = residual_metric self.random_state = random_state - def fit(self, X, y, sample_weight = None): + def fit(self, X, y, sample_weight=None): """Fit estimator using RANSAC algorithm. Parameters @@ -188,9 +188,10 @@ def fit(self, X, y, sample_weight = None): y : array-like, shape = [n_samples] or [n_samples, n_targets] Target values. 
- sample_weight: numpy array of shape [n_samples] - Individual weights for each sample - raises error if sample_weight is passed and base_estimator fit method does not support it. + sample_weight: array-like, shape = [n_samples] + Individual weights for each sample + raises error if sample_weight is passed and base_estimator + fit method does not support it. Raises ------ @@ -247,16 +248,16 @@ def fit(self, X, y, sample_weight = None): except ValueError: pass - estimator_fit_has_sample_weight = has_fit_parameter(base_estimator, "sample_weight") + estimator_fit_has_sample_weight = has_fit_parameter(base_estimator, + "sample_weight") estimator_name = type(base_estimator).__name__ - if (sample_weight is not None - and not estimator_fit_has_sample_weight ): + if (sample_weight is not None and not + estimator_fit_has_sample_weight): raise ValueError("%s does not support sample_weight. Samples" - " weights are only used for the calibration" - " itself." % estimator_name) - base_estimator_sample_weight = None - else: - base_estimator_sample_weight = sample_weight + " weights are only used for the calibration" + " itself." 
% estimator_name) + if sample_weight is not None: + sample_weight = np.asarray(sample_weight) n_inliers_best = 0 score_best = np.inf @@ -284,10 +285,11 @@ def fit(self, X, y, sample_weight = None): continue # fit model for current random sample set - if base_estimator_sample_weight is None: + if sample_weight is None: base_estimator.fit(X_subset, y_subset) else: - base_estimator.fit(X_subset, y_subset, sample_weight= base_estimator_sample_weight[subset_idxs]) + base_estimator.fit(X_subset, y_subset, + sample_weight=sample_weight[subset_idxs]) # check if estimated model is valid if (self.is_model_valid is not None and not diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py index 73e01660c00f1..cfa51bdee23d9 100644 --- a/sklearn/linear_model/tests/test_ransac.py +++ b/sklearn/linear_model/tests/test_ransac.py @@ -352,40 +352,48 @@ def test_ransac_dynamic_max_trials(): stop_probability=1.1) assert_raises(ValueError, ransac_estimator.fit, X, y) + def test_ransac_fit_sample_weight(): ransac_estimator = RANSACRegressor(random_state=0) n_samples = y.shape[0] weights = np.ones(n_samples) ransac_estimator.fit(X, y, weights) - assert_equal(ransac_estimator.inlier_mask_.shape[0], n_samples) #sanity check + # sanity check + assert_equal(ransac_estimator.inlier_mask_.shape[0], n_samples) ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_ ).astype(np.bool_) ref_inlier_mask[outliers] = False - assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)#check that mask is correct + # check that mask is correct + assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) - #Now check that fit(X) = fit([X1, X2, X3],sample_weight = [n1, n2, n3]) where X = X1 repeated n1 times, X2 repeated n2 times and so forth + """ check that fit(X) = fit([X1, X2, X3],sample_weight = [n1, n2, n3]) where + X = X1 repeated n1 times, X2 repeated n2 times and so forth""" random_state = check_random_state(0) - X_ = 
random_state.randint(0, 200, [10,1]) - y_ = np.ndarray.flatten(0.2 *X_ +2) + X_ = random_state.randint(0, 200, [10, 1]) + y_ = np.ndarray.flatten(0.2 * X_ + 2) sample_weight = random_state.randint(0, 10, 10) - outlier_X = random_state.randint(0,1000,[1,1]) - outlier_weight = random_state.randint(0,10,1 ) - outlier_y = random_state.randint(-1000,0,1) - - X_flat = np.append(np.repeat(X_, sample_weight, axis=0), np.repeat(outlier_X,outlier_weight, axis=0), axis=0) - y_flat = np.ndarray.flatten(np.append(np.repeat(y_, sample_weight, axis=0), np.repeat(outlier_y,outlier_weight, axis=0), axis=0)) - + outlier_X = random_state.randint(0, 1000, [1, 1]) + outlier_weight = random_state.randint(0, 10, 1) + outlier_y = random_state.randint(-1000, 0, 1) + + X_flat = np.append(np.repeat(X_, sample_weight, axis=0), + np.repeat(outlier_X, outlier_weight, axis=0), axis=0) + y_flat = np.ndarray.flatten(np.append(np.repeat(y_, sample_weight, axis=0), + np.repeat(outlier_y, outlier_weight, axis=0), + axis=0)) ransac_estimator.fit(X_flat, y_flat) ref_coef_ = ransac_estimator.estimator_.coef_ sample_weight = np.append(sample_weight, outlier_weight) - X_ = np.append(X_,outlier_X, axis = 0) - y_ = np.append(y_,outlier_y) + X_ = np.append(X_, outlier_X, axis=0) + y_ = np.append(y_, outlier_y) ransac_estimator.fit(X_, y_, sample_weight) assert_almost_equal(ransac_estimator.estimator_.coef_, ref_coef_) - base_estimator = Lasso() #check that if base_estimator.fit doesn't support sample_weight raises error + """ check that if base_estimator.fit doesn't support + sample_weight, raises error""" + base_estimator = Lasso() ransac_estimator = RANSACRegressor(base_estimator) - assert_raises(ValueError,ransac_estimator.fit,X,y, weights ) + assert_raises(ValueError, ransac_estimator.fit, X, y, weights) From b77b3635ca8f6065b30864133d8ee5a8e3da9657 Mon Sep 17 00:00:00 2001 From: imaculate Date: Thu, 14 Jan 2016 19:06:44 +0200 Subject: [PATCH 7/7] Changed comment style ---
sklearn/linear_model/tests/test_ransac.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py index cfa51bdee23d9..1a5f73c666292 100644 --- a/sklearn/linear_model/tests/test_ransac.py +++ b/sklearn/linear_model/tests/test_ransac.py @@ -367,8 +367,8 @@ def test_ransac_fit_sample_weight(): # check that mask is correct assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) - """ check that fit(X) = fit([X1, X2, X3],sample_weight = [n1, n2, n3]) where - X = X1 repeated n1 times, X2 repeated n2 times and so forth""" + # check that fit(X) = fit([X1, X2, X3],sample_weight = [n1, n2, n3]) where + # X = X1 repeated n1 times, X2 repeated n2 times and so forth random_state = check_random_state(0) X_ = random_state.randint(0, 200, [10, 1]) y_ = np.ndarray.flatten(0.2 * X_ + 2) @@ -392,8 +392,8 @@ def test_ransac_fit_sample_weight(): assert_almost_equal(ransac_estimator.estimator_.coef_, ref_coef_) - """ check that if base_estimator.fit doesn't support - sample_weight, raises error""" + # check that if base_estimator.fit doesn't support + # sample_weight, raises error base_estimator = Lasso() ransac_estimator = RANSACRegressor(base_estimator) assert_raises(ValueError, ransac_estimator.fit, X, y, weights)