From 340945f775c7d82d1cbae6211b4929895a9401f6 Mon Sep 17 00:00:00 2001 From: "Peter St. John" Date: Sat, 11 Nov 2017 06:43:59 -0700 Subject: [PATCH 1/2] adding regression weights for BayesianRidge --- sklearn/linear_model/bayes.py | 18 +++++++++++++++--- sklearn/linear_model/tests/test_bayes.py | 15 +++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/bayes.py b/sklearn/linear_model/bayes.py index a094eec0cd935..dd16e5110fe75 100644 --- a/sklearn/linear_model/bayes.py +++ b/sklearn/linear_model/bayes.py @@ -11,7 +11,7 @@ from scipy import linalg from scipy.linalg import pinvh -from .base import LinearModel +from .base import LinearModel, _rescale_data from ..base import RegressorMixin from ..utils.extmath import fast_logdet from ..utils import check_X_y @@ -140,7 +140,7 @@ def __init__(self, n_iter=300, tol=1.e-3, alpha_1=1.e-6, alpha_2=1.e-6, self.copy_X = copy_X self.verbose = verbose - def fit(self, X, y): + def fit(self, X, y, sample_weight=None): """Fit the model Parameters @@ -150,13 +150,25 @@ def fit(self, X, y): y : numpy array of shape [n_samples] Target values. Will be cast to X's dtype if necessary + sample_weight : numpy array of shape [n_samples] + Individual weights for each sample + + .. versionadded:: 0.19.2 + parameter *sample_weight* support to BayesianRidge. + Returns ------- self : returns an instance of self. """ X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True) X, y, X_offset_, y_offset_, X_scale_ = self._preprocess_data( - X, y, self.fit_intercept, self.normalize, self.copy_X) + X, y, self.fit_intercept, self.normalize, self.copy_X, + sample_weight=sample_weight) + + if sample_weight is not None: + # Sample weight can be implemented via a simple rescaling. 
+ X, y = _rescale_data(X, y, sample_weight) + self.X_offset_ = X_offset_ self.X_scale_ = X_scale_ n_samples, n_features = X.shape diff --git a/sklearn/linear_model/tests/test_bayes.py b/sklearn/linear_model/tests/test_bayes.py index 492f77d693a13..5337c0a19c5cf 100644 --- a/sklearn/linear_model/tests/test_bayes.py +++ b/sklearn/linear_model/tests/test_bayes.py @@ -50,6 +50,21 @@ def test_bayesian_ridge_parameter(): assert_almost_equal(rr_model.intercept_, br_model.intercept_) +def test_bayesian_sample_weights(): + # Test correctness of the sample_weights method + X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]]) + y = np.array([1, 2, 3, 2, 0, 4, 5]).T + w = np.array([4, 3, 3, 1, 1, 2, 3]).T + + # A Ridge regression model using an alpha value equal to the ratio of + # lambda_ and alpha_ from the Bayesian Ridge model must be identical + br_model = BayesianRidge(compute_score=True).fit(X, y, sample_weight=w) + rr_model = Ridge(alpha=br_model.lambda_ / br_model.alpha_).fit( + X, y, sample_weight=w) + assert_array_almost_equal(rr_model.coef_, br_model.coef_) + assert_almost_equal(rr_model.intercept_, br_model.intercept_) + + def test_toy_bayesian_ridge_object(): # Test BayesianRidge on toy X = np.array([[1], [2], [6], [8], [10]]) From 44f025aed56dfe9648a4c1cb81bd9eb73b344f7d Mon Sep 17 00:00:00 2001 From: "Peter St. John" Date: Mon, 13 Nov 2017 09:36:31 -0700 Subject: [PATCH 2/2] addressing reviews --- doc/whats_new/v0.20.rst | 4 ++++ sklearn/linear_model/bayes.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index a01ffe41f9757..d4c4a950f3f0e 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -81,6 +81,10 @@ Classifiers and regressors ``inverse_func`` are the inverse of each other. :issue:`9399` by :user:`Guillaume Lemaitre `. +- Add `sample_weight` parameter to the fit method of + :class:`linear_model.BayesianRidge` for weighted linear regression. 
+ :issue:`10111` by :user:`Peter St. John <pstjohn>`. + Model evaluation and meta-estimators - A scorer based on :func:`metrics.brier_score_loss` is also available. diff --git a/sklearn/linear_model/bayes.py b/sklearn/linear_model/bayes.py index dd16e5110fe75..e754613cda381 100644 --- a/sklearn/linear_model/bayes.py +++ b/sklearn/linear_model/bayes.py @@ -153,7 +153,7 @@ def fit(self, X, y, sample_weight=None): sample_weight : numpy array of shape [n_samples] Individual weights for each sample - .. versionadded:: 0.19.2 + .. versionadded:: 0.20 parameter *sample_weight* support to BayesianRidge. Returns