From 8dd7f16bef2e0525ed2a65cc8bd3fd3b2affb0af Mon Sep 17 00:00:00 2001
From: dsullivan7
Date: Wed, 3 Dec 2014 17:15:49 +0100
Subject: [PATCH 1/5] adding support for class_weight in fit method

---
 sklearn/linear_model/stochastic_gradient.py | 23 ++++++++++++--
 sklearn/linear_model/tests/test_sgd.py      | 35 +++++++++++++++++++++
 2 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py
index 4577f32fd7761..9be219112bb91 100644
--- a/sklearn/linear_model/stochastic_gradient.py
+++ b/sklearn/linear_model/stochastic_gradient.py
@@ -394,7 +394,7 @@ def _partial_fit(self, X, y, alpha, C,
         return self
 
     def _fit(self, X, y, alpha, C, loss, learning_rate, coef_init=None,
-             intercept_init=None, sample_weight=None):
+             intercept_init=None, sample_weight=None, class_weight=None):
         if hasattr(self, "classes_"):
             self.classes_ = None
 
@@ -405,6 +405,8 @@ def _fit(self, X, y, alpha, C, loss, learning_rate, coef_init=None,
         # np.unique sorts in asc order; largest class id is positive class
         classes = np.unique(y)
+        if class_weight is not None:
+            self.class_weight = class_weight
 
         if self.warm_start and self.coef_ is not None:
             if coef_init is None:
                 coef_init = self.coef_
@@ -524,7 +526,7 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
                                  coef_init=None, intercept_init=None)
 
     def fit(self, X, y, coef_init=None, intercept_init=None,
-            class_weight=None, sample_weight=None):
+            sample_weight=None, class_weight=None):
         """Fit linear model with Stochastic Gradient Descent.
 
         Parameters
@@ -543,7 +545,21 @@ def fit(self, X, y, coef_init=None, intercept_init=None,
 
         sample_weight : array-like, shape (n_samples,), optional
             Weights applied to individual samples.
-            If not provided, uniform weights are assumed.
+            If not provided, uniform weights are assumed. These weights will
+            be multiplied with the class_weight if the class_weight is
+            specified
+
+        class_weight : dict, {class_label: weight} or "auto" or None, optional
+            Weights associated with classes. If not given, all classes
+            are supposed to have weight one.
+
+            This parameter will overwrite the class_weight parameter passed
+            into the constructor. If both the sample_weight and
+            class_weight are specified, the weights will be multiplied
+            together
+
+            The "auto" mode uses the values of y to automatically adjust
+            weights inversely proportional to class frequencies.
 
         Returns
         -------
@@ -552,6 +568,7 @@ def fit(self, X, y, coef_init=None, intercept_init=None,
         return self._fit(X, y, alpha=self.alpha, C=1.0,
                          loss=self.loss, learning_rate=self.learning_rate,
                          coef_init=coef_init, intercept_init=intercept_init,
+                         class_weight=class_weight,
                          sample_weight=sample_weight)
 
 
diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py
index 6085ecb31bbb4..7ba708015c7b4 100644
--- a/sklearn/linear_model/tests/test_sgd.py
+++ b/sklearn/linear_model/tests/test_sgd.py
@@ -597,6 +597,41 @@ def test_wrong_class_weight_format(self):
         clf = self.factory(alpha=0.1, n_iter=1000, class_weight=[0.5])
         clf.fit(X, Y)
 
+    def test_weights_in_fit(self):
+        """Tests to see if weights passed through fit method"""
+        weights = {1: .6, 2: .3}
+        different_weights = {1: 1.1, 2: .7}
+
+        clf1 = self.factory(alpha=0.1, n_iter=20, class_weight=weights)
+        clf2 = self.factory(alpha=0.1, n_iter=20,
+                            class_weight=different_weights)
+
+        clf1.fit(X4, Y4)
+        clf2.fit(X4, Y4)
+        clf2.fit(X4, Y4, class_weight=weights)
+
+        assert_array_equal(clf1.coef_, clf2.coef_)
+
+    def test_weights_multiplied(self):
+        """Tests that class_weight and sample_weight are multiplicative"""
+        class_weights = {1: .6, 2: .3}
+        sample_weights = np.random.random(Y4.shape[0])
+        multiplied_together = np.copy(sample_weights)
+        multiplied_together[Y4 == 1] *= class_weights[1]
+        multiplied_together[Y4 == 2] *= class_weights[2]
+
+        clf1 = self.factory(alpha=0.1, n_iter=20, class_weight=class_weights)
+        clf2 = self.factory(alpha=0.1, n_iter=20)
+        clf3 = self.factory(alpha=0.1, n_iter=20)
+
+        clf1.fit(X4, Y4, sample_weight=sample_weights)
+        clf2.fit(X4, Y4, sample_weight=multiplied_together)
+        clf3.fit(X4, Y4, class_weight=class_weights,
+                 sample_weight=sample_weights)
+
+        assert_array_equal(clf1.coef_, clf2.coef_)
+        assert_array_equal(clf2.coef_, clf3.coef_)
+
     def test_auto_weight(self):
         """Test class weights for imbalanced data"""
         # compute reference metrics on iris dataset that is quite balanced by

From afe890b2a8aba25e5e111aa224387f453c5653a7 Mon Sep 17 00:00:00 2001
From: dsullivan7
Date: Thu, 4 Dec 2014 13:17:34 +0100
Subject: [PATCH 2/5] adding warning if passing class_weight through fit

---
 sklearn/linear_model/stochastic_gradient.py | 29 +++++++-------------
 sklearn/linear_model/tests/test_sgd.py      | 30 +++++++++------------
 2 files changed, 23 insertions(+), 36 deletions(-)

diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py
index 9be219112bb91..8daeb3a7cf8f8 100644
--- a/sklearn/linear_model/stochastic_gradient.py
+++ b/sklearn/linear_model/stochastic_gradient.py
@@ -6,6 +6,7 @@
 
 import numpy as np
 import scipy.sparse as sp
+import warnings
 
 from abc import ABCMeta, abstractmethod
 
@@ -394,7 +395,7 @@ def _partial_fit(self, X, y, alpha, C,
         return self
 
     def _fit(self, X, y, alpha, C, loss, learning_rate, coef_init=None,
-             intercept_init=None, sample_weight=None, class_weight=None):
+             intercept_init=None, sample_weight=None):
         if hasattr(self, "classes_"):
             self.classes_ = None
 
@@ -405,8 +406,6 @@ def _fit(self, X, y, alpha, C, loss, learning_rate, coef_init=None,
         # np.unique sorts in asc order; largest class id is positive class
         classes = np.unique(y)
-        if class_weight is not None:
-            self.class_weight = class_weight
 
         if self.warm_start and self.coef_ is not None:
             if coef_init is None:
                 coef_init = self.coef_
@@ -526,7 +525,7 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
                                  coef_init=None, intercept_init=None)
 
     def fit(self, X, y, coef_init=None, intercept_init=None,
-            sample_weight=None, class_weight=None):
+            class_weight=None, sample_weight=None):
         """Fit linear model with Stochastic Gradient Descent.
 
         Parameters
@@ -546,29 +545,21 @@ def fit(self, X, y, coef_init=None, intercept_init=None,
         sample_weight : array-like, shape (n_samples,), optional
             Weights applied to individual samples.
             If not provided, uniform weights are assumed. These weights will
-            be multiplied with the class_weight if the class_weight is
-            specified
-
-        class_weight : dict, {class_label: weight} or "auto" or None, optional
-            Weights associated with classes. If not given, all classes
-            are supposed to have weight one.
-
-            This parameter will overwrite the class_weight parameter passed
-            into the constructor. If both the sample_weight and
-            class_weight are specified, the weights will be multiplied
-            together
-
-            The "auto" mode uses the values of y to automatically adjust
-            weights inversely proportional to class frequencies.
+            be multiplied with class_weight (passed through the
+            constructor) if class_weight is specified.
 
         Returns
         -------
         self : returns an instance of self.
         """
+        if class_weight is not None:
+            warnings.warn("You are trying to set class_weight through the fit "
+                          "method, which will not be possible in a later "
+                          "version of scikit. Pass the class_weight into "
+                          "the constructor instead.")
         return self._fit(X, y, alpha=self.alpha, C=1.0,
                          loss=self.loss, learning_rate=self.learning_rate,
                          coef_init=coef_init, intercept_init=intercept_init,
-                         class_weight=class_weight,
                          sample_weight=sample_weight)
 
 
diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py
index 7ba708015c7b4..14b6d175abeae 100644
--- a/sklearn/linear_model/tests/test_sgd.py
+++ b/sklearn/linear_model/tests/test_sgd.py
@@ -597,20 +597,20 @@ def test_wrong_class_weight_format(self):
         clf = self.factory(alpha=0.1, n_iter=1000, class_weight=[0.5])
         clf.fit(X, Y)
 
-    def test_weights_in_fit(self):
-        """Tests to see if weights passed through fit method"""
-        weights = {1: .6, 2: .3}
-        different_weights = {1: 1.1, 2: .7}
+    def test_class_weight_warning(self):
+        """Tests that class_weight passed through fit raises warning.
+        This test should be removed after deprecating support for this"""
 
-        clf1 = self.factory(alpha=0.1, n_iter=20, class_weight=weights)
-        clf2 = self.factory(alpha=0.1, n_iter=20,
-                            class_weight=different_weights)
-
-        clf1.fit(X4, Y4)
-        clf2.fit(X4, Y4)
-        clf2.fit(X4, Y4, class_weight=weights)
-
-        assert_array_equal(clf1.coef_, clf2.coef_)
+        clf = self.factory()
+        warning_message = ("You are trying to set class_weight through the "
+                           "fit "
+                           "method, which will not be possible in a later "
+                           "version of scikit. Pass the class_weight into "
+                           "the constructor instead.")
+        import warnings
+        with warnings.catch_warnings(record=True) as w:
+            clf.fit(X4, Y4, class_weight=1)
+        assert_true(warning_message == str(w[-1].message))
 
     def test_weights_multiplied(self):
         """Tests that class_weight and sample_weight are multiplicative"""
@@ -622,15 +622,11 @@ def test_weights_multiplied(self):
 
         clf1 = self.factory(alpha=0.1, n_iter=20, class_weight=class_weights)
         clf2 = self.factory(alpha=0.1, n_iter=20)
-        clf3 = self.factory(alpha=0.1, n_iter=20)
 
         clf1.fit(X4, Y4, sample_weight=sample_weights)
         clf2.fit(X4, Y4, sample_weight=multiplied_together)
-        clf3.fit(X4, Y4, class_weight=class_weights,
-                 sample_weight=sample_weights)
 
         assert_array_equal(clf1.coef_, clf2.coef_)
-        assert_array_equal(clf2.coef_, clf3.coef_)
 
     def test_auto_weight(self):
         """Test class weights for imbalanced data"""

From b99824fb46199c35827033b7e9637813b5e01a34 Mon Sep 17 00:00:00 2001
From: dsullivan7
Date: Fri, 5 Dec 2014 09:39:39 +0100
Subject: [PATCH 3/5] changing warn message, making it Deprecation Warning,
 and removing negative index for py2.7

---
 sklearn/linear_model/stochastic_gradient.py | 6 +++---
 sklearn/linear_model/tests/test_sgd.py      | 7 ++++---
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py
index 8daeb3a7cf8f8..e0f5c76dc7bb3 100644
--- a/sklearn/linear_model/stochastic_gradient.py
+++ b/sklearn/linear_model/stochastic_gradient.py
@@ -554,9 +554,9 @@ def fit(self, X, y, coef_init=None, intercept_init=None,
         """
         if class_weight is not None:
             warnings.warn("You are trying to set class_weight through the fit "
-                          "method, which will not be possible in a later "
-                          "version of scikit. Pass the class_weight into "
-                          "the constructor instead.")
+                          "method, which will be deprecated in version "
+                          "v0.17 of scikit-learn. Pass the class_weight into "
+                          "the constructor instead.", DeprecationWarning)
         return self._fit(X, y, alpha=self.alpha, C=1.0,
                          loss=self.loss, learning_rate=self.learning_rate,
                          coef_init=coef_init, intercept_init=intercept_init,
diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py
index 14b6d175abeae..56357bab6b675 100644
--- a/sklearn/linear_model/tests/test_sgd.py
+++ b/sklearn/linear_model/tests/test_sgd.py
@@ -604,13 +604,14 @@ def test_class_weight_warning(self):
         clf = self.factory()
         warning_message = ("You are trying to set class_weight through the "
                            "fit "
-                           "method, which will not be possible in a later "
-                           "version of scikit. Pass the class_weight into "
+                           "method, which will be deprecated in version "
+                           "v0.17 of scikit-learn. Pass the class_weight into "
                            "the constructor instead.")
         import warnings
         with warnings.catch_warnings(record=True) as w:
             clf.fit(X4, Y4, class_weight=1)
-        assert_true(warning_message == str(w[-1].message))
+        assert_true(warning_message == str(w[0].message))
+        assert_true(issubclass(w[0].category, DeprecationWarning))
 
     def test_weights_multiplied(self):
         """Tests that class_weight and sample_weight are multiplicative"""

From 5a019b7202fb887506ed6ae2ef1c1716ca9a6f7a Mon Sep 17 00:00:00 2001
From: dsullivan7
Date: Wed, 10 Dec 2014 16:07:46 +0100
Subject: [PATCH 4/5] adding simplefilter for warning

---
 sklearn/linear_model/tests/test_sgd.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py
index 56357bab6b675..1845b1418fdb1 100644
--- a/sklearn/linear_model/tests/test_sgd.py
+++ b/sklearn/linear_model/tests/test_sgd.py
@@ -609,6 +609,7 @@ def test_class_weight_warning(self):
                            "the constructor instead.")
         import warnings
         with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always", DeprecationWarning)
             clf.fit(X4, Y4, class_weight=1)
         assert_true(warning_message == str(w[0].message))
         assert_true(issubclass(w[0].category, DeprecationWarning))

From 6cb191cfd88c52f90d6983487011c30129217c15 Mon Sep 17 00:00:00 2001
From: dsullivan7
Date: Fri, 12 Dec 2014 13:27:49 +0100
Subject: [PATCH 5/5] changing warning test to use assert_raises_message

---
 sklearn/linear_model/tests/test_sgd.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py
index 1845b1418fdb1..a817088fb983d 100644
--- a/sklearn/linear_model/tests/test_sgd.py
+++ b/sklearn/linear_model/tests/test_sgd.py
@@ -14,6 +14,7 @@
 from sklearn.utils.testing import assert_false, assert_true
 from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_raises_regexp
+from sklearn.utils.testing import assert_warns_message
 
 from sklearn import linear_model, datasets, metrics
 from sklearn.base import clone
@@ -607,12 +608,10 @@ def test_class_weight_warning(self):
                            "method, which will be deprecated in version "
                            "v0.17 of scikit-learn. Pass the class_weight into "
                            "the constructor instead.")
-        import warnings
-        with warnings.catch_warnings(record=True) as w:
-            warnings.simplefilter("always", DeprecationWarning)
-            clf.fit(X4, Y4, class_weight=1)
-        assert_true(warning_message == str(w[0].message))
-        assert_true(issubclass(w[0].category, DeprecationWarning))
+        assert_warns_message(DeprecationWarning,
+                             warning_message,
+                             clf.fit, X4, Y4,
+                             class_weight=1)
 
     def test_weights_multiplied(self):
         """Tests that class_weight and sample_weight are multiplicative"""
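
A rough usage sketch of the behaviour this series converges on, assuming a scikit-learn build of this era (n_iter-style constructor arguments) with all five patches applied: class_weight passed to the constructor is multiplied with sample_weight, while passing class_weight to fit() triggers the new DeprecationWarning asking for the constructor to be used instead. The toy X/y arrays, the alpha/n_iter values, and the weight dictionaries below are made up for illustration; they are not taken from the patches.

    # Usage sketch only; assumes a build containing patches 1-5 above.
    import warnings

    import numpy as np
    from sklearn.linear_model import SGDClassifier

    # Toy data, made up for illustration (the real tests use X4/Y4 fixtures).
    X = np.array([[1., 0.], [2., 1.], [0., 1.], [1., 2.]])
    y = np.array([1, 1, 2, 2])
    class_weights = {1: .6, 2: .3}
    sample_weights = np.array([1., 2., 1., 2.])

    # class_weight given to the constructor is multiplied with sample_weight,
    # so pre-multiplying the per-sample weights gives the same coefficients.
    clf1 = SGDClassifier(alpha=0.1, n_iter=20, random_state=0,
                         class_weight=class_weights)
    clf1.fit(X, y, sample_weight=sample_weights)

    premultiplied = sample_weights.copy()
    for label, weight in class_weights.items():
        premultiplied[y == label] *= weight
    clf2 = SGDClassifier(alpha=0.1, n_iter=20, random_state=0)
    clf2.fit(X, y, sample_weight=premultiplied)

    assert np.allclose(clf1.coef_, clf2.coef_)

    # Passing class_weight to fit() itself raises a DeprecationWarning
    # pointing the caller to the constructor (patches 2-5).
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always", DeprecationWarning)
        clf2.fit(X, y, class_weight=class_weights)
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

The first assertion mirrors test_weights_multiplied: with the same random_state the two runs see identical effective per-sample weights, so the coefficients match. The second mirrors test_class_weight_warning as it stands after patches 3 and 4 (DeprecationWarning plus simplefilter).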