From 07b474cefc1e13996c4e04d9bb1e9f3d54ce0e33 Mon Sep 17 00:00:00 2001 From: Dylan Werner-Meier Date: Thu, 25 Feb 2016 13:37:06 +0100 Subject: [PATCH 1/9] Fix issue #6298 Adds a "classes_" property to BaseSearchCV --- sklearn/grid_search.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 236ef6f411492..65902c526540d 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -386,6 +386,10 @@ def __init__(self, estimator, scoring=None, def _estimator_type(self): return self.estimator._estimator_type + @property + def classes_(self): + return self.best_estimator_.classes_ + def score(self, X, y=None): """Returns the score on the given data, if the estimator has been refit. From 0d797160f2dd1066b9417bc7f83a89f2bde7827b Mon Sep 17 00:00:00 2001 From: Alyssa Batula Date: Thu, 13 Oct 2016 00:47:24 -0400 Subject: [PATCH 2/9] Added test to ensure classes_ property is added to gridSearch correctly --- sklearn/tests/test_grid_search.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py index 13b9086310595..dd70a4775b201 100644 --- a/sklearn/tests/test_grid_search.py +++ b/sklearn/tests/test_grid_search.py @@ -785,3 +785,16 @@ def test_parameters_sampler_replacement(): sampler = ParameterSampler(params_distribution, n_iter=7) samples = list(sampler) assert_equal(len(samples), 7) + + +def test_classes__property(): + # Test that classes_ property matches best_esimator_.classes_ + X = np.arange(100).reshape(10, 10) + y = np.array([0] * 5 + [1] * 5) + Cs = [.1, 1, 10] + + clf = LinearSVC(random_state=0) + + grid_search = GridSearchCV(clf, {'C': Cs}, scoring='accuracy') + grid_search.fit(X, y) + assert_array_equal(grid_search.best_estimator_.classes_, grid_search.classes_) From b8ee33dcbe72c0cf40cc8c074279118950284160 Mon Sep 17 00:00:00 2001 From: Alyssa Batula Date: Sat, 15 Oct 2016 21:59:47 -0400 Subject: [PATCH 3/9] Fixed 
formatting --- sklearn/tests/test_grid_search.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py index dd70a4775b201..d0005456c45f9 100644 --- a/sklearn/tests/test_grid_search.py +++ b/sklearn/tests/test_grid_search.py @@ -792,9 +792,10 @@ def test_classes__property(): X = np.arange(100).reshape(10, 10) y = np.array([0] * 5 + [1] * 5) Cs = [.1, 1, 10] - + clf = LinearSVC(random_state=0) grid_search = GridSearchCV(clf, {'C': Cs}, scoring='accuracy') grid_search.fit(X, y) - assert_array_equal(grid_search.best_estimator_.classes_, grid_search.classes_) + assert_array_equal(grid_search.best_estimator_.classes_, + grid_search.classes_) From 5c44632aadc925b035c97ab1bf30ea73c2fbb140 Mon Sep 17 00:00:00 2001 From: Alyssa Batula Date: Sat, 15 Oct 2016 22:27:32 -0400 Subject: [PATCH 4/9] Added test to ensure gridSearchCV with a regressor does not have a classes_ attribute --- sklearn/tests/test_grid_search.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py index d0005456c45f9..0f753603e3461 100644 --- a/sklearn/tests/test_grid_search.py +++ b/sklearn/tests/test_grid_search.py @@ -42,6 +42,7 @@ from sklearn.metrics import f1_score from sklearn.metrics import make_scorer from sklearn.metrics import roc_auc_score +from sklearn.linear_model import Ridge from sklearn.exceptions import ChangedBehaviorWarning from sklearn.exceptions import FitFailedWarning @@ -799,3 +800,15 @@ def test_classes__property(): grid_search.fit(X, y) assert_array_equal(grid_search.best_estimator_.classes_, grid_search.classes_) + +def test_classes__regressor(): + # Test that regressors do not have a classes_ attribute + # Test that classes_ property matches best_esimator_.classes_ + X = np.arange(100).reshape(10, 10) + y = np.array([0] * 5 + [1] * 5) + + regr = Ridge() + + grid_search = GridSearchCV(regr, {'alpha':[1.0,2.0]}) + 
grid_search.fit(X, y) + assert_false(hasattr(grid_search, 'classes_')) From 6a1ef11fa3cf4a744f5903a79493e3f76552f522 Mon Sep 17 00:00:00 2001 From: Alyssa Batula Date: Sat, 15 Oct 2016 22:49:37 -0400 Subject: [PATCH 5/9] Fixed whitespace issues --- sklearn/tests/test_grid_search.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py index 0f753603e3461..55990123b42c6 100644 --- a/sklearn/tests/test_grid_search.py +++ b/sklearn/tests/test_grid_search.py @@ -801,6 +801,7 @@ def test_classes__property(): assert_array_equal(grid_search.best_estimator_.classes_, grid_search.classes_) + def test_classes__regressor(): # Test that regressors do not have a classes_ attribute # Test that classes_ property matches best_esimator_.classes_ @@ -809,6 +810,6 @@ def test_classes__regressor(): regr = Ridge() - grid_search = GridSearchCV(regr, {'alpha':[1.0,2.0]}) + grid_search = GridSearchCV(regr, {'alpha': [1.0, 2.0]}) grid_search.fit(X, y) assert_false(hasattr(grid_search, 'classes_')) From 359e86828cb6f5e6a56ff0d474d8abdc212c1e0b Mon Sep 17 00:00:00 2001 From: Alyssa Batula Date: Mon, 17 Oct 2016 19:52:08 -0400 Subject: [PATCH 6/9] Combined tests for the new GridSearchCV.classes_ property into a single test.
--- sklearn/tests/test_grid_search.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/sklearn/tests/test_grid_search.py b/sklearn/tests/test_grid_search.py index 55990123b42c6..e6c2e18538163 100644 --- a/sklearn/tests/test_grid_search.py +++ b/sklearn/tests/test_grid_search.py @@ -794,22 +794,12 @@ def test_classes__property(): y = np.array([0] * 5 + [1] * 5) Cs = [.1, 1, 10] - clf = LinearSVC(random_state=0) - - grid_search = GridSearchCV(clf, {'C': Cs}, scoring='accuracy') + grid_search = GridSearchCV(LinearSVC(random_state=0), {'C': Cs}) grid_search.fit(X, y) assert_array_equal(grid_search.best_estimator_.classes_, grid_search.classes_) - -def test_classes__regressor(): # Test that regressors do not have a classes_ attribute - # Test that classes_ property matches best_esimator_.classes_ - X = np.arange(100).reshape(10, 10) - y = np.array([0] * 5 + [1] * 5) - - regr = Ridge() - - grid_search = GridSearchCV(regr, {'alpha': [1.0, 2.0]}) + grid_search = GridSearchCV(Ridge(), {'alpha': [1.0, 2.0]}) grid_search.fit(X, y) assert_false(hasattr(grid_search, 'classes_')) From c1d844acce8b62b204ab70a889186e7939e21fe9 Mon Sep 17 00:00:00 2001 From: Alyssa Batula Date: Mon, 17 Oct 2016 19:59:05 -0400 Subject: [PATCH 7/9] Removed trailing whitespace --- sklearn/grid_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/grid_search.py b/sklearn/grid_search.py index 552b5dce50baa..f49d7e0485fa5 100644 --- a/sklearn/grid_search.py +++ b/sklearn/grid_search.py @@ -692,7 +692,7 @@ class GridSearchCV(BaseSearchCV): - An iterable yielding train/test splits. For integer/None inputs, if the estimator is a classifier and ``y`` is - either binary or multiclass, + either binary or multiclass, :class:`sklearn.model_selection.StratifiedKFold` is used. In all other cases, :class:`sklearn.model_selection.KFold` is used. @@ -904,7 +904,7 @@ class RandomizedSearchCV(BaseSearchCV): - An iterable yielding train/test splits. 
For integer/None inputs, if the estimator is a classifier and ``y`` is - either binary or multiclass, + either binary or multiclass, :class:`sklearn.model_selection.StratifiedKFold` is used. In all other cases, :class:`sklearn.model_selection.KFold` is used. From 1683de4204003153a704d97692436cfb7020f0e6 Mon Sep 17 00:00:00 2001 From: Alyssa Batula Date: Tue, 18 Oct 2016 23:57:33 -0400 Subject: [PATCH 8/9] Added what's new for pull request #7661 --- doc/whats_new.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index a4b775ec66d0a..7b6e362fb6dea 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -19,6 +19,11 @@ New features Enhancements ............ + - Added ``classes_`` attribute to gridSearchCV object that matches the + ``classes_`` attribute of ``best_estimator_``. (`#7661 + <https://github.com/scikit-learn/scikit-learn/pull/7661>`_) by `Alyssa + Batula`_ and `Dylan Werner-Meier`_. + - The ``min_weight_fraction_leaf`` constraint in tree construction is now more efficient, taking a fast path to declare a node a leaf if its weight is less than 2 * the minimum. Note that the constructed tree will be From 084d35d95113e46b9bf3983738abee22ab03e1a2 Mon Sep 17 00:00:00 2001 From: Alyssa Batula Date: Wed, 19 Oct 2016 21:21:05 -0400 Subject: [PATCH 9/9] Fixed formatting of update in what's new --- doc/whats_new.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 7b6e362fb6dea..6998ac63cf791 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -19,8 +19,8 @@ New features Enhancements ............ - - Added ``classes_`` attribute to gridSearchCV object that matches the - ``classes_`` attribute of ``best_estimator_``. (`#7661 + - Added ``classes_`` attribute to :class:`model_selection.GridSearchCV` + that matches the ``classes_`` attribute of ``best_estimator_``. (`#7661 <https://github.com/scikit-learn/scikit-learn/pull/7661>`_) by `Alyssa Batula`_ and `Dylan Werner-Meier`_.