From d382bb4e67fa7d92143509e0ed2317e25760f4be Mon Sep 17 00:00:00 2001 From: Wally Date: Sat, 8 Jul 2017 21:03:32 +0100 Subject: [PATCH 01/17] fixed bug (not tested), writing test --- sklearn/decomposition/incremental_pca.py | 7 ++++++- sklearn/decomposition/tests/test_incremental_pca.py | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/sklearn/decomposition/incremental_pca.py b/sklearn/decomposition/incremental_pca.py index 9b23d1f16e1fd..9ad3d91d37f72 100644 --- a/sklearn/decomposition/incremental_pca.py +++ b/sklearn/decomposition/incremental_pca.py @@ -208,11 +208,16 @@ def partial_fit(self, X, y=None, check_input=True): self.components_ = None if self.n_components is None: - self.n_components_ = n_features + self.n_components_ = min(n_samples, n_features) elif not 1 <= self.n_components <= n_features: raise ValueError("n_components=%r invalid for n_features=%d, need " "more rows than columns for IncrementalPCA " "processing" % (self.n_components, n_features)) + elif not 1 <= self.n_components <= n_samples: + raise ValueError("n_components=%r must be less or equal to " + "the batch number of samples %d. You can change " + "either one depending on what you " + "want." 
% (self.n_components, n_samples)) else: self.n_components_ = self.n_components diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py index 87e7f9d7683e1..c03657c6791ca 100644 --- a/sklearn/decomposition/tests/test_incremental_pca.py +++ b/sklearn/decomposition/tests/test_incremental_pca.py @@ -77,6 +77,10 @@ def test_incremental_pca_validation(): for n_components in [-1, 0, .99, 3]: assert_raises(ValueError, IncrementalPCA(n_components, batch_size=10).fit, X) + X = [[0, 1], [1, 0]] + for n_components in [-1, 0, .99, 3]: + assert_raises(ValueError, IncrementalPCA(n_components, + batch_size=1).fit, X) def test_incremental_pca_set_params(): From fcb2768b870f83b4fbec9edfca8f9f00a625bab1 Mon Sep 17 00:00:00 2001 From: Wally Date: Sat, 8 Jul 2017 22:15:11 +0100 Subject: [PATCH 02/17] removed lower interval comparison check from fix, more work on test --- sklearn/decomposition/incremental_pca.py | 2 +- sklearn/decomposition/tests/test_incremental_pca.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/sklearn/decomposition/incremental_pca.py b/sklearn/decomposition/incremental_pca.py index 9ad3d91d37f72..e267fb8ab0fac 100644 --- a/sklearn/decomposition/incremental_pca.py +++ b/sklearn/decomposition/incremental_pca.py @@ -213,7 +213,7 @@ def partial_fit(self, X, y=None, check_input=True): raise ValueError("n_components=%r invalid for n_features=%d, need " "more rows than columns for IncrementalPCA " "processing" % (self.n_components, n_features)) - elif not 1 <= self.n_components <= n_samples: + elif not self.n_components <= n_samples: raise ValueError("n_components=%r must be less or equal to " "the batch number of samples %d. 
You can change " "either one depending on what you " diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py index c03657c6791ca..c7f9db522b497 100644 --- a/sklearn/decomposition/tests/test_incremental_pca.py +++ b/sklearn/decomposition/tests/test_incremental_pca.py @@ -4,6 +4,7 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_raises +from sklearn.utils.testing import assert_raises_regex from sklearn import datasets from sklearn.decomposition import PCA, IncrementalPCA @@ -77,10 +78,13 @@ def test_incremental_pca_validation(): for n_components in [-1, 0, .99, 3]: assert_raises(ValueError, IncrementalPCA(n_components, batch_size=10).fit, X) - X = [[0, 1], [1, 0]] for n_components in [-1, 0, .99, 3]: - assert_raises(ValueError, IncrementalPCA(n_components, - batch_size=1).fit, X) + X2 = [[0, 1, 0], [1, 0, 0]] + assert_raises_regex(ValueError, + "n_components\=.* be less or equal to " + "the batch number of samples .*\. 
You can change " + "either one depending on what you want\.", + IncrementalPCA(n_components).partial_fit, X2) def test_incremental_pca_set_params(): From d4bd366359465bfa50155c537a581ca3f7df5147 Mon Sep 17 00:00:00 2001 From: Wally Date: Sat, 8 Jul 2017 23:13:48 +0100 Subject: [PATCH 03/17] fix was failing another test, + finished test for fix --- sklearn/decomposition/incremental_pca.py | 5 +++- .../tests/test_incremental_pca.py | 23 +++++++++++-------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/sklearn/decomposition/incremental_pca.py b/sklearn/decomposition/incremental_pca.py index e267fb8ab0fac..7d8708596da0b 100644 --- a/sklearn/decomposition/incremental_pca.py +++ b/sklearn/decomposition/incremental_pca.py @@ -208,7 +208,10 @@ def partial_fit(self, X, y=None, check_input=True): self.components_ = None if self.n_components is None: - self.n_components_ = min(n_samples, n_features) + if self.components_ is None: + self.n_components_ = min(n_samples, n_features) + else: + self.n_components_ = self.components_.shape[0] elif not 1 <= self.n_components <= n_features: raise ValueError("n_components=%r invalid for n_features=%d, need " "more rows than columns for IncrementalPCA " diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py index c7f9db522b497..5f23f3440e42c 100644 --- a/sklearn/decomposition/tests/test_incremental_pca.py +++ b/sklearn/decomposition/tests/test_incremental_pca.py @@ -74,17 +74,20 @@ def test_incremental_pca_inverse(): def test_incremental_pca_validation(): # Test that n_components is >=1 and <= n_features. 
- X = [[0, 1], [1, 0]] - for n_components in [-1, 0, .99, 3]: - assert_raises(ValueError, IncrementalPCA(n_components, - batch_size=10).fit, X) - for n_components in [-1, 0, .99, 3]: - X2 = [[0, 1, 0], [1, 0, 0]] + X = [[0, 1, 0], [1, 0, 0]] + for n_components in [-1, 0, .99, 4]: assert_raises_regex(ValueError, - "n_components\=.* be less or equal to " - "the batch number of samples .*\. You can change " - "either one depending on what you want\.", - IncrementalPCA(n_components).partial_fit, X2) + "n_components\=.* invalid for n_features\=.*, need" + " more rows than columns for IncrementalPCA " + "processing", + IncrementalPCA(n_components, batch_size=10).fit, X) + + # Tests that n_components is also <= n_samples. + assert_raises_regex(ValueError, + "n_components\=.* be less or equal to " + "the batch number of samples .*\. You can change " + "either one depending on what you want\.", + IncrementalPCA(n_components=3).partial_fit, X) def test_incremental_pca_set_params(): From 2cff58d02ef6d450504f9108e9dd09742beb6955 Mon Sep 17 00:00:00 2001 From: Wally Date: Fri, 14 Jul 2017 15:58:43 +0100 Subject: [PATCH 04/17] Revert "Merge branch 'master' of https://github.com/scikit-learn/scikit-learn into n_samples6452" This reverts commit 71c5a730c9c43cafe4bea38c18a65e61277dd7a7, reversing changes made to d4bd366359465bfa50155c537a581ca3f7df5147. --- doc/whats_new.rst | 5 +---- sklearn/datasets/kddcup99.py | 6 +++++- sklearn/linear_model/ridge.py | 3 +-- sklearn/linear_model/tests/test_ridge.py | 11 ----------- sklearn/model_selection/_split.py | 2 +- sklearn/neural_network/multilayer_perceptron.py | 2 +- 6 files changed, 9 insertions(+), 20 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 3c87d4174c388..0c5608d6b5970 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -471,13 +471,10 @@ Bug fixes by :user:`Andre Ambrosio Boechat `, :user:`Utkarsh Upadhyay `, and `Joel Nothman`_. 
+ - Add ``data_home`` parameter to :func:`sklearn.datasets.fetch_kddcup99` by `Loic Esteve`_. - - Fix inconsistent results between :class:`linear_model.RidgeCV` - and :class:`linear_model.Ridge` when using ``normalize=True`` - by `Alexandre Gramfort`_. - API changes summary ------------------- diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py index 6d52c5b6214b2..89c74238bc4f3 100644 --- a/sklearn/datasets/kddcup99.py +++ b/sklearn/datasets/kddcup99.py @@ -222,7 +222,7 @@ def fetch_kddcup99(subset=None, data_home=None, shuffle=False, return Bunch(data=data, target=target) -def _fetch_brute_kddcup99(data_home=None, +def _fetch_brute_kddcup99(subset=None, data_home=None, download_if_missing=True, random_state=None, shuffle=False, percent10=True): @@ -230,6 +230,10 @@ def _fetch_brute_kddcup99(data_home=None, Parameters ---------- + subset : None, 'SA', 'SF', 'http', 'smtp' + To return the corresponding classical subsets of kddcup 99. + If None, return the entire kddcup 99 dataset. + data_home : string, optional Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. 
diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index caf2f9eed64c2..e0c7b6f188037 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -1119,8 +1119,7 @@ def fit(self, X, y, sample_weight=None): raise ValueError("cv!=None and store_cv_values=True " " are incompatible") parameters = {'alpha': self.alphas} - gs = GridSearchCV(Ridge(fit_intercept=self.fit_intercept, - normalize=self.normalize), + gs = GridSearchCV(Ridge(fit_intercept=self.fit_intercept), parameters, cv=self.cv, scoring=self.scoring) gs.fit(X, y, sample_weight=sample_weight) estimator = gs.best_estimator_ diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index ee44da5d56b86..4879e02deff50 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -383,16 +383,6 @@ def _test_ridge_loo(filter_): return ret -def _test_ridge_cv_normalize(filter_): - ridge_cv = RidgeCV(normalize=True, cv=3) - ridge_cv.fit(filter_(10. * X_diabetes), y_diabetes) - - gs = GridSearchCV(Ridge(normalize=True), cv=3, - param_grid={'alpha': ridge_cv.alphas}) - gs.fit(filter_(10. 
* X_diabetes), y_diabetes) - assert_equal(gs.best_estimator_.alpha, ridge_cv.alpha_) - - def _test_ridge_cv(filter_): ridge_cv = RidgeCV() ridge_cv.fit(filter_(X_diabetes), y_diabetes) @@ -472,7 +462,6 @@ def check_dense_sparse(test_func): def test_dense_sparse(): for test_func in (_test_ridge_loo, _test_ridge_cv, - _test_ridge_cv_normalize, _test_ridge_diabetes, _test_multi_ridge_diabetes, _test_ridge_classifiers, diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 4bcc0ae1c5349..3f228e85c43e8 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -566,7 +566,7 @@ class StratifiedKFold(_BaseKFold): def __init__(self, n_splits=3, shuffle=False, random_state=None): super(StratifiedKFold, self).__init__(n_splits, shuffle, random_state) - def _make_test_folds(self, X, y=None): + def _make_test_folds(self, X, y=None, groups=None): if self.shuffle: rng = check_random_state(self.random_state) else: diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py index d4adfd9107f6e..ec1196a3e2ac6 100644 --- a/sklearn/neural_network/multilayer_perceptron.py +++ b/sklearn/neural_network/multilayer_perceptron.py @@ -640,7 +640,7 @@ def partial_fit(self): % self.solver) return self._partial_fit - def _partial_fit(self, X, y): + def _partial_fit(self, X, y, classes=None): return self._fit(X, y, incremental=True) def _predict(self, X): From 5b250ce3484cec5dcfc0c54bdd5c3971beb2644f Mon Sep 17 00:00:00 2001 From: wallygauze Date: Fri, 14 Jul 2017 17:51:50 +0100 Subject: [PATCH 05/17] Correcting side-effects from reverting merge --- sklearn/datasets/kddcup99.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py index 4c05c7fdf8886..56cf3c4181c7c 100644 --- a/sklearn/datasets/kddcup99.py +++ b/sklearn/datasets/kddcup99.py @@ -222,7 +222,7 @@ def fetch_kddcup99(subset=None, 
data_home=None, shuffle=False, return Bunch(data=data, target=target) -def _fetch_brute_kddcup99(subset=None, data_home=None, +def _fetch_brute_kddcup99(data_home=None, download_if_missing=True, random_state=None, shuffle=False, percent10=True): @@ -230,10 +230,6 @@ def _fetch_brute_kddcup99(subset=None, data_home=None, Parameters ---------- - subset : None, 'SA', 'SF', 'http', 'smtp' - To return the corresponding classical subsets of kddcup 99. - If None, return the entire kddcup 99 dataset. - data_home : string, optional Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. From c508034ff4a31c6b430e0f1997334f8cc317e6b7 Mon Sep 17 00:00:00 2001 From: wallygauze Date: Fri, 14 Jul 2017 17:55:38 +0100 Subject: [PATCH 06/17] Correction number 2 --- sklearn/linear_model/ridge.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py index 6bfcad836c5b8..3e584a78ad93a 100644 --- a/sklearn/linear_model/ridge.py +++ b/sklearn/linear_model/ridge.py @@ -1120,7 +1120,8 @@ def fit(self, X, y, sample_weight=None): raise ValueError("cv!=None and store_cv_values=True " " are incompatible") parameters = {'alpha': self.alphas} - gs = GridSearchCV(Ridge(fit_intercept=self.fit_intercept), + gs = GridSearchCV(Ridge(fit_intercept=self.fit_intercept, + normalize=self.normalize), parameters, cv=self.cv, scoring=self.scoring) gs.fit(X, y, sample_weight=sample_weight) estimator = gs.best_estimator_ From e6b38e34b9bf085c98654dceece494d369c0c9ed Mon Sep 17 00:00:00 2001 From: wallygauze Date: Fri, 14 Jul 2017 17:58:57 +0100 Subject: [PATCH 07/17] Correction number 3 --- sklearn/linear_model/tests/test_ridge.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 4879e02deff50..6cfce63464569 100644 --- 
a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -383,6 +383,15 @@ def _test_ridge_loo(filter_): return ret +def _test_ridge_cv_normalize(filter_): + ridge_cv = RidgeCV(normalize=True, cv=3) + ridge_cv.fit(filter_(10. * X_diabetes), y_diabetes) + + gs = GridSearchCV(Ridge(normalize=True), cv=3, + param_grid={'alpha': ridge_cv.alphas}) + gs.fit(filter_(10. * X_diabetes), y_diabetes) + assert_equal(gs.best_estimator_.alpha, ridge_cv.alpha_) + def _test_ridge_cv(filter_): ridge_cv = RidgeCV() ridge_cv.fit(filter_(X_diabetes), y_diabetes) From 93f73013ab57a995236388d97508d662294b40da Mon Sep 17 00:00:00 2001 From: wallygauze Date: Fri, 14 Jul 2017 18:05:06 +0100 Subject: [PATCH 08/17] Correction number 4 --- sklearn/linear_model/tests/test_ridge.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 6cfce63464569..ee44da5d56b86 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -386,12 +386,13 @@ def _test_ridge_loo(filter_): def _test_ridge_cv_normalize(filter_): ridge_cv = RidgeCV(normalize=True, cv=3) ridge_cv.fit(filter_(10. * X_diabetes), y_diabetes) - + gs = GridSearchCV(Ridge(normalize=True), cv=3, param_grid={'alpha': ridge_cv.alphas}) gs.fit(filter_(10. 
* X_diabetes), y_diabetes) assert_equal(gs.best_estimator_.alpha, ridge_cv.alpha_) - + + def _test_ridge_cv(filter_): ridge_cv = RidgeCV() ridge_cv.fit(filter_(X_diabetes), y_diabetes) @@ -471,6 +472,7 @@ def check_dense_sparse(test_func): def test_dense_sparse(): for test_func in (_test_ridge_loo, _test_ridge_cv, + _test_ridge_cv_normalize, _test_ridge_diabetes, _test_multi_ridge_diabetes, _test_ridge_classifiers, From 1acfd8baa43ae5f9eb78dace35f3349d1fd01ea4 Mon Sep 17 00:00:00 2001 From: wallygauze Date: Fri, 14 Jul 2017 18:08:01 +0100 Subject: [PATCH 09/17] Correction number 5 --- sklearn/model_selection/_split.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 3f228e85c43e8..4bcc0ae1c5349 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -566,7 +566,7 @@ class StratifiedKFold(_BaseKFold): def __init__(self, n_splits=3, shuffle=False, random_state=None): super(StratifiedKFold, self).__init__(n_splits, shuffle, random_state) - def _make_test_folds(self, X, y=None, groups=None): + def _make_test_folds(self, X, y=None): if self.shuffle: rng = check_random_state(self.random_state) else: From 289a8ac93e6b704f87276be39c300378f79b2734 Mon Sep 17 00:00:00 2001 From: wallygauze Date: Fri, 14 Jul 2017 18:13:51 +0100 Subject: [PATCH 10/17] Last Correction --- sklearn/neural_network/multilayer_perceptron.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py index 735021a06e532..af1eca3b201d5 100644 --- a/sklearn/neural_network/multilayer_perceptron.py +++ b/sklearn/neural_network/multilayer_perceptron.py @@ -640,7 +640,7 @@ def partial_fit(self): % self.solver) return self._partial_fit - def _partial_fit(self, X, y, classes=None): + def _partial_fit(self, X, y): return self._fit(X, y, incremental=True) def _predict(self, X): From 
be5ac2d0b8cf02d15f4788479214d7df94167b59 Mon Sep 17 00:00:00 2001 From: Wally Date: Mon, 17 Jul 2017 05:20:31 +0100 Subject: [PATCH 11/17] added regression tests for n_comp=None case in incremental pca --- .../tests/test_incremental_pca.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py index 5f23f3440e42c..3ac37f759e92c 100644 --- a/sklearn/decomposition/tests/test_incremental_pca.py +++ b/sklearn/decomposition/tests/test_incremental_pca.py @@ -90,6 +90,33 @@ def test_incremental_pca_validation(): IncrementalPCA(n_components=3).partial_fit, X) +def test_n_components_none(): + # Ensures that n_components == None is handled correctly + rng = np.random.RandomState(1999) + for n_samples, n_features in [(50, 10), (10, 50)]: + + ipca = IncrementalPCA(n_components=None) + + for partial_fit_call in [1, 2]: + X = rng.rand(n_samples, n_features) + + if not hasattr(ipca, 'components_'): # first call to partial_fit + + ipca.partial_fit(X) + if not ipca.n_components_ == min(X.shape): + raise AssertionError('n_components=None did default to' + ' the choice of the minimum between ' + 'the batch number of samples and the ' + 'number of features.') + else: + + ipca.partial_fit(X) + if not ipca.n_components_ == ipca.components_.shape[0]: + raise AssertionError('For n_components=None, the value' + ' assigned has changed between calls ' + 'to partial_fit.') + + def test_incremental_pca_set_params(): # Test that components_ sign is stable over batch sizes. 
rng = np.random.RandomState(1999) From eee25b3ffa8af14c0c4bb5da1f3e04a4918ea167 Mon Sep 17 00:00:00 2001 From: Wally Date: Mon, 17 Jul 2017 07:35:24 +0100 Subject: [PATCH 12/17] some lines were never used, turned to code better for coverage --- sklearn/decomposition/tests/test_incremental_pca.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py index 3ac37f759e92c..00249ee26f04a 100644 --- a/sklearn/decomposition/tests/test_incremental_pca.py +++ b/sklearn/decomposition/tests/test_incremental_pca.py @@ -103,18 +103,12 @@ def test_n_components_none(): if not hasattr(ipca, 'components_'): # first call to partial_fit ipca.partial_fit(X) - if not ipca.n_components_ == min(X.shape): - raise AssertionError('n_components=None did default to' - ' the choice of the minimum between ' - 'the batch number of samples and the ' - 'number of features.') + assert ipca.n_components_ == min(X.shape) + else: ipca.partial_fit(X) - if not ipca.n_components_ == ipca.components_.shape[0]: - raise AssertionError('For n_components=None, the value' - ' assigned has changed between calls ' - 'to partial_fit.') + assert ipca.n_components_ == ipca.components_.shape[0] def test_incremental_pca_set_params(): From 46fd39273dc39517265f0ac6f6a4a4b950669f51 Mon Sep 17 00:00:00 2001 From: wallygauze Date: Mon, 24 Jul 2017 18:57:57 +0100 Subject: [PATCH 13/17] Update whats_new.rst --- doc/whats_new.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 9f5a8f5c914ad..aabec2c842bce 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -526,6 +526,11 @@ Decomposition, manifold learning and clustering - Fix bug where :mod:`mixture` ``sample`` methods did not return as many samples as requested. :issue:`7702` by :user:`Levi John Wolf `. 
+ - Fix for uninformative error in :class:`decomposition.incremental_pca`: + now an error is raised if the number of components is larger than the + chosen batch size. The ``n_components=None`` case was adapted accordingly. + :issue:`6452`. By :user:`Wally Gauze `. + Preprocessing and feature selection - For sparse matrices, :func:`preprocessing.normalize` with ``return_norm=True`` From a7555542041641b9b5af90229dc14f7edeb32136 Mon Sep 17 00:00:00 2001 From: wallygauze Date: Tue, 25 Jul 2017 10:58:34 +0100 Subject: [PATCH 14/17] modifying error message (part 1) --- sklearn/decomposition/incremental_pca.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/decomposition/incremental_pca.py b/sklearn/decomposition/incremental_pca.py index 54be6e21e3cf4..101b42feaf31c 100644 --- a/sklearn/decomposition/incremental_pca.py +++ b/sklearn/decomposition/incremental_pca.py @@ -220,9 +220,8 @@ def partial_fit(self, X, y=None, check_input=True): "processing" % (self.n_components, n_features)) elif not self.n_components <= n_samples: raise ValueError("n_components=%r must be less or equal to " - "the batch number of samples %d. You can change " - "either one depending on what you " - "want." % (self.n_components, n_samples)) + "the batch number of samples " + "%d." 
% (self.n_components, n_samples)) else: self.n_components_ = self.n_components From 522ebe0bf1a72a8c941e3911727f9c397fb45f02 Mon Sep 17 00:00:00 2001 From: wallygauze Date: Tue, 25 Jul 2017 11:00:26 +0100 Subject: [PATCH 15/17] modifying error message part2 --- sklearn/decomposition/tests/test_incremental_pca.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py index 00249ee26f04a..7565279d63da4 100644 --- a/sklearn/decomposition/tests/test_incremental_pca.py +++ b/sklearn/decomposition/tests/test_incremental_pca.py @@ -85,8 +85,7 @@ def test_incremental_pca_validation(): # Tests that n_components is also <= n_samples. assert_raises_regex(ValueError, "n_components\=.* be less or equal to " - "the batch number of samples .*\. You can change " - "either one depending on what you want\.", + "the batch number of samples .*\.", IncrementalPCA(n_components=3).partial_fit, X) From 5bdc0f3be5fa18cbb2f9f39d715beecb66d8f700 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Thu, 3 Aug 2017 12:05:00 +0200 Subject: [PATCH 16/17] Minor improvements in test_pca.py --- .../tests/test_incremental_pca.py | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py index 7565279d63da4..e256ca68872aa 100644 --- a/sklearn/decomposition/tests/test_incremental_pca.py +++ b/sklearn/decomposition/tests/test_incremental_pca.py @@ -74,40 +74,41 @@ def test_incremental_pca_inverse(): def test_incremental_pca_validation(): # Test that n_components is >=1 and <= n_features. 
- X = [[0, 1, 0], [1, 0, 0]] + X = np.array([[0, 1, 0], [1, 0, 0]]) + n_samples, n_features = X.shape for n_components in [-1, 0, .99, 4]: assert_raises_regex(ValueError, - "n_components\=.* invalid for n_features\=.*, need" + "n_components={} invalid for n_features={}, need" " more rows than columns for IncrementalPCA " - "processing", + "processing".format(n_components, n_features), IncrementalPCA(n_components, batch_size=10).fit, X) # Tests that n_components is also <= n_samples. + n_components = 3 assert_raises_regex(ValueError, - "n_components\=.* be less or equal to " - "the batch number of samples .*\.", - IncrementalPCA(n_components=3).partial_fit, X) + "n_components={} must be less or equal to " + "the batch number of samples {}".format( + n_components, n_samples), + IncrementalPCA( + n_components=n_components).partial_fit, X) def test_n_components_none(): # Ensures that n_components == None is handled correctly rng = np.random.RandomState(1999) for n_samples, n_features in [(50, 10), (10, 50)]: - + X = rng.rand(n_samples, n_features) ipca = IncrementalPCA(n_components=None) - for partial_fit_call in [1, 2]: - X = rng.rand(n_samples, n_features) - - if not hasattr(ipca, 'components_'): # first call to partial_fit - - ipca.partial_fit(X) - assert ipca.n_components_ == min(X.shape) - - else: + # First partial_fit call, ipca.n_components_ is inferred from + # min(X.shape) + ipca.partial_fit(X) + assert ipca.n_components_ == min(X.shape) - ipca.partial_fit(X) - assert ipca.n_components_ == ipca.components_.shape[0] + # Second partial_fit call, ipca.n_components_ is inferred from + # ipca.components_ computed from the first partial_fit call + ipca.partial_fit(X) + assert ipca.n_components_ == ipca.components_.shape[0] def test_incremental_pca_set_params(): From d15c6012a384c67787e4d7704b98454434446d56 Mon Sep 17 00:00:00 2001 From: Wally Date: Mon, 14 Aug 2017 12:56:36 +0100 Subject: [PATCH 17/17] moved entry to 0.20 --- doc/whats_new.rst | 6632 
+++++++++++++++++++++++---------------------- 1 file changed, 3376 insertions(+), 3256 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index aabec2c842bce..a79df7a911586 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -5,10 +5,39 @@ Release history =============== +Version 0.20 (under development) +================================ + +Changed models +-------------- + +Changelog +--------- + +New features +............ + +Classifiers and regressors + +- :class:`ensemble.GradientBoostingClassifier` and + :class:`ensemble.GradientBoostingRegressor` now support early stopping + via ``n_iter_no_change``, ``validation_fraction`` and ``tol``. :issue:`7071` + by `Raghav RV`_ + +Bug fixes +......... + +Decomposition, manifold learning and clustering + +- Fix for uninformative error in :class:`decomposition.incremental_pca`: + now an error is raised if the number of components is larger than the + chosen batch size. The ``n_components=None`` case was adapted accordingly. + :issue:`6452`. By :user:`Wally Gauze `. + Version 0.19 ============ -**In Development** +**Release Candidate (0.19b2) July 17, 2017** Highlights ---------- @@ -22,18 +51,18 @@ algorithms in existing estimators, such as multiplicative update in :class:`decomposition.NMF` and multinomial :class:`linear_model.LogisticRegression` with L1 loss (use ``solver='saga'``). -You can also learn faster. For instance, the :ref:`new option to cache -transformations ` in :class:`pipeline.Pipeline` makes grid -search over pipelines including slow transformations much more efficient. And -you can predict faster: if you're sure you know what you're doing, you can turn -off validating that the input is finite using :func:`config_context`. - Cross validation is now able to return the results from multiple metric evaluations. 
The new :func:`model_selection.cross_validate` can return many scores on the test data as well as training set performance and timings, and we have extended the ``scoring`` and ``refit`` parameters for grid/randomized search :ref:`to handle multiple metrics `. +You can also learn faster. For instance, the :ref:`new option to cache +transformations ` in :class:`pipeline.Pipeline` makes grid +search over pipelines including slow transformations much more efficient. And +you can predict faster: if you're sure you know what you're doing, you can turn +off validating that the input is finite using :func:`config_context`. + We've made some important fixes too. We've fixed a longstanding implementation error in :func:`metrics.average_precision_score`, so please be cautious with prior results reported from that function. A number of errors in the @@ -51,21 +80,22 @@ parameters, may produce different models from the previous version. This often occurs due to changes in the modelling logic (bug fixes or enhancements), or in random sampling procedures. 
- * :class:`cluster.KMeans` with sparse X and initial centroids given (bug fix) - * :class:`cross_decomposition.PLSRegression` - with ``scale=True`` (bug fix) - * :class:`ensemble.GradientBoostingClassifier` and - :class:`ensemble.GradientBoostingRegressor` where ``min_impurity_split`` is used (bug fix) - * gradient boosting ``loss='quantile'`` (bug fix) - * :class:`ensemble.IsolationForest` (bug fix) - * :class:`feature_selection.SelectFdr` (bug fix) - * :class:`linear_model.RANSACRegressor` (bug fix) - * :class:`linear_model.LassoLars` (bug fix) - * :class:`linear_model.LassoLarsIC` (bug fix) - * :class:`manifold.TSNE` (bug fix) - * :class:`semi_supervised.LabelSpreading` (bug fix) - * :class:`semi_supervised.LabelPropagation` (bug fix) - * tree based models where ``min_weight_fraction_leaf`` is used (enhancement) +- :class:`cluster.KMeans` with sparse X and initial centroids given (bug fix) +- :class:`cross_decomposition.PLSRegression` + with ``scale=True`` (bug fix) +- :class:`ensemble.GradientBoostingClassifier` and + :class:`ensemble.GradientBoostingRegressor` where ``min_impurity_split`` is used (bug fix) +- gradient boosting ``loss='quantile'`` (bug fix) +- :class:`ensemble.IsolationForest` (bug fix) +- :class:`feature_selection.SelectFdr` (bug fix) +- :class:`linear_model.RANSACRegressor` (bug fix) +- :class:`linear_model.LassoLars` (bug fix) +- :class:`linear_model.LassoLarsIC` (bug fix) +- :class:`manifold.TSNE` (bug fix) +- :class:`neighbors.NearestCentroid` (bug fix) +- :class:`semi_supervised.LabelSpreading` (bug fix) +- :class:`semi_supervised.LabelPropagation` (bug fix) +- tree based models where ``min_weight_fraction_leaf`` is used (enhancement) Details are listed in the changelog below. @@ -80,759 +110,849 @@ New features Classifiers and regressors - - Added :class:`multioutput.ClassifierChain` for multi-label - classification. By `Adam Kleczewski `_. +- Added :class:`multioutput.ClassifierChain` for multi-label + classification. 
By `Adam Kleczewski `_. - - Added solver ``'saga'`` that implements the improved version of Stochastic - Average Gradient, in :class:`linear_model.LogisticRegression` and - :class:`linear_model.Ridge`. It allows the use of L1 penalty with - multinomial logistic loss, and behaves marginally better than 'sag' - during the first epochs of ridge and logistic regression. - :issue:`8446` by `Arthur Mensch`_. +- Added solver ``'saga'`` that implements the improved version of Stochastic + Average Gradient, in :class:`linear_model.LogisticRegression` and + :class:`linear_model.Ridge`. It allows the use of L1 penalty with + multinomial logistic loss, and behaves marginally better than 'sag' + during the first epochs of ridge and logistic regression. + :issue:`8446` by `Arthur Mensch`_. Other estimators - - Added the :class:`neighbors.LocalOutlierFactor` class for anomaly - detection based on nearest neighbors. - :issue:`5279` by `Nicolas Goix`_ and `Alexandre Gramfort`_. +- Added the :class:`neighbors.LocalOutlierFactor` class for anomaly + detection based on nearest neighbors. + :issue:`5279` by `Nicolas Goix`_ and `Alexandre Gramfort`_. - - Added :class:`preprocessing.QuantileTransformer` class and - :func:`preprocessing.quantile_transform` function for features - normalization based on quantiles. - :issue:`8363` by :user:`Denis Engemann `, - :user:`Guillaume Lemaitre `, `Olivier Grisel`_, `Raghav RV`_, - :user:`Thierry Guillemot `, and `Gael Varoquaux`_. +- Added :class:`preprocessing.QuantileTransformer` class and + :func:`preprocessing.quantile_transform` function for features + normalization based on quantiles. + :issue:`8363` by :user:`Denis Engemann `, + :user:`Guillaume Lemaitre `, `Olivier Grisel`_, `Raghav RV`_, + :user:`Thierry Guillemot `, and `Gael Varoquaux`_. 
- - The new solver ``'mu'`` implements a Multiplicate Update in - :class:`decomposition.NMF`, allowing the optimization of all - beta-divergences, including the Frobenius norm, the generalized - Kullback-Leibler divergence and the Itakura-Saito divergence. - :issue:`5295` by `Tom Dupre la Tour`_. +- The new solver ``'mu'`` implements a Multiplicative Update in + :class:`decomposition.NMF`, allowing the optimization of all + beta-divergences, including the Frobenius norm, the generalized + Kullback-Leibler divergence and the Itakura-Saito divergence. + :issue:`5295` by `Tom Dupre la Tour`_. Model selection and evaluation - - :class:`model_selection.GridSearchCV` and - :class:`model_selection.RandomizedSearchCV` now support simultaneous - evaluation of multiple metrics. Refer to the - :ref:`multimetric_grid_search` section of the user guide for more - information. :issue:`7388` by `Raghav RV`_ - - - Added the :func:`model_selection.cross_validate` which allows evaluation - of multiple metrics. This function returns a dict with more useful - information from cross-validation such as the train scores, fit times and - score times. - Refer to :ref:`multimetric_cross_validation` section of the userguide - for more information. :issue:`7388` by `Raghav RV`_ - - - Added :func:`metrics.mean_squared_log_error`, which computes - the mean square error of the logarithmic transformation of targets, - particularly useful for targets with an exponential trend. - :issue:`7655` by :user:`Karan Desai `. - - - Added :func:`metrics.dcg_score` and :func:`metrics.ndcg_score`, which - compute Discounted cumulative gain (DCG) and Normalized discounted - cumulative gain (NDCG). - :issue:`7739` by :user:`David Gasquez `. - - - Added the :class:`model_selection.RepeatedKFold` and - :class:`model_selection.RepeatedStratifiedKFold`. - :issue:`8120` by `Neeraj Gangwar`_.
+- :class:`model_selection.GridSearchCV` and + :class:`model_selection.RandomizedSearchCV` now support simultaneous + evaluation of multiple metrics. Refer to the + :ref:`multimetric_grid_search` section of the user guide for more + information. :issue:`7388` by `Raghav RV`_ + +- Added the :func:`model_selection.cross_validate` which allows evaluation + of multiple metrics. This function returns a dict with more useful + information from cross-validation such as the train scores, fit times and + score times. + Refer to :ref:`multimetric_cross_validation` section of the userguide + for more information. :issue:`7388` by `Raghav RV`_ + +- Added :func:`metrics.mean_squared_log_error`, which computes + the mean square error of the logarithmic transformation of targets, + particularly useful for targets with an exponential trend. + :issue:`7655` by :user:`Karan Desai `. + +- Added :func:`metrics.dcg_score` and :func:`metrics.ndcg_score`, which + compute Discounted cumulative gain (DCG) and Normalized discounted + cumulative gain (NDCG). + :issue:`7739` by :user:`David Gasquez `. + +- Added the :class:`model_selection.RepeatedKFold` and + :class:`model_selection.RepeatedStratifiedKFold`. + :issue:`8120` by `Neeraj Gangwar`_. + +- Added a scorer based on :class:`metrics.explained_variance_score`. + :issue:`9259` by `Hanmin Qin `_. Miscellaneous - - Validation that input data contains no NaN or inf can now be suppressed - using :func:`config_context`, at your own risk. This will save on runtime, - and may be particularly useful for prediction time. :issue:`7548` by - `Joel Nothman`_. +- Validation that input data contains no NaN or inf can now be suppressed + using :func:`config_context`, at your own risk. This will save on runtime, + and may be particularly useful for prediction time. :issue:`7548` by + `Joel Nothman`_. - - Added a test to ensure parameter listing in docstrings match the - function/class signature. 
:issue:`9206` by `Alexandre Gramfort`_ and - `Raghav RV`_. +- Added a test to ensure parameter listing in docstrings match the + function/class signature. :issue:`9206` by `Alexandre Gramfort`_ and + `Raghav RV`_. Enhancements ............ Trees and ensembles - - The ``min_weight_fraction_leaf`` constraint in tree construction is now - more efficient, taking a fast path to declare a node a leaf if its weight - is less than 2 * the minimum. Note that the constructed tree will be - different from previous versions where ``min_weight_fraction_leaf`` is - used. :issue:`7441` by :user:`Nelson Liu `. +- The ``min_weight_fraction_leaf`` constraint in tree construction is now + more efficient, taking a fast path to declare a node a leaf if its weight + is less than 2 * the minimum. Note that the constructed tree will be + different from previous versions where ``min_weight_fraction_leaf`` is + used. :issue:`7441` by :user:`Nelson Liu `. - - :class:`ensemble.GradientBoostingClassifier` and :class:`ensemble.GradientBoostingRegressor` - now support sparse input for prediction. - :issue:`6101` by :user:`Ibraim Ganiev `. +- :class:`ensemble.GradientBoostingClassifier` and :class:`ensemble.GradientBoostingRegressor` + now support sparse input for prediction. + :issue:`6101` by :user:`Ibraim Ganiev `. - - :class:`ensemble.VotingClassifier` now allows changing estimators by using - :meth:`ensemble.VotingClassifier.set_params`. An estimator can also be - removed by setting it to ``None``. - :issue:`7674` by :user:`Yichuan Liu `. +- :class:`ensemble.VotingClassifier` now allows changing estimators by using + :meth:`ensemble.VotingClassifier.set_params`. An estimator can also be + removed by setting it to ``None``. + :issue:`7674` by :user:`Yichuan Liu `. - - :func:`tree.export_graphviz` now shows configurable number of decimal - places. :issue:`8698` by :user:`Guillaume Lemaitre `. +- :func:`tree.export_graphviz` now shows configurable number of decimal + places. 
:issue:`8698` by :user:`Guillaume Lemaitre `. + +- Added ``flatten_transform`` parameter to :class:`ensemble.VotingClassifier` + to change output shape of `transform` method to 2 dimensional. + :issue:`7794` by :user:`Ibraim Ganiev ` and + :user:`Herilalaina Rakotoarison `. Linear, kernelized and related models - - :class:`linear_model.SGDClassifier`, :class:`linear_model.SGDRegressor`, - :class:`linear_model.PassiveAggressiveClassifier`, - :class:`linear_model.PassiveAggressiveRegressor` and - :class:`linear_model.Perceptron` now expose ``max_iter`` and - ``tol`` parameters, to handle convergence more precisely. - ``n_iter`` parameter is deprecated, and the fitted estimator exposes - a ``n_iter_`` attribute, with actual number of iterations before - convergence. :issue:`5036` by `Tom Dupre la Tour`_. - - - Added ``average`` parameter to perform weight averaging in - :class:`linear_model.PassiveAggressiveClassifier`. :issue:`4939` - by :user:`Andrea Esuli `. - - - :class:`linear_model.RANSACRegressor` no longer throws an error - when calling ``fit`` if no inliers are found in its first iteration. - Furthermore, causes of skipped iterations are tracked in newly added - attributes, ``n_skips_*``. - :issue:`7914` by :user:`Michael Horrell `. - - - In :class:`gaussian_process.GaussianProcessRegressor`, method ``predict`` - is a lot faster with ``return_std=True``. :issue:`8591` by - :user:`Hadrien Bertrand `. - - - Added ``return_std`` to ``predict`` method of - :class:`linear_model.ARDRegression` and - :class:`linear_model.BayesianRidge`. - :issue:`7838` by :user:`Sergey Feldman `. - - - Memory usage enhancements: Prevent cast from float32 to float64 in: - :class:`linear_model.MultiTaskElasticNet`; - :class:`linear_model.LogisticRegression` when using newton-cg solver; and - :class:`linear_model.Ridge` when using svd, sparse_cg, cholesky or lsqr - solvers. 
:issue:`8835`, :issue:`8061` by :user:`Joan Massich ` and :user:`Nicolas - Cordier ` and :user:`Thierry Guillemot `. +- :class:`linear_model.SGDClassifier`, :class:`linear_model.SGDRegressor`, + :class:`linear_model.PassiveAggressiveClassifier`, + :class:`linear_model.PassiveAggressiveRegressor` and + :class:`linear_model.Perceptron` now expose ``max_iter`` and + ``tol`` parameters, to handle convergence more precisely. + ``n_iter`` parameter is deprecated, and the fitted estimator exposes + a ``n_iter_`` attribute, with actual number of iterations before + convergence. :issue:`5036` by `Tom Dupre la Tour`_. + +- Added ``average`` parameter to perform weight averaging in + :class:`linear_model.PassiveAggressiveClassifier`. :issue:`4939` + by :user:`Andrea Esuli `. + +- :class:`linear_model.RANSACRegressor` no longer throws an error + when calling ``fit`` if no inliers are found in its first iteration. + Furthermore, causes of skipped iterations are tracked in newly added + attributes, ``n_skips_*``. + :issue:`7914` by :user:`Michael Horrell `. + +- In :class:`gaussian_process.GaussianProcessRegressor`, method ``predict`` + is a lot faster with ``return_std=True``. :issue:`8591` by + :user:`Hadrien Bertrand `. + +- Added ``return_std`` to ``predict`` method of + :class:`linear_model.ARDRegression` and + :class:`linear_model.BayesianRidge`. + :issue:`7838` by :user:`Sergey Feldman `. + +- Memory usage enhancements: Prevent cast from float32 to float64 in: + :class:`linear_model.MultiTaskElasticNet`; + :class:`linear_model.LogisticRegression` when using newton-cg solver; and + :class:`linear_model.Ridge` when using svd, sparse_cg, cholesky or lsqr + solvers. :issue:`8835`, :issue:`8061` by :user:`Joan Massich ` and :user:`Nicolas + Cordier ` and :user:`Thierry Guillemot `. Other predictors - - Custom metrics for the :mod:`neighbors` binary trees now have - fewer constraints: they must take two 1d-arrays and return a float. - :issue:`6288` by `Jake Vanderplas`_. 
+- Custom metrics for the :mod:`neighbors` binary trees now have + fewer constraints: they must take two 1d-arrays and return a float. + :issue:`6288` by `Jake Vanderplas`_. - - ``algorithm='auto`` in :mod:`neighbors` estimators now chooses the most - appropriate algorithm for all input types and metrics. :issue:`9145` by - :user:`Herilalaina Rakotoarison ` and :user:`Reddy Chinthala - `. +- ``algorithm='auto'`` in :mod:`neighbors` estimators now chooses the most + appropriate algorithm for all input types and metrics. :issue:`9145` by + :user:`Herilalaina Rakotoarison ` and :user:`Reddy Chinthala + `. Decomposition, manifold learning and clustering - - :class:`cluster.MiniBatchKMeans` and :class:`cluster.KMeans` - now use significantly less memory when assigning data points to their - nearest cluster center. :issue:`7721` by :user:`Jon Crall `. +- :class:`cluster.MiniBatchKMeans` and :class:`cluster.KMeans` + now use significantly less memory when assigning data points to their + nearest cluster center. :issue:`7721` by :user:`Jon Crall `. + +- :class:`decomposition.PCA`, :class:`decomposition.IncrementalPCA` and + :class:`decomposition.TruncatedSVD` now expose the singular values + from the underlying SVD. They are stored in the attribute + ``singular_values_``, like in :class:`decomposition.IncrementalPCA`. + :issue:`7685` by :user:`Tommy Löfstedt ` - - :class:`decomposition.PCA`, :class:`decomposition.IncrementalPCA` and - :class:`decomposition.TruncatedSVD` now expose the singular values - from the underlying SVD. They are stored in the attribute - ``singular_values_``, like in :class:`decomposition.IncrementalPCA`. - :issue:`7685` by :user:`Tommy Löfstedt ` +- Fixed the implementation of noise_variance_ in :class:`decomposition.PCA`. + :issue:`9108` by `Hanmin Qin `_. - - :class:`decomposition.NMF` now faster when ``beta_loss=0``. - :issue:`9277` by :user:`hongkahjun`. +- :class:`decomposition.NMF` now faster when ``beta_loss=0``.
+ :issue:`9277` by :user:`hongkahjun`. - - Memory improvements for method ``barnes_hut`` in :class:`manifold.TSNE` - :issue:`7089` by :user:`Thomas Moreau ` and `Olivier Grisel`_. +- Memory improvements for method ``barnes_hut`` in :class:`manifold.TSNE` + :issue:`7089` by :user:`Thomas Moreau ` and `Olivier Grisel`_. - - Optimization schedule improvements for Barnes-Hut :class:`manifold.TSNE` - so the results are closer to the one from the reference implementation - `lvdmaaten/bhtsne `_ by :user:`Thomas - Moreau ` and `Olivier Grisel`_. +- Optimization schedule improvements for Barnes-Hut :class:`manifold.TSNE` + so the results are closer to the one from the reference implementation + `lvdmaaten/bhtsne `_ by :user:`Thomas + Moreau ` and `Olivier Grisel`_. - - Memory usage enhancements: Prevent cast from float32 to float64 in - :class:`decomposition.PCA` and - :func:`decomposition.randomized_svd_low_rank`. - :issue:`9067` by `Raghav RV`_. +- Memory usage enhancements: Prevent cast from float32 to float64 in + :class:`decomposition.PCA` and + :func:`decomposition.randomized_svd_low_rank`. + :issue:`9067` by `Raghav RV`_. Preprocessing and feature selection - - Added ``norm_order`` parameter to :class:`feature_selection.SelectFromModel` - to enable selection of the norm order when ``coef_`` is more than 1D. - :issue:`6181` by :user:`Antoine Wendlinger `. +- Added ``norm_order`` parameter to :class:`feature_selection.SelectFromModel` + to enable selection of the norm order when ``coef_`` is more than 1D. + :issue:`6181` by :user:`Antoine Wendlinger `. - - Added ability to use sparse matrices in :func:`feature_selection.f_regression` - with ``center=True``. :issue:`8065` by :user:`Daniel LeJeune `. +- Added ability to use sparse matrices in :func:`feature_selection.f_regression` + with ``center=True``. :issue:`8065` by :user:`Daniel LeJeune `. 
- - Small performance improvement to n-gram creation in - :mod:`feature_extraction.text` by binding methods for loops and - special-casing unigrams. :issue:`7567` by :user:`Jaye Doepke ` +- Small performance improvement to n-gram creation in + :mod:`feature_extraction.text` by binding methods for loops and + special-casing unigrams. :issue:`7567` by :user:`Jaye Doepke ` - - Relax assumption on the data for the - :class:`kernel_approximation.SkewedChi2Sampler`. Since the Skewed-Chi2 - kernel is defined on the open interval :math:`(-skewedness; +\infty)^d`, - the transform function should not check whether ``X < 0`` but whether ``X < - -self.skewedness``. :issue:`7573` by :user:`Romain Brault `. +- Relax assumption on the data for the + :class:`kernel_approximation.SkewedChi2Sampler`. Since the Skewed-Chi2 + kernel is defined on the open interval :math:`(-skewedness; +\infty)^d`, + the transform function should not check whether ``X < 0`` but whether ``X < + -self.skewedness``. :issue:`7573` by :user:`Romain Brault `. - - Made default kernel parameters kernel-dependent in - :class:`kernel_approximation.Nystroem`. - :issue:`5229` by :user:`Saurabh Bansod ` and `Andreas Müller`_. +- Made default kernel parameters kernel-dependent in + :class:`kernel_approximation.Nystroem`. + :issue:`5229` by :user:`Saurabh Bansod ` and `Andreas Müller`_. Model evaluation and meta-estimators - - :class:`pipeline.Pipeline` is now able to cache transformers - within a pipeline by using the ``memory`` constructor parameter. - :issue:`7990` by :user:`Guillaume Lemaitre `. +- :class:`pipeline.Pipeline` is now able to cache transformers + within a pipeline by using the ``memory`` constructor parameter. + :issue:`7990` by :user:`Guillaume Lemaitre `. - - :class:`pipeline.Pipeline` steps can now be accessed as attributes of its - ``named_steps`` attribute. :issue:`8586` by :user:`Herilalaina - Rakotoarison `. 
+- :class:`pipeline.Pipeline` steps can now be accessed as attributes of its + ``named_steps`` attribute. :issue:`8586` by :user:`Herilalaina + Rakotoarison `. - - Added ``sample_weight`` parameter to :meth:`pipeline.Pipeline.score`. - :issue:`7723` by :user:`Mikhail Korobov `. +- Added ``sample_weight`` parameter to :meth:`pipeline.Pipeline.score`. + :issue:`7723` by :user:`Mikhail Korobov `. - - Added ability to set ``n_jobs`` parameter to :func:`pipeline.make_union`. - A ``TypeError`` will be raised for any other kwargs. :issue:`8028` - by :user:`Alexander Booth `. +- Added ability to set ``n_jobs`` parameter to :func:`pipeline.make_union`. + A ``TypeError`` will be raised for any other kwargs. :issue:`8028` + by :user:`Alexander Booth `. - - :class:`model_selection.GridSearchCV`, - :class:`model_selection.RandomizedSearchCV` and - :func:`model_selection.cross_val_score` now allow estimators with callable - kernels which were previously prohibited. - :issue:`8005` by `Andreas Müller`_ . +- :class:`model_selection.GridSearchCV`, + :class:`model_selection.RandomizedSearchCV` and + :func:`model_selection.cross_val_score` now allow estimators with callable + kernels which were previously prohibited. + :issue:`8005` by `Andreas Müller`_ . - - :func:`model_selection.cross_val_predict` now returns output of the - correct shape for all values of the argument ``method``. - :issue:`7863` by :user:`Aman Dalmia `. +- :func:`model_selection.cross_val_predict` now returns output of the + correct shape for all values of the argument ``method``. + :issue:`7863` by :user:`Aman Dalmia `. - - Added ``shuffle`` and ``random_state`` parameters to shuffle training - data before taking prefixes of it based on training sizes in - :func:`model_selection.learning_curve`. - :issue:`7506` by :user:`Narine Kokhlikyan `. 
+- Added ``shuffle`` and ``random_state`` parameters to shuffle training + data before taking prefixes of it based on training sizes in + :func:`model_selection.learning_curve`. + :issue:`7506` by :user:`Narine Kokhlikyan `. - - :class:`model_selection.StratifiedShuffleSplit` now works with multioutput - multiclass (or multilabel) data. :issue:`9044` by `Vlad Niculae`_. +- :class:`model_selection.StratifiedShuffleSplit` now works with multioutput + multiclass (or multilabel) data. :issue:`9044` by `Vlad Niculae`_. - - Speed improvements to :class:`model_selection.StratifiedShuffleSplit`. - :issue:`5991` by :user:`Arthur Mensch ` and `Joel Nothman`_. +- Speed improvements to :class:`model_selection.StratifiedShuffleSplit`. + :issue:`5991` by :user:`Arthur Mensch ` and `Joel Nothman`_. - - Add ``shuffle`` parameter to :func:`model_selection.train_test_split`. - :issue:`8845` by :user:`themrmax ` +- Add ``shuffle`` parameter to :func:`model_selection.train_test_split`. + :issue:`8845` by :user:`themrmax ` - - :class:`multioutput.MultiOutputRegressor` and :class:`multioutput.MultiOutputClassifier` - now support online learning using ``partial_fit``. - :issue: `8053` by :user:`Peng Yu `. +- :class:`multioutput.MultiOutputRegressor` and :class:`multioutput.MultiOutputClassifier` + now support online learning using ``partial_fit``. + :issue:`8053` by :user:`Peng Yu `. - - Add ``max_train_size`` parameter to :class:`model_selection.TimeSeriesSplit` - :issue:`8282` by :user:`Aman Dalmia `. +- Add ``max_train_size`` parameter to :class:`model_selection.TimeSeriesSplit` + :issue:`8282` by :user:`Aman Dalmia `. - - More clustering metrics are now available through :func:`metrics.get_scorer` - and ``scoring`` parameters. :issue:`8117` by `Raghav RV`_. +- More clustering metrics are now available through :func:`metrics.get_scorer` + and ``scoring`` parameters. :issue:`8117` by `Raghav RV`_. Metrics - - :func:`metrics.matthews_corrcoef` now support multiclass classification.
- :issue:`8094` by :user:`Jon Crall `. +- :func:`metrics.matthews_corrcoef` now supports multiclass classification. + :issue:`8094` by :user:`Jon Crall `. - - Add ``sample_weight`` parameter to :func:`metrics.cohen_kappa_score`. - :issue:`8335` by :user:`Victor Poughon `. +- Add ``sample_weight`` parameter to :func:`metrics.cohen_kappa_score`. + :issue:`8335` by :user:`Victor Poughon `. Miscellaneous - - :func:`utils.check_estimator` now attempts to ensure that methods - transform, predict, etc. do not set attributes on the estimator. - :issue:`7533` by :user:`Ekaterina Krivich `. +- :func:`utils.check_estimator` now attempts to ensure that methods + transform, predict, etc. do not set attributes on the estimator. + :issue:`7533` by :user:`Ekaterina Krivich `. - - Added type checking to the ``accept_sparse`` parameter in - :mod:`utils.validation` methods. This parameter now accepts only boolean, - string, or list/tuple of strings. ``accept_sparse=None`` is deprecated and - should be replaced by ``accept_sparse=False``. - :issue:`7880` by :user:`Josh Karnofsky `. +- Added type checking to the ``accept_sparse`` parameter in + :mod:`utils.validation` methods. This parameter now accepts only boolean, + string, or list/tuple of strings. ``accept_sparse=None`` is deprecated and + should be replaced by ``accept_sparse=False``. + :issue:`7880` by :user:`Josh Karnofsky `. - - Make it possible to load a chunk of an svmlight formatted file by - passing a range of bytes to :func:`datasets.load_svmlight_file`. - :issue:`935` by :user:`Olivier Grisel `. +- Make it possible to load a chunk of an svmlight formatted file by + passing a range of bytes to :func:`datasets.load_svmlight_file`. + :issue:`935` by :user:`Olivier Grisel `. - - :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor` - now accept non-finite features. :issue:`8931` by :user:`Attractadore`. +- :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor` + now accept non-finite features.
:issue:`8931` by :user:`Attractadore`. Bug fixes ......... Trees and ensembles - - Fixed a memory leak in trees when using trees with ``criterion='mae'``. - :issue:`8002` by `Raghav RV`_. +- Fixed a memory leak in trees when using trees with ``criterion='mae'``. + :issue:`8002` by `Raghav RV`_. - - Fixed a bug where :class:`ensemble.IsolationForest` uses an - an incorrect formula for the average path length - :issue:`8549` by `Peter Wang `_. +- Fixed a bug where :class:`ensemble.IsolationForest` uses an + incorrect formula for the average path length + :issue:`8549` by `Peter Wang `_. - - Fixed a bug where :class:`ensemble.AdaBoostClassifier` throws - ``ZeroDivisionError`` while fitting data with single class labels. - :issue:`7501` by :user:`Dominik Krzeminski `. +- Fixed a bug where :class:`ensemble.AdaBoostClassifier` throws + ``ZeroDivisionError`` while fitting data with single class labels. + :issue:`7501` by :user:`Dominik Krzeminski `. - - Fixed a bug in :class:`ensemble.GradientBoostingClassifier` and - :class:`ensemble.GradientBoostingRegressor` where a float being compared - to ``0.0`` using ``==`` caused a divide by zero error. :issue:`7970` by - :user:`He Chen `. +- Fixed a bug in :class:`ensemble.GradientBoostingClassifier` and + :class:`ensemble.GradientBoostingRegressor` where a float being compared + to ``0.0`` using ``==`` caused a divide by zero error. :issue:`7970` by + :user:`He Chen `. - - Fix a bug where :class:`ensemble.GradientBoostingClassifier` and - :class:`ensemble.GradientBoostingRegressor` ignored the - ``min_impurity_split`` parameter. - :issue:`8006` by :user:`Sebastian Pölsterl `. +- Fix a bug where :class:`ensemble.GradientBoostingClassifier` and + :class:`ensemble.GradientBoostingRegressor` ignored the + ``min_impurity_split`` parameter. + :issue:`8006` by :user:`Sebastian Pölsterl `. - - Fixed ``oob_score`` in :class:`ensemble.BaggingClassifier`.
- :issue:`8936` by :user:`Michael Lewis ` +- Fixed ``oob_score`` in :class:`ensemble.BaggingClassifier`. + :issue:`8936` by :user:`Michael Lewis ` - - Fixed excessive memory usage in prediction for random forests estimators. - :issue:`8672` by :user:`Mike Benfield `. +- Fixed excessive memory usage in prediction for random forests estimators. + :issue:`8672` by :user:`Mike Benfield `. - - Fixed a bug where ``sample_weight`` as a list broke random forests in Python 2 - :issue:`8068` by :user:`xor`. +- Fixed a bug where ``sample_weight`` as a list broke random forests in Python 2 + :issue:`8068` by :user:`xor`. - - Fixed a bug where :class:`ensemble.IsolationForest` fails when - ``max_features`` is less than 1. - :issue:`5732` by :user:`Ishank Gulati `. +- Fixed a bug where :class:`ensemble.IsolationForest` fails when + ``max_features`` is less than 1. + :issue:`5732` by :user:`Ishank Gulati `. - - Fix a bug where gradient boosting with ``loss='quantile'`` computed - negative errors for negative values of ``ytrue - ypred`` leading to wrong - values when calling ``__call__``. - :issue:`8087` by :user:`Alexis Mignon ` +- Fix a bug where gradient boosting with ``loss='quantile'`` computed + negative errors for negative values of ``ytrue - ypred`` leading to wrong + values when calling ``__call__``. + :issue:`8087` by :user:`Alexis Mignon ` - - Fix a bug where :class:`ensemble.VotingClassifier` raises an error - when a numpy array is passed in for weights. :issue:`7983` by - :user:`Vincent Pham `. +- Fix a bug where :class:`ensemble.VotingClassifier` raises an error + when a numpy array is passed in for weights. :issue:`7983` by + :user:`Vincent Pham `. - - Fixed a bug where :func:`tree.export_graphviz` raised an error - when the length of features_names does not match n_features in the decision - tree. :issue:`8512` by :user:`Li Li `. 
+- Fixed a bug where :func:`tree.export_graphviz` raised an error + when the length of features_names does not match n_features in the decision + tree. :issue:`8512` by :user:`Li Li `. Linear, kernelized and related models - - Fixed a bug where :func:`linear_model.RANSACRegressor.fit` may run until - ``max_iter`` if it finds a large inlier group early. :issue:`8251` by - :user:`aivision2020`. +- Fixed a bug where :func:`linear_model.RANSACRegressor.fit` may run until + ``max_iter`` if it finds a large inlier group early. :issue:`8251` by + :user:`aivision2020`. - - Fixed a bug where :class:`naive_bayes.MultinomialNB` and - :class:`naive_bayes.BernoulliNB` failed when ``alpha=0``. :issue:`5814` by - :user:`Yichuan Liu ` and :user:`Herilalaina Rakotoarison - `. +- Fixed a bug where :class:`naive_bayes.MultinomialNB` and + :class:`naive_bayes.BernoulliNB` failed when ``alpha=0``. :issue:`5814` by + :user:`Yichuan Liu ` and :user:`Herilalaina Rakotoarison + `. - - Fixed a bug where :class:`linear_model.LassoLars` does not give - the same result as the LassoLars implementation available - in R (lars library). :issue:`7849` by :user:`Jair Montoya Martinez `. +- Fixed a bug where :class:`linear_model.LassoLars` does not give + the same result as the LassoLars implementation available + in R (lars library). :issue:`7849` by :user:`Jair Montoya Martinez `. - - Fixed a bug in :class:`linear_model.RandomizedLasso`, - :class:`linear_model.Lars`, :class:`linear_model.LassoLars`, - :class:`linear_model.LarsCV` and :class:`linear_model.LassoLarsCV`, - where the parameter ``precompute`` was not used consistently across - classes, and some values proposed in the docstring could raise errors. - :issue:`5359` by `Tom Dupre la Tour`_. 
+- Fixed a bug in :class:`linear_model.RandomizedLasso`, + :class:`linear_model.Lars`, :class:`linear_model.LassoLars`, + :class:`linear_model.LarsCV` and :class:`linear_model.LassoLarsCV`, + where the parameter ``precompute`` was not used consistently across + classes, and some values proposed in the docstring could raise errors. + :issue:`5359` by `Tom Dupre la Tour`_. - - Fix inconsistent results between :class:`linear_model.RidgeCV` and - :class:`linear_model.Ridge` when using ``normalize=True``. :issue:`9302` - by `Alexandre Gramfort`_. +- Fix inconsistent results between :class:`linear_model.RidgeCV` and + :class:`linear_model.Ridge` when using ``normalize=True``. :issue:`9302` + by `Alexandre Gramfort`_. - - Fix a bug where :func:`linear_model.LassoLars.fit` sometimes - left ``coef_`` as a list, rather than an ndarray. - :issue:`8160` by :user:`CJ Carey `. +- Fix a bug where :func:`linear_model.LassoLars.fit` sometimes + left ``coef_`` as a list, rather than an ndarray. + :issue:`8160` by :user:`CJ Carey `. - - Fix :func:`linear_model.BayesianRidge.fit` to return - ridge parameter ``alpha_`` and ``lambda_`` consistent with calculated - coefficients ``coef_`` and ``intercept_``. - :issue:`8224` by :user:`Peter Gedeck `. +- Fix :func:`linear_model.BayesianRidge.fit` to return + ridge parameter ``alpha_`` and ``lambda_`` consistent with calculated + coefficients ``coef_`` and ``intercept_``. + :issue:`8224` by :user:`Peter Gedeck `. - - Fixed a bug in :class:`svm.OneClassSVM` where it returned floats instead of - integer classes. :issue:`8676` by :user:`Vathsala Achar `. +- Fixed a bug in :class:`svm.OneClassSVM` where it returned floats instead of + integer classes. :issue:`8676` by :user:`Vathsala Achar `. - - Fix AIC/BIC criterion computation in :class:`linear_model.LassoLarsIC`. - :issue:`9022` by `Alexandre Gramfort`_ and :user:`Mehmet Basbug `. +- Fix AIC/BIC criterion computation in :class:`linear_model.LassoLarsIC`. 
+ :issue:`9022` by `Alexandre Gramfort`_ and :user:`Mehmet Basbug `. - - Fixed a memory leak in our LibLinear implementation. :issue:`9024` by - :user:`Sergei Lebedev ` +- Fixed a memory leak in our LibLinear implementation. :issue:`9024` by + :user:`Sergei Lebedev ` - - Fix bug where stratified CV splitters did not work with - :class:`linear_model.LassoCV`. :issue:`8973` by - :user:`Paulo Haddad `. +- Fix bug where stratified CV splitters did not work with + :class:`linear_model.LassoCV`. :issue:`8973` by + :user:`Paulo Haddad `. - - Fixed a bug in :class:`gaussian_process.GaussianProcessRegressor` - when the standard deviation and covariance predicted without fit - would fail with a unmeaningful error by default. - :issue:`6573` by :user:`Quazi Marufur Rahman ` and - `Manoj Kumar`_. +- Fixed a bug in :class:`gaussian_process.GaussianProcessRegressor` + when the standard deviation and covariance predicted without fit + would fail with an unmeaningful error by default. + :issue:`6573` by :user:`Quazi Marufur Rahman ` and + `Manoj Kumar`_. Other predictors - - Fix :class:`semi_supervised.BaseLabelPropagation` to correctly implement - ``LabelPropagation`` and ``LabelSpreading`` as done in the referenced - papers. :issue:`9239` - by :user:`Andre Ambrosio Boechat `, :user:`Utkarsh Upadhyay - `, and `Joel Nothman`_. +- Fix :class:`semi_supervised.BaseLabelPropagation` to correctly implement + ``LabelPropagation`` and ``LabelSpreading`` as done in the referenced + papers. :issue:`9239` + by :user:`Andre Ambrosio Boechat `, :user:`Utkarsh Upadhyay + `, and `Joel Nothman`_. Decomposition, manifold learning and clustering - - Fixed the implementation of :class:`manifold.TSNE`: - - ``early_exageration`` parameter had no effect and is now used for the - first 250 optimization iterations. - - Fixed the ``AssertionError: Tree consistency failed`` exception - reported in :issue:`8992`.
- - Improve the learning schedule to match the one from the reference - implementation `lvdmaaten/bhtsne `_. +- Fixed the implementation of :class:`manifold.TSNE`: +- ``early_exaggeration`` parameter had no effect and is now used for the + first 250 optimization iterations. +- Fixed the ``AssertionError: Tree consistency failed`` exception + reported in :issue:`8992`. +- Improve the learning schedule to match the one from the reference + implementation `lvdmaaten/bhtsne `_. by :user:`Thomas Moreau ` and `Olivier Grisel`_. - - Fix a bug in :class:`decomposition.LatentDirichletAllocation` - where the ``perplexity`` method was returning incorrect results because - the ``transform`` method returns normalized document topic distributions - as of version 0.18. :issue:`7954` by :user:`Gary Foreman `. - - - Fix output shape and bugs with n_jobs > 1 in - :class:`decomposition.SparseCoder` transform and - :func:`decomposition.sparse_encode` - for one-dimensional data and one component. - This also impacts the output shape of :class:`decomposition.DictionaryLearning`. - :issue:`8086` by `Andreas Müller`_. - - - Fixed the implementation of ``explained_variance_`` - in :class:`decomposition.PCA`, - :class:`decomposition.RandomizedPCA` and - :class:`decomposition.IncrementalPCA`. - :issue:`9105` by `Hanmin Qin `_. - - - Fixed a bug where :class:`cluster.DBSCAN` gives incorrect - result when input is a precomputed sparse matrix with initial - rows all zero. :issue:`8306` by :user:`Akshay Gupta ` - - - Fix a bug regarding fitting :class:`cluster.KMeans` with a sparse - array X and initial centroids, where X's means were unnecessarily being - subtracted from the centroids. :issue:`7872` by :user:`Josh Karnofsky `. - - - Fixes to the input validation in :class:`covariance.EllipticEnvelope`. - :issue:`8086` by `Andreas Müller`_.
- - - Fixed a bug in :class:`covariance.MinCovDet` where inputting data - that produced a singular covariance matrix would cause the helper method - ``_c_step`` to throw an exception. - :issue:`3367` by :user:`Jeremy Steward ` - - - Fixed a bug in :class:`manifold.TSNE` affecting convergence of the - gradient descent. :issue:`8768` by :user:`David DeTomaso `. - - - Fixed a bug in :class:`manifold.TSNE` where it stored the incorrect - ``kl_divergence_``. :issue:`6507` by :user:`Sebastian Saeger `. - - - Fixed improper scaling in :class:`cross_decomposition.PLSRegression` - with ``scale=True``. :issue:`7819` by :user:`jayzed82 `. - - - :class:`cluster.bicluster.SpectralCoclustering` and - :class:`cluster.bicluster.SpectralBiclustering` ``fit`` method conforms - with API by accepting ``y`` and returning the object. :issue:`6126`, - :issue:`7814` by :user:`Laurent Direr ` and :user:`Maniteja - Nandana `. - - - Fix bug where :mod:`mixture` ``sample`` methods did not return as many - samples as requested. :issue:`7702` by :user:`Levi John Wolf `. - - - Fix for uninformative error in :class:`decomposition.incremental_pca`: - now an error is raised if the number of components is larger than the - chosen batch size. The ``n_components=None`` case was adapted accordingly. - :issue:`6452`. By :user:`Wally Gauze `. - +- Fix a bug in :class:`decomposition.LatentDirichletAllocation` + where the ``perplexity`` method was returning incorrect results because + the ``transform`` method returns normalized document topic distributions + as of version 0.18. :issue:`7954` by :user:`Gary Foreman `. + +- Fix output shape and bugs with n_jobs > 1 in + :class:`decomposition.SparseCoder` transform and + :func:`decomposition.sparse_encode` + for one-dimensional data and one component. + This also impacts the output shape of :class:`decomposition.DictionaryLearning`. + :issue:`8086` by `Andreas Müller`_. 
+ +- Fixed the implementation of ``explained_variance_`` + in :class:`decomposition.PCA`, + :class:`decomposition.RandomizedPCA` and + :class:`decomposition.IncrementalPCA`. + :issue:`9105` by `Hanmin Qin `_. + +- Fixed the implementation of noise_variance_ in :class:`decomposition.PCA`. + :issue:`9108` by `Hanmin Qin `_. + +- Fixed a bug where :class:`cluster.DBSCAN` gives incorrect + result when input is a precomputed sparse matrix with initial + rows all zero. :issue:`8306` by :user:`Akshay Gupta ` + +- Fix a bug regarding fitting :class:`cluster.KMeans` with a sparse + array X and initial centroids, where X's means were unnecessarily being + subtracted from the centroids. :issue:`7872` by :user:`Josh Karnofsky `. + +- Fixes to the input validation in :class:`covariance.EllipticEnvelope`. + :issue:`8086` by `Andreas Müller`_. + +- Fixed a bug in :class:`covariance.MinCovDet` where inputting data + that produced a singular covariance matrix would cause the helper method + ``_c_step`` to throw an exception. + :issue:`3367` by :user:`Jeremy Steward ` + +- Fixed a bug in :class:`manifold.TSNE` affecting convergence of the + gradient descent. :issue:`8768` by :user:`David DeTomaso `. + +- Fixed a bug in :class:`manifold.TSNE` where it stored the incorrect + ``kl_divergence_``. :issue:`6507` by :user:`Sebastian Saeger `. + +- Fixed improper scaling in :class:`cross_decomposition.PLSRegression` + with ``scale=True``. :issue:`7819` by :user:`jayzed82 `. + +- :class:`cluster.bicluster.SpectralCoclustering` and + :class:`cluster.bicluster.SpectralBiclustering` ``fit`` method conforms + with API by accepting ``y`` and returning the object. :issue:`6126`, + :issue:`7814` by :user:`Laurent Direr ` and :user:`Maniteja + Nandana `. + +- Fix bug where :mod:`mixture` ``sample`` methods did not return as many + samples as requested. :issue:`7702` by :user:`Levi John Wolf `. + +- Fixed the shrinkage implementation in :class:`neighbors.NearestCentroid`. 
+ :issue:`9219` by `Hanmin Qin `_. + Preprocessing and feature selection - - For sparse matrices, :func:`preprocessing.normalize` with ``return_norm=True`` - will now raise a ``NotImplementedError`` with 'l1' or 'l2' norm and with - norm 'max' the norms returned will be the same as for dense matrices. - :issue:`7771` by `Ang Lu `_. +- For sparse matrices, :func:`preprocessing.normalize` with ``return_norm=True`` + will now raise a ``NotImplementedError`` with 'l1' or 'l2' norm and with + norm 'max' the norms returned will be the same as for dense matrices. + :issue:`7771` by `Ang Lu `_. - - Fix a bug where :class:`feature_selection.SelectFdr` did not - exactly implement Benjamini-Hochberg procedure. It formerly may have - selected fewer features than it should. - :issue:`7490` by :user:`Peng Meng `. +- Fix a bug where :class:`feature_selection.SelectFdr` did not + exactly implement Benjamini-Hochberg procedure. It formerly may have + selected fewer features than it should. + :issue:`7490` by :user:`Peng Meng `. - - Fixed a bug where :class:`linear_model.RandomizedLasso` and - :class:`linear_model.RandomizedLogisticRegression` breaks for - sparse input. :issue:`8259` by :user:`Aman Dalmia `. +- Fixed a bug where :class:`linear_model.RandomizedLasso` and + :class:`linear_model.RandomizedLogisticRegression` breaks for + sparse input. :issue:`8259` by :user:`Aman Dalmia `. - - Fix a bug where :class:`feature_extraction.FeatureHasher` - mandatorily applied a sparse random projection to the hashed features, - preventing the use of - :class:`feature_extraction.text.HashingVectorizer` in a - pipeline with :class:`feature_extraction.text.TfidfTransformer`. - :issue:`7565` by :user:`Roman Yurchak `. 
+- Fix a bug where :class:`feature_extraction.FeatureHasher` + mandatorily applied a sparse random projection to the hashed features, + preventing the use of + :class:`feature_extraction.text.HashingVectorizer` in a + pipeline with :class:`feature_extraction.text.TfidfTransformer`. + :issue:`7565` by :user:`Roman Yurchak `. - - Fix a bug where :class:`feature_selection.mutual_info_regression` did not - correctly use ``n_neighbors``. :issue:`8181` by :user:`Guillaume Lemaitre - `. +- Fix a bug where :class:`feature_selection.mutual_info_regression` did not + correctly use ``n_neighbors``. :issue:`8181` by :user:`Guillaume Lemaitre + `. Model evaluation and meta-estimators - - Fixed a bug where :func:`model_selection.BaseSearchCV.inverse_transform` - returns ``self.best_estimator_.transform()`` instead of - ``self.best_estimator_.inverse_transform()``. - :issue:`8344` by :user:`Akshay Gupta ` and :user:`Rasmus Eriksson `. +- Fixed a bug where :func:`model_selection.BaseSearchCV.inverse_transform` + returns ``self.best_estimator_.transform()`` instead of + ``self.best_estimator_.inverse_transform()``. + :issue:`8344` by :user:`Akshay Gupta ` and :user:`Rasmus Eriksson `. + +- Added ``classes_`` attribute to :class:`model_selection.GridSearchCV`, + :class:`model_selection.RandomizedSearchCV`, :class:`grid_search.GridSearchCV`, + and :class:`grid_search.RandomizedSearchCV` that matches the ``classes_`` + attribute of ``best_estimator_``. :issue:`7661` and :issue:`8295` + by :user:`Alyssa Batula `, :user:`Dylan Werner-Meier `, + and :user:`Stephen Hoover `. - - Added ``classes_`` attribute to :class:`model_selection.GridSearchCV`, - :class:`model_selection.RandomizedSearchCV`, :class:`grid_search.GridSearchCV`, - and :class:`grid_search.RandomizedSearchCV` that matches the ``classes_`` - attribute of ``best_estimator_``. :issue:`7661` and :issue:`8295` - by :user:`Alyssa Batula `, :user:`Dylan Werner-Meier `, - and :user:`Stephen Hoover `. 
+- Fixed a bug where :func:`model_selection.validation_curve` + reused the same estimator for each parameter value. + :issue:`7365` by :user:`Aleksandr Sandrovskii `. - - Fixed a bug where :func:`model_selection.validation_curve` - reused the same estimator for each parameter value. - :issue:`7365` by :user:`Aleksandr Sandrovskii `. +- :func:`model_selection.permutation_test_score` now works with Pandas + types. :issue:`5697` by :user:`Stijn Tonk `. - - :func:`model_selection.permutation_test_score` now works with Pandas - types. :issue:`5697` by :user:`Stijn Tonk `. +- Several fixes to input validation in + :class:`multiclass.OutputCodeClassifier` + :issue:`8086` by `Andreas Müller`_. - - Several fixes to input validation in - :class:`multiclass.OutputCodeClassifier` - :issue:`8086` by `Andreas Müller`_. +- :class:`multiclass.OneVsOneClassifier`'s ``partial_fit`` now ensures all + classes are provided up-front. :issue:`6250` by + :user:`Asish Panda `. - - :class:`multiclass.OneVsOneClassifier`'s ``partial_fit`` now ensures all - classes are provided up-front. :issue:`6250` by - :user:`Asish Panda `. +- Fix :func:`multioutput.MultiOutputClassifier.predict_proba` to return a + list of 2d arrays, rather than a 3d array. In the case where different + target columns had different numbers of classes, a ``ValueError`` would be + raised on trying to stack matrices with different dimensions. + :issue:`8093` by :user:`Peter Bull `. - - Fix :func:`multioutput.MultiOutputClassifier.predict_proba` to return a - list of 2d arrays, rather than a 3d array. In the case where different - target columns had different numbers of classes, a ``ValueError`` would be - raised on trying to stack matrices with different dimensions. - :issue:`8093` by :user:`Peter Bull `. +- Cross validation now works with Pandas datatypes that have a + read-only index. :issue:`9507` by `Loic Esteve`_.
Metrics - - :func:`metrics.average_precision_score` no longer linearly - interpolates between operating points, and instead weighs precisions - by the change in recall since the last operating point, as per the - `Wikipedia entry `_. - (`#7356 `_). By - :user:`Nick Dingwall ` and `Gael Varoquaux`_. +- :func:`metrics.average_precision_score` no longer linearly + interpolates between operating points, and instead weighs precisions + by the change in recall since the last operating point, as per the + `Wikipedia entry `_. + (`#7356 `_). By + :user:`Nick Dingwall ` and `Gael Varoquaux`_. - - Fix a bug in :func:`metrics.classification._check_targets` - which would return ``'binary'`` if ``y_true`` and ``y_pred`` were - both ``'binary'`` but the union of ``y_true`` and ``y_pred`` was - ``'multiclass'``. :issue:`8377` by `Loic Esteve`_. +- Fix a bug in :func:`metrics.classification._check_targets` + which would return ``'binary'`` if ``y_true`` and ``y_pred`` were + both ``'binary'`` but the union of ``y_true`` and ``y_pred`` was + ``'multiclass'``. :issue:`8377` by `Loic Esteve`_. - - Fixed an integer overflow bug in :func:`metrics.confusion_matrix` and - hence :func:`metrics.cohen_kappa_score`. :issue:`8354`, :issue:`7929` - by `Joel Nothman`_ and :user:`Jon Crall `. +- Fixed an integer overflow bug in :func:`metrics.confusion_matrix` and + hence :func:`metrics.cohen_kappa_score`. :issue:`8354`, :issue:`7929` + by `Joel Nothman`_ and :user:`Jon Crall `. - - Fixed passing of ``gamma`` parameter to the ``chi2`` kernel in - :func:`metrics.pairwise.pairwise_kernels` :issue:`5211` by - :user:`Nick Rhinehart `, - :user:`Saurabh Bansod ` and `Andreas Müller`_. +- Fixed passing of ``gamma`` parameter to the ``chi2`` kernel in + :func:`metrics.pairwise.pairwise_kernels` :issue:`5211` by + :user:`Nick Rhinehart `, + :user:`Saurabh Bansod ` and `Andreas Müller`_. Miscellaneous - - Fixed a bug when :func:`datasets.make_classification` fails - when generating more than 30 features. 
:issue:`8159` by - :user:`Herilalaina Rakotoarison `. +- Fixed a bug when :func:`datasets.make_classification` fails + when generating more than 30 features. :issue:`8159` by + :user:`Herilalaina Rakotoarison `. - - Fixed a bug where :func:`datasets.make_moons` gives an - incorrect result when ``n_samples`` is odd. - :issue:`8198` by :user:`Josh Levy `. +- Fixed a bug where :func:`datasets.make_moons` gives an + incorrect result when ``n_samples`` is odd. + :issue:`8198` by :user:`Josh Levy `. - - Some ``fetch_`` functions in :mod:`datasets` were ignoring the - ``download_if_missing`` keyword. :issue:`7944` by :user:`Ralf Gommers `. +- Some ``fetch_`` functions in :mod:`datasets` were ignoring the + ``download_if_missing`` keyword. :issue:`7944` by :user:`Ralf Gommers `. - - Fix estimators to accept a ``sample_weight`` parameter of type - ``pandas.Series`` in their ``fit`` function. :issue:`7825` by - `Kathleen Chen`_. +- Fix estimators to accept a ``sample_weight`` parameter of type + ``pandas.Series`` in their ``fit`` function. :issue:`7825` by + `Kathleen Chen`_. - - Fix a bug in cases where ``numpy.cumsum`` may be numerically unstable, - raising an exception if instability is identified. :issue:`7376` and - :issue:`7331` by `Joel Nothman`_ and :user:`yangarbiter`. +- Fix a bug in cases where ``numpy.cumsum`` may be numerically unstable, + raising an exception if instability is identified. :issue:`7376` and + :issue:`7331` by `Joel Nothman`_ and :user:`yangarbiter`. - - Fix a bug where :meth:`base.BaseEstimator.__getstate__` - obstructed pickling customizations of child-classes, when used in a - multiple inheritance context. - :issue:`8316` by :user:`Holger Peters `. +- Fix a bug where :meth:`base.BaseEstimator.__getstate__` + obstructed pickling customizations of child-classes, when used in a + multiple inheritance context. + :issue:`8316` by :user:`Holger Peters `. 
- - Update Sphinx-Gallery from 0.1.4 to 0.1.7 for resolving links in - documentation build with Sphinx>1.5 :issue:`8010`, :issue:`7986` by - :user:`Oscar Najera ` +- Update Sphinx-Gallery from 0.1.4 to 0.1.7 for resolving links in + documentation build with Sphinx>1.5 :issue:`8010`, :issue:`7986` by + :user:`Oscar Najera ` - - Add ``data_home`` parameter to :func:`sklearn.datasets.fetch_kddcup99`. - :issue:`9289` by `Loic Esteve`_. +- Add ``data_home`` parameter to :func:`sklearn.datasets.fetch_kddcup99`. + :issue:`9289` by `Loic Esteve`_. - - Fix dataset loaders using Python 3 version of makedirs to also work in - Python 2. :issue:`9284` by :user:`Sebastin Santy `. +- Fix dataset loaders using Python 3 version of makedirs to also work in + Python 2. :issue:`9284` by :user:`Sebastin Santy `. - - Several minor issues were fixed with thanks to the alerts of - [lgtm.com](http://lgtm.com). :issue:`9278` by :user:`Jean Helie `, - among others. +- Several minor issues were fixed with thanks to the alerts of + `lgtm.com <http://lgtm.com>`_. :issue:`9278` by :user:`Jean Helie `, + among others. API changes summary ------------------- Trees and ensembles - - Gradient boosting base models are no longer estimators. By `Andreas Müller`_. +- Gradient boosting base models are no longer estimators. By `Andreas Müller`_. - - All tree based estimators now accept a ``min_impurity_decrease`` - parameter in lieu of the ``min_impurity_split``, which is now deprecated. - The ``min_impurity_decrease`` helps stop splitting the nodes in which - the weighted impurity decrease from splitting is no longer alteast - ``min_impurity_decrease``. +- All tree based estimators now accept a ``min_impurity_decrease`` + parameter in lieu of the ``min_impurity_split``, which is now deprecated. + The ``min_impurity_decrease`` helps stop splitting the nodes in which + the weighted impurity decrease from splitting is no longer at least + ``min_impurity_decrease``.
:issue:`8449` by `Raghav RV`_. Linear, kernelized and related models - - ``n_iter`` parameter is deprecated in :class:`linear_model.SGDClassifier`, - :class:`linear_model.SGDRegressor`, - :class:`linear_model.PassiveAggressiveClassifier`, - :class:`linear_model.PassiveAggressiveRegressor` and - :class:`linear_model.Perceptron`. By `Tom Dupre la Tour`_. +- ``n_iter`` parameter is deprecated in :class:`linear_model.SGDClassifier`, + :class:`linear_model.SGDRegressor`, + :class:`linear_model.PassiveAggressiveClassifier`, + :class:`linear_model.PassiveAggressiveRegressor` and + :class:`linear_model.Perceptron`. By `Tom Dupre la Tour`_. Other predictors - - :class:`neighbors.LSHForest` has been deprecated and will be - removed in 0.21 due to poor performance. - :issue:`9078` by :user:`Laurent Direr `. +- :class:`neighbors.LSHForest` has been deprecated and will be + removed in 0.21 due to poor performance. + :issue:`9078` by :user:`Laurent Direr `. - - :class:`neighbors.NearestCentroid` no longer purports to support - ``metric='precomputed'`` which now raises an error. :issue:`8515` by - :user:`Sergul Aydore `. +- :class:`neighbors.NearestCentroid` no longer purports to support + ``metric='precomputed'`` which now raises an error. :issue:`8515` by + :user:`Sergul Aydore `. - - The ``alpha`` parameter of :class:`semi_supervised.LabelPropagation` now - has no effect and is deprecated to be removed in 0.21. :issue:`9239` - by :user:`Andre Ambrosio Boechat `, :user:`Utkarsh Upadhyay - `, and `Joel Nothman`_. +- The ``alpha`` parameter of :class:`semi_supervised.LabelPropagation` now + has no effect and is deprecated to be removed in 0.21. :issue:`9239` + by :user:`Andre Ambrosio Boechat `, :user:`Utkarsh Upadhyay + `, and `Joel Nothman`_. 
Decomposition, manifold learning and clustering - - Deprecate the ``doc_topic_distr`` argument of the ``perplexity`` method - in :class:`decomposition.LatentDirichletAllocation` because the - user no longer has access to the unnormalized document topic distribution - needed for the perplexity calculation. :issue:`7954` by - :user:`Gary Foreman `. +- Deprecate the ``doc_topic_distr`` argument of the ``perplexity`` method + in :class:`decomposition.LatentDirichletAllocation` because the + user no longer has access to the unnormalized document topic distribution + needed for the perplexity calculation. :issue:`7954` by + :user:`Gary Foreman `. - - The ``n_topics`` parameter of :class:`decomposition.LatentDirichletAllocation` - has been renamed to ``n_components`` and will be removed in version 0.21. - :issue:`8922` by :user:`Attractadore`. +- The ``n_topics`` parameter of :class:`decomposition.LatentDirichletAllocation` + has been renamed to ``n_components`` and will be removed in version 0.21. + :issue:`8922` by :user:`Attractadore`. - - :meth:`decomposition.SparsePCA.transform`'s ``ridge_alpha`` parameter is - deprecated in preference for class parameter. - :issue:`8137` by :user:`Naoya Kanai `. +- :meth:`decomposition.SparsePCA.transform`'s ``ridge_alpha`` parameter is + deprecated in preference for class parameter. + :issue:`8137` by :user:`Naoya Kanai `. - - :class:`cluster.DBSCAN` now has a ``metric_params`` parameter. - :issue:`8139` by :user:`Naoya Kanai `. +- :class:`cluster.DBSCAN` now has a ``metric_params`` parameter. + :issue:`8139` by :user:`Naoya Kanai `. Preprocessing and feature selection - - :class:`feature_selection.SelectFromModel` now has a ``partial_fit`` - method only if the underlying estimator does. By `Andreas Müller`_. +- :class:`feature_selection.SelectFromModel` now has a ``partial_fit`` + method only if the underlying estimator does. By `Andreas Müller`_. 
- - :class:`feature_selection.SelectFromModel` now validates the ``threshold`` - parameter and sets the ``threshold_`` attribute during the call to - ``fit``, and no longer during the call to ``transform```. By `Andreas - Müller`_. +- :class:`feature_selection.SelectFromModel` now validates the ``threshold`` + parameter and sets the ``threshold_`` attribute during the call to + ``fit``, and no longer during the call to ``transform``. By `Andreas + Müller`_. - - The ``non_negative`` parameter in :class:`feature_extraction.FeatureHasher` - has been deprecated, and replaced with a more principled alternative, - ``alternate_sign``. - :issue:`7565` by :user:`Roman Yurchak `. +- The ``non_negative`` parameter in :class:`feature_extraction.FeatureHasher` + has been deprecated, and replaced with a more principled alternative, + ``alternate_sign``. + :issue:`7565` by :user:`Roman Yurchak `. - - :class:`linear_model.RandomizedLogisticRegression`, - and :class:`linear_model.RandomizedLasso` have been deprecated and will - be removed in version 0.21. - :issue:`8995` by :user:`Ramana.S `. +- :class:`linear_model.RandomizedLogisticRegression`, + and :class:`linear_model.RandomizedLasso` have been deprecated and will + be removed in version 0.21. + :issue:`8995` by :user:`Ramana.S `. Model evaluation and meta-estimators - - Deprecate the ``fit_params`` constructor input to the - :class:`model_selection.GridSearchCV` and - :class:`model_selection.RandomizedSearchCV` in favor - of passing keyword parameters to the ``fit`` methods - of those classes. Data-dependent parameters needed for model - training should be passed as keyword arguments to ``fit``, - and conforming to this convention will allow the hyperparameter - selection classes to be used with tools such as - :func:`model_selection.cross_val_predict`. - :issue:`2879` by :user:`Stephen Hoover `.
- - - In version 0.21, the default behavior of splitters that use the - ``test_size`` and ``train_size`` parameter will change, such that - specifying ``train_size`` alone will cause ``test_size`` to be the - remainder. :issue:`7459` by :user:`Nelson Liu `. - - - :class:`multiclass.OneVsRestClassifier` now has ``partial_fit``, - ``decision_function`` and ``predict_proba`` methods only when the - underlying estimator does. :issue:`7812` by `Andreas Müller`_ and - :user:`Mikhail Korobov `. - - - :class:`multiclass.OneVsRestClassifier` now has a ``partial_fit`` method - only if the underlying estimator does. By `Andreas Müller`_. - - - The ``decision_function`` output shape for binary classification in - :class:`multiclass.OneVsRestClassifier` and - :class:`multiclass.OneVsOneClassifier` is now ``(n_samples,)`` to conform - to scikit-learn conventions. :issue:`9100` by `Andreas Müller`_. - - - The :func:`multioutput.MultiOutputClassifier.predict_proba` - function used to return a 3d array (``n_samples``, ``n_classes``, - ``n_outputs``). In the case where different target columns had different - numbers of classes, a ``ValueError`` would be raised on trying to stack - matrices with different dimensions. This function now returns a list of - arrays where the length of the list is ``n_outputs``, and each array is - (``n_samples``, ``n_classes``) for that particular output. - :issue:`8093` by :user:`Peter Bull `. - - - Replace attribute ``named_steps`` ``dict`` to :class:`utils.Bunch` - in :class:`pipeline.Pipeline` to enable tab completion in interactive - environment. In the case conflict value on ``named_steps`` and ``dict`` - attribute, ``dict`` behavior will be prioritized. - :issue:`8481` by :user:`Herilalaina Rakotoarison `. +- Deprecate the ``fit_params`` constructor input to the + :class:`model_selection.GridSearchCV` and + :class:`model_selection.RandomizedSearchCV` in favor + of passing keyword parameters to the ``fit`` methods + of those classes. 
Data-dependent parameters needed for model + training should be passed as keyword arguments to ``fit``, + and conforming to this convention will allow the hyperparameter + selection classes to be used with tools such as + :func:`model_selection.cross_val_predict`. + :issue:`2879` by :user:`Stephen Hoover `. + +- In version 0.21, the default behavior of splitters that use the + ``test_size`` and ``train_size`` parameter will change, such that + specifying ``train_size`` alone will cause ``test_size`` to be the + remainder. :issue:`7459` by :user:`Nelson Liu `. + +- :class:`multiclass.OneVsRestClassifier` now has ``partial_fit``, + ``decision_function`` and ``predict_proba`` methods only when the + underlying estimator does. :issue:`7812` by `Andreas Müller`_ and + :user:`Mikhail Korobov `. + +- :class:`multiclass.OneVsRestClassifier` now has a ``partial_fit`` method + only if the underlying estimator does. By `Andreas Müller`_. + +- The ``decision_function`` output shape for binary classification in + :class:`multiclass.OneVsRestClassifier` and + :class:`multiclass.OneVsOneClassifier` is now ``(n_samples,)`` to conform + to scikit-learn conventions. :issue:`9100` by `Andreas Müller`_. + +- The :func:`multioutput.MultiOutputClassifier.predict_proba` + function used to return a 3d array (``n_samples``, ``n_classes``, + ``n_outputs``). In the case where different target columns had different + numbers of classes, a ``ValueError`` would be raised on trying to stack + matrices with different dimensions. This function now returns a list of + arrays where the length of the list is ``n_outputs``, and each array is + (``n_samples``, ``n_classes``) for that particular output. + :issue:`8093` by :user:`Peter Bull `. + +- Replace attribute ``named_steps`` ``dict`` to :class:`utils.Bunch` + in :class:`pipeline.Pipeline` to enable tab completion in interactive + environment. 
In the case conflict value on ``named_steps`` and ``dict`` + attribute, ``dict`` behavior will be prioritized. + :issue:`8481` by :user:`Herilalaina Rakotoarison `. Miscellaneous - - Deprecate the ``y`` parameter in ``transform`` and ``inverse_transform``. - The method should not accept ``y`` parameter, as it's used at the prediction time. - :issue:`8174` by :user:`Tahar Zanouda `, `Alexandre Gramfort`_ - and `Raghav RV`_. - - - SciPy >= 0.13.3 and NumPy >= 1.8.2 are now the minimum supported versions - for scikit-learn. The following backported functions in - :mod:`utils` have been removed or deprecated accordingly. - :issue:`8854` and :issue:`8874` by :user:`Naoya Kanai ` - - Removed in 0.19: - - - ``utils.fixes.argpartition`` - - ``utils.fixes.array_equal`` - - ``utils.fixes.astype`` - - ``utils.fixes.bincount`` - - ``utils.fixes.expit`` - - ``utils.fixes.frombuffer_empty`` - - ``utils.fixes.in1d`` - - ``utils.fixes.norm`` - - ``utils.fixes.rankdata`` - - ``utils.fixes.safe_copy`` - - Deprecated in 0.19, to be removed in 0.21: - - - ``utils.arpack.eigs`` - - ``utils.arpack.eigsh`` - - ``utils.arpack.svds`` - - ``utils.extmath.fast_dot`` - - ``utils.extmath.logsumexp`` - - ``utils.extmath.norm`` - - ``utils.extmath.pinvh`` - - ``utils.graph.graph_laplacian`` - - ``utils.random.choice`` - - ``utils.sparsetools.connected_components`` - - ``utils.stats.rankdata`` - - - Estimators with both methods ``decision_function`` and ``predict_proba`` - are now required to have a monotonic relation between them. The - method ``check_decision_proba_consistency`` has been added in - **utils.estimator_checks** to check their consistency. - :issue:`7578` by :user:`Shubham Bhardwaj ` - - - All checks in ``utils.estimator_checks``, in particular - :func:`utils.estimator_checks.check_estimator` now accept estimator - instances. Most other checks do not accept - estimator classes any more. :issue:`9019` by `Andreas Müller`_. 
- - Ensure that estimators' attributes ending with ``_`` are not set - in the constructor but only in the ``fit`` method. Most notably, - ensemble estimators (deriving from :class:`ensemble.BaseEnsemble`) - now only have ``self.estimators_`` available after ``fit``. - :issue:`7464` by `Lars Buitinck`_ and `Loic Esteve`_. - +- Deprecate the ``y`` parameter in ``transform`` and ``inverse_transform``. + The method should not accept ``y`` parameter, as it's used at the prediction time. + :issue:`8174` by :user:`Tahar Zanouda `, `Alexandre Gramfort`_ + and `Raghav RV`_. + +- SciPy >= 0.13.3 and NumPy >= 1.8.2 are now the minimum supported versions + for scikit-learn. The following backported functions in + :mod:`utils` have been removed or deprecated accordingly. + :issue:`8854` and :issue:`8874` by :user:`Naoya Kanai ` + +- The ``store_covariances`` and ``covariances_`` parameters of + :class:`discriminant_analysis.QuadraticDiscriminantAnalysis` + have been renamed to ``store_covariance`` and ``covariance_`` to be + consistent with the corresponding parameter names of the + :class:`discriminant_analysis.LinearDiscriminantAnalysis`. They will be + removed in version 0.21.
:issue:`7998` by :user:`Jiacheng ` + + Removed in 0.19: + + - ``utils.fixes.argpartition`` + - ``utils.fixes.array_equal`` + - ``utils.fixes.astype`` + - ``utils.fixes.bincount`` + - ``utils.fixes.expit`` + - ``utils.fixes.frombuffer_empty`` + - ``utils.fixes.in1d`` + - ``utils.fixes.norm`` + - ``utils.fixes.rankdata`` + - ``utils.fixes.safe_copy`` + + Deprecated in 0.19, to be removed in 0.21: + + - ``utils.arpack.eigs`` + - ``utils.arpack.eigsh`` + - ``utils.arpack.svds`` + - ``utils.extmath.fast_dot`` + - ``utils.extmath.logsumexp`` + - ``utils.extmath.norm`` + - ``utils.extmath.pinvh`` + - ``utils.graph.graph_laplacian`` + - ``utils.random.choice`` + - ``utils.sparsetools.connected_components`` + - ``utils.stats.rankdata`` + +- Estimators with both methods ``decision_function`` and ``predict_proba`` + are now required to have a monotonic relation between them. The + method ``check_decision_proba_consistency`` has been added in + **utils.estimator_checks** to check their consistency. + :issue:`7578` by :user:`Shubham Bhardwaj ` + +- All checks in ``utils.estimator_checks``, in particular + :func:`utils.estimator_checks.check_estimator` now accept estimator + instances. Most other checks do not accept + estimator classes any more. :issue:`9019` by `Andreas Müller`_. + +- Ensure that estimators' attributes ending with ``_`` are not set + in the constructor but only in the ``fit`` method. Most notably, + ensemble estimators (deriving from :class:`ensemble.BaseEnsemble`) + now only have ``self.estimators_`` available after ``fit``. + :issue:`7464` by `Lars Buitinck`_ and `Loic Esteve`_. 
+ + +Code and Documentation Contributors +----------------------------------- + +Thanks to everyone who has contributed to the maintenance and improvement of the +project since version 0.18, including: + +Joel Nothman, Loic Esteve, Andreas Mueller, Guillaume Lemaitre, Olivier Grisel, +Hanmin Qin, Raghav RV, Alexandre Gramfort, themrmax, Aman Dalmia, Gael +Varoquaux, Naoya Kanai, Tom Dupré la Tour, Rishikesh, Nelson Liu, Taehoon Lee, +Nelle Varoquaux, Aashil, Mikhail Korobov, Sebastin Santy, Joan Massich, Roman +Yurchak, RAKOTOARISON Herilalaina, Thierry Guillemot, Alexandre Abadie, Carol +Willing, Balakumaran Manoharan, Josh Karnofsky, Vlad Niculae, Utkarsh Upadhyay, +Dmitry Petrov, Minghui Liu, Srivatsan, Vincent Pham, Albert Thomas, Jake +VanderPlas, Attractadore, JC Liu, alexandercbooth, chkoar, Óscar Nájera, +Aarshay Jain, Kyle Gilliam, Ramana Subramanyam, CJ Carey, Clement Joudet, David +Robles, He Chen, Joris Van den Bossche, Karan Desai, Katie Luangkote, Leland +McInnes, Maniteja Nandana, Michele Lacchia, Sergei Lebedev, Shubham Bhardwaj, +akshay0724, omtcyfz, rickiepark, waterponey, Vathsala Achar, jbDelafosse, Ralf +Gommers, Ekaterina Krivich, Vivek Kumar, Ishank Gulati, Dave Elliott, ldirer, +Reiichiro Nakano, Levi John Wolf, Mathieu Blondel, Sid Kapur, Dougal J. +Sutherland, midinas, mikebenfield, Sourav Singh, Aseem Bansal, Ibraim Ganiev, +Stephen Hoover, AishwaryaRK, Steven C. Howell, Gary Foreman, Neeraj Gangwar, +Tahar, Jon Crall, dokato, Kathy Chen, ferria, Thomas Moreau, Charlie Brummitt, +Nicolas Goix, Adam Kleczewski, Sam Shleifer, Nikita Singh, Basil Beirouti, +Giorgio Patrini, Manoj Kumar, Rafael Possas, James Bourbeau, James A. 
Bednar, +Janine Harper, Jaye, Jean Helie, Jeremy Steward, Artsiom, John Wei, Jonathan +LIgo, Jonathan Rahn, seanpwilliams, Arthur Mensch, Josh Levy, Julian Kuhlmann, +Julien Aubert, Jörn Hees, Kai, shivamgargsya, Kat Hempstalk, Kaushik +Lakshmikanth, Kennedy, Kenneth Lyons, Kenneth Myers, Kevin Yap, Kirill Bobyrev, +Konstantin Podshumok, Arthur Imbert, Lee Murray, toastedcornflakes, Lera, Li +Li, Arthur Douillard, Mainak Jas, tobycheese, Manraj Singh, Manvendra Singh, +Marc Meketon, MarcoFalke, Matthew Brett, Matthias Gilch, Mehul Ahuja, Melanie +Goetz, Meng, Peng, Michael Dezube, Michal Baumgartner, vibrantabhi19, Artem +Golubin, Milen Paskov, Antonin Carette, Morikko, MrMjauh, NALEPA Emmanuel, +Namiya, Antoine Wendlinger, Narine Kokhlikyan, NarineK, Nate Guerin, Angus +Williams, Ang Lu, Nicole Vavrova, Nitish Pandey, Okhlopkov Daniil Olegovich, +Andy Craze, Om Prakash, Parminder Singh, Patrick Carlson, Patrick Pei, Paul +Ganssle, Paulo Haddad, Paweł Lorek, Peng Yu, Pete Bachant, Peter Bull, Peter +Csizsek, Peter Wang, Pieter Arthur de Jong, Ping-Yao, Chang, Preston Parry, +Puneet Mathur, Quentin Hibon, Andrew Smith, Andrew Jackson, 1kastner, Rameshwar +Bhaskaran, Rebecca Bilbro, Remi Rampin, Andrea Esuli, Rob Hall, Robert +Bradshaw, Romain Brault, Aman Pratik, Ruifeng Zheng, Russell Smith, Sachin +Agarwal, Sailesh Choyal, Samson Tan, Samuël Weber, Sarah Brown, Sebastian +Pölsterl, Sebastian Raschka, Sebastian Saeger, Alyssa Batula, Abhyuday Pratap +Singh, Sergey Feldman, Sergul Aydore, Sharan Yalburgi, willduan, Siddharth +Gupta, Sri Krishna, Almer, Stijn Tonk, Allen Riddell, Theofilos Papapanagiotou, +Alison, Alexis Mignon, Tommy Boucher, Tommy Löfstedt, Toshihiro Kamishima, +Tyler Folkman, Tyler Lanigan, Alexander Junge, Varun Shenoy, Victor Poughon, +Vilhelm von Ehrenheim, Aleksandr Sandrovskii, Alan Yee, Vlasios Vasileiou, +Warut Vijitbenjaronk, Yang Zhang, Yaroslav Halchenko, Yichuan Liu, Yuichi +Fujikawa, affanv14, aivision2020, xor, andreh7, brady salz, 
campustrampus, +Agamemnon Krasoulis, ditenberg, elena-sharova, filipj8, fukatani, gedeck, +guiniol, guoci, hakaa1, hongkahjun, i-am-xhy, jakirkham, jaroslaw-weber, +jayzed82, jeroko, jmontoyam, jonathan.striebel, josephsalmon, jschendel, +leereeves, martin-hahn, mathurinm, mehak-sachdeva, mlewis1729, mlliou112, +mthorrell, ndingwall, nuffe, yangarbiter, plagree, pldtc325, Breno Freitas, +Brett Olsen, Brian A. Alfano, Brian Burns, polmauri, Brandon Carter, Charlton +Austin, Chayant T15h, Chinmaya Pancholi, Christian Danielsen, Chung Yen, +Chyi-Kwei Yau, pravarmahajan, DOHMATOB Elvis, Daniel LeJeune, Daniel Hnyk, +Darius Morawiec, David DeTomaso, David Gasquez, David Haberthür, David +Heryanto, David Kirkby, David Nicholson, rashchedrin, Deborah Gertrude Digges, +Denis Engemann, Devansh D, Dickson, Bob Baxley, Don86, E. Lynch-Klarup, Ed +Rogers, Elizabeth Ferriss, Ellen-Co2, Fabian Egli, Fang-Chieh Chou, Bing Tian +Dai, Greg Stupp, Grzegorz Szpak, Bertrand Thirion, Hadrien Bertrand, Harizo +Rajaona, zxcvbnius, Henry Lin, Holger Peters, Icyblade Dai, Igor +Andriushchenko, Ilya, Isaac Laughlin, Iván Vallés, Aurélien Bellet, JPFrancoia, +Jacob Schreiber, Asish Mahapatra .. _changes_0_18_2: @@ -850,11 +970,11 @@ Version 0.18.2 Changelog --------- - - Fixes for compatibility with NumPy 1.13.0: :issue:`7946` :issue:`8355` by - `Loic Esteve`_. +- Fixes for compatibility with NumPy 1.13.0: :issue:`7946` :issue:`8355` by + `Loic Esteve`_. - - Minor compatibility changes in the examples :issue:`9010` :issue:`8040` - :issue:`9149`. +- Minor compatibility changes in the examples :issue:`9010` :issue:`8040` + :issue:`9149`. Code Contributors ----------------- @@ -874,132 +994,132 @@ Changelog Enhancements ............ - - Improved ``sample_without_replacement`` speed by utilizing - numpy.random.permutation for most cases. As a result, - samples may differ in this release for a fixed random state. 
- Affected estimators: +- Improved ``sample_without_replacement`` speed by utilizing + numpy.random.permutation for most cases. As a result, + samples may differ in this release for a fixed random state. + Affected estimators: - - :class:`ensemble.BaggingClassifier` - - :class:`ensemble.BaggingRegressor` - - :class:`linear_model.RANSACRegressor` - - :class:`model_selection.RandomizedSearchCV` - - :class:`random_projection.SparseRandomProjection` + - :class:`ensemble.BaggingClassifier` + - :class:`ensemble.BaggingRegressor` + - :class:`linear_model.RANSACRegressor` + - :class:`model_selection.RandomizedSearchCV` + - :class:`random_projection.SparseRandomProjection` - This also affects the :meth:`datasets.make_classification` - method. + This also affects the :meth:`datasets.make_classification` + method. Bug fixes ......... - - Fix issue where ``min_grad_norm`` and ``n_iter_without_progress`` - parameters were not being utilised by :class:`manifold.TSNE`. - :issue:`6497` by :user:`Sebastian Säger ` - - - Fix bug for svm's decision values when ``decision_function_shape`` - is ``ovr`` in :class:`svm.SVC`. - :class:`svm.SVC`'s decision_function was incorrect from versions - 0.17.0 through 0.18.0. - :issue:`7724` by `Bing Tian Dai`_ - - - Attribute ``explained_variance_ratio`` of - :class:`discriminant_analysis.LinearDiscriminantAnalysis` calculated - with SVD and Eigen solver are now of the same length. :issue:`7632` - by :user:`JPFrancoia ` - - - Fixes issue in :ref:`univariate_feature_selection` where score - functions were not accepting multi-label targets. :issue:`7676` - by :user:`Mohammed Affan ` - - - Fixed setting parameters when calling ``fit`` multiple times on - :class:`feature_selection.SelectFromModel`. :issue:`7756` by `Andreas Müller`_ - - - Fixes issue in ``partial_fit`` method of - :class:`multiclass.OneVsRestClassifier` when number of classes used in - ``partial_fit`` was less than the total number of classes in the - data. 
:issue:`7786` by `Srivatsan Ramesh`_ - - - Fixes issue in :class:`calibration.CalibratedClassifierCV` where - the sum of probabilities of each class for a data was not 1, and - ``CalibratedClassifierCV`` now handles the case where the training set - has less number of classes than the total data. :issue:`7799` by - `Srivatsan Ramesh`_ - - - Fix a bug where :class:`sklearn.feature_selection.SelectFdr` did not - exactly implement Benjamini-Hochberg procedure. It formerly may have - selected fewer features than it should. - :issue:`7490` by :user:`Peng Meng `. - - - :class:`sklearn.manifold.LocallyLinearEmbedding` now correctly handles - integer inputs. :issue:`6282` by `Jake Vanderplas`_. - - - The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and - regressors now assumes uniform sample weights by default if the - ``sample_weight`` argument is not passed to the ``fit`` function. - Previously, the parameter was silently ignored. :issue:`7301` - by :user:`Nelson Liu `. - - - Numerical issue with :class:`linear_model.RidgeCV` on centered data when - `n_features > n_samples`. :issue:`6178` by `Bertrand Thirion`_ - - - Tree splitting criterion classes' cloning/pickling is now memory safe - :issue:`7680` by :user:`Ibraim Ganiev `. - - - Fixed a bug where :class:`decomposition.NMF` sets its ``n_iters_`` - attribute in `transform()`. :issue:`7553` by :user:`Ekaterina - Krivich `. - - - :class:`sklearn.linear_model.LogisticRegressionCV` now correctly handles - string labels. :issue:`5874` by `Raghav RV`_. - - - Fixed a bug where :func:`sklearn.model_selection.train_test_split` raised - an error when ``stratify`` is a list of string labels. :issue:`7593` by - `Raghav RV`_. - - - Fixed a bug where :class:`sklearn.model_selection.GridSearchCV` and - :class:`sklearn.model_selection.RandomizedSearchCV` were not pickleable - because of a pickling bug in ``np.ma.MaskedArray``. :issue:`7594` by - `Raghav RV`_. 
- - - All cross-validation utilities in :mod:`sklearn.model_selection` now - permit one time cross-validation splitters for the ``cv`` parameter. Also - non-deterministic cross-validation splitters (where multiple calls to - ``split`` produce dissimilar splits) can be used as ``cv`` parameter. - The :class:`sklearn.model_selection.GridSearchCV` will cross-validate each - parameter setting on the split produced by the first ``split`` call - to the cross-validation splitter. :issue:`7660` by `Raghav RV`_. - - - Fix bug where :meth:`preprocessing.MultiLabelBinarizer.fit_transform` - returned an invalid CSR matrix. - :issue:`7750` by :user:`CJ Carey `. - - - Fixed a bug where :func:`metrics.pairwise.cosine_distances` could return a - small negative distance. :issue:`7732` by :user:`Artsion `. +- Fix issue where ``min_grad_norm`` and ``n_iter_without_progress`` + parameters were not being utilised by :class:`manifold.TSNE`. + :issue:`6497` by :user:`Sebastian Säger ` + +- Fix bug for svm's decision values when ``decision_function_shape`` + is ``ovr`` in :class:`svm.SVC`. + :class:`svm.SVC`'s decision_function was incorrect from versions + 0.17.0 through 0.18.0. + :issue:`7724` by `Bing Tian Dai`_ + +- Attribute ``explained_variance_ratio`` of + :class:`discriminant_analysis.LinearDiscriminantAnalysis` calculated + with SVD and Eigen solver are now of the same length. :issue:`7632` + by :user:`JPFrancoia ` + +- Fixes issue in :ref:`univariate_feature_selection` where score + functions were not accepting multi-label targets. :issue:`7676` + by :user:`Mohammed Affan ` + +- Fixed setting parameters when calling ``fit`` multiple times on + :class:`feature_selection.SelectFromModel`. :issue:`7756` by `Andreas Müller`_ + +- Fixes issue in ``partial_fit`` method of + :class:`multiclass.OneVsRestClassifier` when number of classes used in + ``partial_fit`` was less than the total number of classes in the + data. 
:issue:`7786` by `Srivatsan Ramesh`_ + +- Fixes issue in :class:`calibration.CalibratedClassifierCV` where + the sum of probabilities of each class for a data was not 1, and + ``CalibratedClassifierCV`` now handles the case where the training set + has less number of classes than the total data. :issue:`7799` by + `Srivatsan Ramesh`_ + +- Fix a bug where :class:`sklearn.feature_selection.SelectFdr` did not + exactly implement Benjamini-Hochberg procedure. It formerly may have + selected fewer features than it should. + :issue:`7490` by :user:`Peng Meng `. + +- :class:`sklearn.manifold.LocallyLinearEmbedding` now correctly handles + integer inputs. :issue:`6282` by `Jake Vanderplas`_. + +- The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and + regressors now assumes uniform sample weights by default if the + ``sample_weight`` argument is not passed to the ``fit`` function. + Previously, the parameter was silently ignored. :issue:`7301` + by :user:`Nelson Liu `. + +- Numerical issue with :class:`linear_model.RidgeCV` on centered data when + `n_features > n_samples`. :issue:`6178` by `Bertrand Thirion`_ + +- Tree splitting criterion classes' cloning/pickling is now memory safe + :issue:`7680` by :user:`Ibraim Ganiev `. + +- Fixed a bug where :class:`decomposition.NMF` sets its ``n_iters_`` + attribute in `transform()`. :issue:`7553` by :user:`Ekaterina + Krivich `. + +- :class:`sklearn.linear_model.LogisticRegressionCV` now correctly handles + string labels. :issue:`5874` by `Raghav RV`_. + +- Fixed a bug where :func:`sklearn.model_selection.train_test_split` raised + an error when ``stratify`` is a list of string labels. :issue:`7593` by + `Raghav RV`_. + +- Fixed a bug where :class:`sklearn.model_selection.GridSearchCV` and + :class:`sklearn.model_selection.RandomizedSearchCV` were not pickleable + because of a pickling bug in ``np.ma.MaskedArray``. :issue:`7594` by + `Raghav RV`_. 
+ +- All cross-validation utilities in :mod:`sklearn.model_selection` now + permit one time cross-validation splitters for the ``cv`` parameter. Also + non-deterministic cross-validation splitters (where multiple calls to + ``split`` produce dissimilar splits) can be used as ``cv`` parameter. + The :class:`sklearn.model_selection.GridSearchCV` will cross-validate each + parameter setting on the split produced by the first ``split`` call + to the cross-validation splitter. :issue:`7660` by `Raghav RV`_. + +- Fix bug where :meth:`preprocessing.MultiLabelBinarizer.fit_transform` + returned an invalid CSR matrix. + :issue:`7750` by :user:`CJ Carey `. + +- Fixed a bug where :func:`metrics.pairwise.cosine_distances` could return a + small negative distance. :issue:`7732` by :user:`Artsion `. API changes summary ------------------- Trees and forests - - The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and - regressors now assumes uniform sample weights by default if the - ``sample_weight`` argument is not passed to the ``fit`` function. - Previously, the parameter was silently ignored. :issue:`7301` by :user:`Nelson - Liu `. +- The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and + regressors now assumes uniform sample weights by default if the + ``sample_weight`` argument is not passed to the ``fit`` function. + Previously, the parameter was silently ignored. :issue:`7301` by :user:`Nelson + Liu `. - - Tree splitting criterion classes' cloning/pickling is now memory safe. - :issue:`7680` by :user:`Ibraim Ganiev `. +- Tree splitting criterion classes' cloning/pickling is now memory safe. + :issue:`7680` by :user:`Ibraim Ganiev `. Linear, kernelized and related models - - Length of ``explained_variance_ratio`` of - :class:`discriminant_analysis.LinearDiscriminantAnalysis` - changed for both Eigen and SVD solvers. The attribute has now a length - of min(n_components, n_classes - 1). 
:issue:`7632` - by :user:`JPFrancoia ` +- Length of ``explained_variance_ratio`` of + :class:`discriminant_analysis.LinearDiscriminantAnalysis` + changed for both Eigen and SVD solvers. The attribute has now a length + of min(n_components, n_classes - 1). :issue:`7632` + by :user:`JPFrancoia ` - - Numerical issue with :class:`linear_model.RidgeCV` on centered data when - ``n_features > n_samples``. :issue:`6178` by `Bertrand Thirion`_ +- Numerical issue with :class:`linear_model.RidgeCV` on centered data when + ``n_features > n_samples``. :issue:`6178` by `Bertrand Thirion`_ .. _changes_0_18: @@ -1018,101 +1138,101 @@ Version 0.18 Model Selection Enhancements and API Changes -------------------------------------------- - - **The model_selection module** +- **The model_selection module** - The new module :mod:`sklearn.model_selection`, which groups together the - functionalities of formerly :mod:`sklearn.cross_validation`, - :mod:`sklearn.grid_search` and :mod:`sklearn.learning_curve`, introduces new - possibilities such as nested cross-validation and better manipulation of - parameter searches with Pandas. + The new module :mod:`sklearn.model_selection`, which groups together the + functionalities of formerly :mod:`sklearn.cross_validation`, + :mod:`sklearn.grid_search` and :mod:`sklearn.learning_curve`, introduces new + possibilities such as nested cross-validation and better manipulation of + parameter searches with Pandas. - Many things will stay the same but there are some key differences. Read - below to know more about the changes. + Many things will stay the same but there are some key differences. Read + below to know more about the changes. - - **Data-independent CV splitters enabling nested cross-validation** +- **Data-independent CV splitters enabling nested cross-validation** - The new cross-validation splitters, defined in the - :mod:`sklearn.model_selection`, are no longer initialized with any - data-dependent parameters such as ``y``. 
Instead they expose a - :func:`split` method that takes in the data and yields a generator for the - different splits. + The new cross-validation splitters, defined in the + :mod:`sklearn.model_selection`, are no longer initialized with any + data-dependent parameters such as ``y``. Instead they expose a + :func:`split` method that takes in the data and yields a generator for the + different splits. - This change makes it possible to use the cross-validation splitters to - perform nested cross-validation, facilitated by - :class:`model_selection.GridSearchCV` and - :class:`model_selection.RandomizedSearchCV` utilities. + This change makes it possible to use the cross-validation splitters to + perform nested cross-validation, facilitated by + :class:`model_selection.GridSearchCV` and + :class:`model_selection.RandomizedSearchCV` utilities. - - **The enhanced cv_results_ attribute** +- **The enhanced cv_results_ attribute** - The new ``cv_results_`` attribute (of :class:`model_selection.GridSearchCV` - and :class:`model_selection.RandomizedSearchCV`) introduced in lieu of the - ``grid_scores_`` attribute is a dict of 1D arrays with elements in each - array corresponding to the parameter settings (i.e. search candidates). + The new ``cv_results_`` attribute (of :class:`model_selection.GridSearchCV` + and :class:`model_selection.RandomizedSearchCV`) introduced in lieu of the + ``grid_scores_`` attribute is a dict of 1D arrays with elements in each + array corresponding to the parameter settings (i.e. search candidates). - The ``cv_results_`` dict can be easily imported into ``pandas`` as a - ``DataFrame`` for exploring the search results. + The ``cv_results_`` dict can be easily imported into ``pandas`` as a + ``DataFrame`` for exploring the search results. 
- The ``cv_results_`` arrays include scores for each cross-validation split - (with keys such as ``'split0_test_score'``), as well as their mean - (``'mean_test_score'``) and standard deviation (``'std_test_score'``). + The ``cv_results_`` arrays include scores for each cross-validation split + (with keys such as ``'split0_test_score'``), as well as their mean + (``'mean_test_score'``) and standard deviation (``'std_test_score'``). - The ranks for the search candidates (based on their mean - cross-validation score) is available at ``cv_results_['rank_test_score']``. + The ranks for the search candidates (based on their mean + cross-validation score) are available at ``cv_results_['rank_test_score']``. - The parameter values for each parameter is stored separately as numpy - masked object arrays. The value, for that search candidate, is masked if - the corresponding parameter is not applicable. Additionally a list of all - the parameter dicts are stored at ``cv_results_['params']``. + The parameter values for each parameter are stored separately as numpy + masked object arrays. The value, for that search candidate, is masked if + the corresponding parameter is not applicable. Additionally a list of all + the parameter dicts is stored at ``cv_results_['params']``. - - **Parameters n_folds and n_iter renamed to n_splits** +- **Parameters n_folds and n_iter renamed to n_splits** - Some parameter names have changed: - The ``n_folds`` parameter in new :class:`model_selection.KFold`, - :class:`model_selection.GroupKFold` (see below for the name change), - and :class:`model_selection.StratifiedKFold` is now renamed to - ``n_splits``. The ``n_iter`` parameter in - :class:`model_selection.ShuffleSplit`, the new class - :class:`model_selection.GroupShuffleSplit` and - :class:`model_selection.StratifiedShuffleSplit` is now renamed to - ``n_splits``.
+ Some parameter names have changed: + The ``n_folds`` parameter in new :class:`model_selection.KFold`, + :class:`model_selection.GroupKFold` (see below for the name change), + and :class:`model_selection.StratifiedKFold` is now renamed to + ``n_splits``. The ``n_iter`` parameter in + :class:`model_selection.ShuffleSplit`, the new class + :class:`model_selection.GroupShuffleSplit` and + :class:`model_selection.StratifiedShuffleSplit` is now renamed to + ``n_splits``. - - **Rename of splitter classes which accepts group labels along with data** +- **Rename of splitter classes which accepts group labels along with data** - The cross-validation splitters ``LabelKFold``, - ``LabelShuffleSplit``, ``LeaveOneLabelOut`` and ``LeavePLabelOut`` have - been renamed to :class:`model_selection.GroupKFold`, - :class:`model_selection.GroupShuffleSplit`, - :class:`model_selection.LeaveOneGroupOut` and - :class:`model_selection.LeavePGroupsOut` respectively. + The cross-validation splitters ``LabelKFold``, + ``LabelShuffleSplit``, ``LeaveOneLabelOut`` and ``LeavePLabelOut`` have + been renamed to :class:`model_selection.GroupKFold`, + :class:`model_selection.GroupShuffleSplit`, + :class:`model_selection.LeaveOneGroupOut` and + :class:`model_selection.LeavePGroupsOut` respectively. - Note the change from singular to plural form in - :class:`model_selection.LeavePGroupsOut`. + Note the change from singular to plural form in + :class:`model_selection.LeavePGroupsOut`. - - **Fit parameter labels renamed to groups** +- **Fit parameter labels renamed to groups** - The ``labels`` parameter in the :func:`split` method of the newly renamed - splitters :class:`model_selection.GroupKFold`, - :class:`model_selection.LeaveOneGroupOut`, - :class:`model_selection.LeavePGroupsOut`, - :class:`model_selection.GroupShuffleSplit` is renamed to ``groups`` - following the new nomenclature of their class names. 
+ The ``labels`` parameter in the :func:`split` method of the newly renamed + splitters :class:`model_selection.GroupKFold`, + :class:`model_selection.LeaveOneGroupOut`, + :class:`model_selection.LeavePGroupsOut`, + :class:`model_selection.GroupShuffleSplit` is renamed to ``groups`` + following the new nomenclature of their class names. - - **Parameter n_labels renamed to n_groups** +- **Parameter n_labels renamed to n_groups** - The parameter ``n_labels`` in the newly renamed - :class:`model_selection.LeavePGroupsOut` is changed to ``n_groups``. + The parameter ``n_labels`` in the newly renamed + :class:`model_selection.LeavePGroupsOut` is changed to ``n_groups``. - - Training scores and Timing information +- Training scores and Timing information - ``cv_results_`` also includes the training scores for each - cross-validation split (with keys such as ``'split0_train_score'``), as - well as their mean (``'mean_train_score'``) and standard deviation - (``'std_train_score'``). To avoid the cost of evaluating training score, - set ``return_train_score=False``. + ``cv_results_`` also includes the training scores for each + cross-validation split (with keys such as ``'split0_train_score'``), as + well as their mean (``'mean_train_score'``) and standard deviation + (``'std_train_score'``). To avoid the cost of evaluating training score, + set ``return_train_score=False``. - Additionally the mean and standard deviation of the times taken to split, - train and score the model across all the cross-validation splits is - available at the key ``'mean_time'`` and ``'std_time'`` respectively. + Additionally the mean and standard deviation of the times taken to split, + train and score the model across all the cross-validation splits is + available at the key ``'mean_time'`` and ``'std_time'`` respectively. 
Changelog --------- @@ -1122,399 +1242,399 @@ New features Classifiers and Regressors - - The Gaussian Process module has been reimplemented and now offers classification - and regression estimators through :class:`gaussian_process.GaussianProcessClassifier` - and :class:`gaussian_process.GaussianProcessRegressor`. Among other things, the new - implementation supports kernel engineering, gradient-based hyperparameter optimization or - sampling of functions from GP prior and GP posterior. Extensive documentation and - examples are provided. By `Jan Hendrik Metzen`_. +- The Gaussian Process module has been reimplemented and now offers classification + and regression estimators through :class:`gaussian_process.GaussianProcessClassifier` + and :class:`gaussian_process.GaussianProcessRegressor`. Among other things, the new + implementation supports kernel engineering, gradient-based hyperparameter optimization or + sampling of functions from GP prior and GP posterior. Extensive documentation and + examples are provided. By `Jan Hendrik Metzen`_. - - Added new supervised learning algorithm: :ref:`Multi-layer Perceptron ` - :issue:`3204` by :user:`Issam H. Laradji ` +- Added new supervised learning algorithm: :ref:`Multi-layer Perceptron ` + :issue:`3204` by :user:`Issam H. Laradji ` - - Added :class:`linear_model.HuberRegressor`, a linear model robust to outliers. - :issue:`5291` by `Manoj Kumar`_. +- Added :class:`linear_model.HuberRegressor`, a linear model robust to outliers. + :issue:`5291` by `Manoj Kumar`_. - - Added the :class:`multioutput.MultiOutputRegressor` meta-estimator. It - converts single output regressors to multi-output regressors by fitting - one regressor per output. By :user:`Tim Head `. +- Added the :class:`multioutput.MultiOutputRegressor` meta-estimator. It + converts single output regressors to multi-output regressors by fitting + one regressor per output. By :user:`Tim Head `. 
Other estimators - - New :class:`mixture.GaussianMixture` and :class:`mixture.BayesianGaussianMixture` - replace former mixture models, employing faster inference - for sounder results. :issue:`7295` by :user:`Wei Xue ` and - :user:`Thierry Guillemot `. +- New :class:`mixture.GaussianMixture` and :class:`mixture.BayesianGaussianMixture` + replace former mixture models, employing faster inference + for sounder results. :issue:`7295` by :user:`Wei Xue ` and + :user:`Thierry Guillemot `. - - Class :class:`decomposition.RandomizedPCA` is now factored into :class:`decomposition.PCA` - and it is available calling with parameter ``svd_solver='randomized'``. - The default number of ``n_iter`` for ``'randomized'`` has changed to 4. The old - behavior of PCA is recovered by ``svd_solver='full'``. An additional solver - calls ``arpack`` and performs truncated (non-randomized) SVD. By default, - the best solver is selected depending on the size of the input and the - number of components requested. :issue:`5299` by :user:`Giorgio Patrini `. +- Class :class:`decomposition.RandomizedPCA` is now factored into :class:`decomposition.PCA` + and it is available calling with parameter ``svd_solver='randomized'``. + The default number of ``n_iter`` for ``'randomized'`` has changed to 4. The old + behavior of PCA is recovered by ``svd_solver='full'``. An additional solver + calls ``arpack`` and performs truncated (non-randomized) SVD. By default, + the best solver is selected depending on the size of the input and the + number of components requested. :issue:`5299` by :user:`Giorgio Patrini `. - - Added two functions for mutual information estimation: - :func:`feature_selection.mutual_info_classif` and - :func:`feature_selection.mutual_info_regression`. These functions can be - used in :class:`feature_selection.SelectKBest` and - :class:`feature_selection.SelectPercentile` as score functions. - By :user:`Andrea Bravi ` and :user:`Nikolay Mayorov `. 
+- Added two functions for mutual information estimation: + :func:`feature_selection.mutual_info_classif` and + :func:`feature_selection.mutual_info_regression`. These functions can be + used in :class:`feature_selection.SelectKBest` and + :class:`feature_selection.SelectPercentile` as score functions. + By :user:`Andrea Bravi ` and :user:`Nikolay Mayorov `. - - Added the :class:`ensemble.IsolationForest` class for anomaly detection based on - random forests. By `Nicolas Goix`_. +- Added the :class:`ensemble.IsolationForest` class for anomaly detection based on + random forests. By `Nicolas Goix`_. - - Added ``algorithm="elkan"`` to :class:`cluster.KMeans` implementing - Elkan's fast K-Means algorithm. By `Andreas Müller`_. +- Added ``algorithm="elkan"`` to :class:`cluster.KMeans` implementing + Elkan's fast K-Means algorithm. By `Andreas Müller`_. Model selection and evaluation - - Added :func:`metrics.cluster.fowlkes_mallows_score`, the Fowlkes Mallows - Index which measures the similarity of two clusterings of a set of points - By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `. +- Added :func:`metrics.cluster.fowlkes_mallows_score`, the Fowlkes Mallows + Index which measures the similarity of two clusterings of a set of points + By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `. - - Added :func:`metrics.calinski_harabaz_score`, which computes the Calinski - and Harabaz score to evaluate the resulting clustering of a set of points. - By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `. +- Added :func:`metrics.calinski_harabaz_score`, which computes the Calinski + and Harabaz score to evaluate the resulting clustering of a set of points. + By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `. - - Added new cross-validation splitter - :class:`model_selection.TimeSeriesSplit` to handle time series data. 
- :issue:`6586` by :user:`YenChen Lin ` +- Added new cross-validation splitter + :class:`model_selection.TimeSeriesSplit` to handle time series data. + :issue:`6586` by :user:`YenChen Lin ` - - The cross-validation iterators are replaced by cross-validation splitters - available from :mod:`sklearn.model_selection`, allowing for nested - cross-validation. See :ref:`model_selection_changes` for more information. - :issue:`4294` by `Raghav RV`_. +- The cross-validation iterators are replaced by cross-validation splitters + available from :mod:`sklearn.model_selection`, allowing for nested + cross-validation. See :ref:`model_selection_changes` for more information. + :issue:`4294` by `Raghav RV`_. Enhancements ............ Trees and ensembles - - Added a new splitting criterion for :class:`tree.DecisionTreeRegressor`, - the mean absolute error. This criterion can also be used in - :class:`ensemble.ExtraTreesRegressor`, - :class:`ensemble.RandomForestRegressor`, and the gradient boosting - estimators. :issue:`6667` by :user:`Nelson Liu `. +- Added a new splitting criterion for :class:`tree.DecisionTreeRegressor`, + the mean absolute error. This criterion can also be used in + :class:`ensemble.ExtraTreesRegressor`, + :class:`ensemble.RandomForestRegressor`, and the gradient boosting + estimators. :issue:`6667` by :user:`Nelson Liu `. - - Added weighted impurity-based early stopping criterion for decision tree - growth. :issue:`6954` by :user:`Nelson Liu ` +- Added weighted impurity-based early stopping criterion for decision tree + growth. :issue:`6954` by :user:`Nelson Liu ` - - The random forest, extra tree and decision tree estimators now has a - method ``decision_path`` which returns the decision path of samples in - the tree. By `Arnaud Joly`_. +- The random forest, extra tree and decision tree estimators now has a + method ``decision_path`` which returns the decision path of samples in + the tree. By `Arnaud Joly`_. 
- - A new example has been added unveiling the decision tree structure. - By `Arnaud Joly`_. +- A new example has been added unveiling the decision tree structure. + By `Arnaud Joly`_. - - Random forest, extra trees, decision trees and gradient boosting estimator - accept the parameter ``min_samples_split`` and ``min_samples_leaf`` - provided as a percentage of the training samples. By :user:`yelite ` and `Arnaud Joly`_. +- Random forest, extra trees, decision trees and gradient boosting estimators + accept the parameters ``min_samples_split`` and ``min_samples_leaf`` + provided as a percentage of the training samples. By :user:`yelite ` and `Arnaud Joly`_. - - Gradient boosting estimators accept the parameter ``criterion`` to specify - to splitting criterion used in built decision trees. - :issue:`6667` by :user:`Nelson Liu `. +- Gradient boosting estimators accept the parameter ``criterion`` to specify + the splitting criterion used in built decision trees. + :issue:`6667` by :user:`Nelson Liu `. - - The memory footprint is reduced (sometimes greatly) for - :class:`ensemble.bagging.BaseBagging` and classes that inherit from it, - i.e, :class:`ensemble.BaggingClassifier`, - :class:`ensemble.BaggingRegressor`, and :class:`ensemble.IsolationForest`, - by dynamically generating attribute ``estimators_samples_`` only when it is - needed. By :user:`David Staub `. +- The memory footprint is reduced (sometimes greatly) for + :class:`ensemble.bagging.BaseBagging` and classes that inherit from it, + i.e., :class:`ensemble.BaggingClassifier`, + :class:`ensemble.BaggingRegressor`, and :class:`ensemble.IsolationForest`, + by dynamically generating attribute ``estimators_samples_`` only when it is + needed. By :user:`David Staub `. - - Added ``n_jobs`` and ``sample_weight`` parameters for - :class:`ensemble.VotingClassifier` to fit underlying estimators in parallel. - :issue:`5805` by :user:`Ibraim Ganiev `.
+- Added ``n_jobs`` and ``sample_weight`` parameters for + :class:`ensemble.VotingClassifier` to fit underlying estimators in parallel. + :issue:`5805` by :user:`Ibraim Ganiev `. Linear, kernelized and related models - - In :class:`linear_model.LogisticRegression`, the SAG solver is now - available in the multinomial case. :issue:`5251` by `Tom Dupre la Tour`_. +- In :class:`linear_model.LogisticRegression`, the SAG solver is now + available in the multinomial case. :issue:`5251` by `Tom Dupre la Tour`_. - - :class:`linear_model.RANSACRegressor`, :class:`svm.LinearSVC` and - :class:`svm.LinearSVR` now support ``sample_weight``. - By :user:`Imaculate `. +- :class:`linear_model.RANSACRegressor`, :class:`svm.LinearSVC` and + :class:`svm.LinearSVR` now support ``sample_weight``. + By :user:`Imaculate `. - - Add parameter ``loss`` to :class:`linear_model.RANSACRegressor` to measure the - error on the samples for every trial. By `Manoj Kumar`_. +- Add parameter ``loss`` to :class:`linear_model.RANSACRegressor` to measure the + error on the samples for every trial. By `Manoj Kumar`_. - - Prediction of out-of-sample events with Isotonic Regression - (:class:`isotonic.IsotonicRegression`) is now much faster (over 1000x in tests with synthetic - data). By :user:`Jonathan Arfa `. +- Prediction of out-of-sample events with Isotonic Regression + (:class:`isotonic.IsotonicRegression`) is now much faster (over 1000x in tests with synthetic + data). By :user:`Jonathan Arfa `. - - Isotonic regression (:class:`isotonic.IsotonicRegression`) now uses a better algorithm to avoid - `O(n^2)` behavior in pathological cases, and is also generally faster - (:issue:`#6691`). By `Antony Lee`_. +- Isotonic regression (:class:`isotonic.IsotonicRegression`) now uses a better algorithm to avoid + `O(n^2)` behavior in pathological cases, and is also generally faster + (:issue:`6691`). By `Antony Lee`_.
- - :class:`naive_bayes.GaussianNB` now accepts data-independent class-priors - through the parameter ``priors``. By :user:`Guillaume Lemaitre `. +- :class:`naive_bayes.GaussianNB` now accepts data-independent class-priors + through the parameter ``priors``. By :user:`Guillaume Lemaitre `. - - :class:`linear_model.ElasticNet` and :class:`linear_model.Lasso` - now works with ``np.float32`` input data without converting it - into ``np.float64``. This allows to reduce the memory - consumption. :issue:`6913` by :user:`YenChen Lin `. +- :class:`linear_model.ElasticNet` and :class:`linear_model.Lasso` + now works with ``np.float32`` input data without converting it + into ``np.float64``. This allows to reduce the memory + consumption. :issue:`6913` by :user:`YenChen Lin `. - - :class:`semi_supervised.LabelPropagation` and :class:`semi_supervised.LabelSpreading` - now accept arbitrary kernel functions in addition to strings ``knn`` and ``rbf``. - :issue:`5762` by :user:`Utkarsh Upadhyay `. +- :class:`semi_supervised.LabelPropagation` and :class:`semi_supervised.LabelSpreading` + now accept arbitrary kernel functions in addition to strings ``knn`` and ``rbf``. + :issue:`5762` by :user:`Utkarsh Upadhyay `. Decomposition, manifold learning and clustering - - Added ``inverse_transform`` function to :class:`decomposition.NMF` to compute - data matrix of original shape. By :user:`Anish Shah `. +- Added ``inverse_transform`` function to :class:`decomposition.NMF` to compute + data matrix of original shape. By :user:`Anish Shah `. - - :class:`cluster.KMeans` and :class:`cluster.MiniBatchKMeans` now works - with ``np.float32`` and ``np.float64`` input data without converting it. - This allows to reduce the memory consumption by using ``np.float32``. - :issue:`6846` by :user:`Sebastian Säger ` and - :user:`YenChen Lin `. +- :class:`cluster.KMeans` and :class:`cluster.MiniBatchKMeans` now works + with ``np.float32`` and ``np.float64`` input data without converting it. 
+ This allows to reduce the memory consumption by using ``np.float32``. + :issue:`6846` by :user:`Sebastian Säger ` and + :user:`YenChen Lin `. Preprocessing and feature selection - - :class:`preprocessing.RobustScaler` now accepts ``quantile_range`` parameter. - :issue:`5929` by :user:`Konstantin Podshumok `. +- :class:`preprocessing.RobustScaler` now accepts ``quantile_range`` parameter. + :issue:`5929` by :user:`Konstantin Podshumok `. - - :class:`feature_extraction.FeatureHasher` now accepts string values. - :issue:`6173` by :user:`Ryad Zenine ` and - :user:`Devashish Deshpande `. +- :class:`feature_extraction.FeatureHasher` now accepts string values. + :issue:`6173` by :user:`Ryad Zenine ` and + :user:`Devashish Deshpande `. - - Keyword arguments can now be supplied to ``func`` in - :class:`preprocessing.FunctionTransformer` by means of the ``kw_args`` - parameter. By `Brian McFee`_. +- Keyword arguments can now be supplied to ``func`` in + :class:`preprocessing.FunctionTransformer` by means of the ``kw_args`` + parameter. By `Brian McFee`_. - - :class:`feature_selection.SelectKBest` and :class:`feature_selection.SelectPercentile` - now accept score functions that take X, y as input and return only the scores. - By :user:`Nikolay Mayorov `. +- :class:`feature_selection.SelectKBest` and :class:`feature_selection.SelectPercentile` + now accept score functions that take X, y as input and return only the scores. + By :user:`Nikolay Mayorov `. Model evaluation and meta-estimators - - :class:`multiclass.OneVsOneClassifier` and :class:`multiclass.OneVsRestClassifier` - now support ``partial_fit``. By :user:`Asish Panda ` and - :user:`Philipp Dowling `. +- :class:`multiclass.OneVsOneClassifier` and :class:`multiclass.OneVsRestClassifier` + now support ``partial_fit``. By :user:`Asish Panda ` and + :user:`Philipp Dowling `. 
- - Added support for substituting or disabling :class:`pipeline.Pipeline` - and :class:`pipeline.FeatureUnion` components using the ``set_params`` - interface that powers :mod:`sklearn.grid_search`. - See :ref:`sphx_glr_auto_examples_plot_compare_reduction.py` - By `Joel Nothman`_ and :user:`Robert McGibbon `. +- Added support for substituting or disabling :class:`pipeline.Pipeline` + and :class:`pipeline.FeatureUnion` components using the ``set_params`` + interface that powers :mod:`sklearn.grid_search`. + See :ref:`sphx_glr_auto_examples_plot_compare_reduction.py` + By `Joel Nothman`_ and :user:`Robert McGibbon `. - - The new ``cv_results_`` attribute of :class:`model_selection.GridSearchCV` - (and :class:`model_selection.RandomizedSearchCV`) can be easily imported - into pandas as a ``DataFrame``. Ref :ref:`model_selection_changes` for - more information. :issue:`6697` by `Raghav RV`_. +- The new ``cv_results_`` attribute of :class:`model_selection.GridSearchCV` + (and :class:`model_selection.RandomizedSearchCV`) can be easily imported + into pandas as a ``DataFrame``. Ref :ref:`model_selection_changes` for + more information. :issue:`6697` by `Raghav RV`_. - - Generalization of :func:`model_selection.cross_val_predict`. - One can pass method names such as `predict_proba` to be used in the cross - validation framework instead of the default `predict`. - By :user:`Ori Ziv ` and :user:`Sears Merritt `. +- Generalization of :func:`model_selection.cross_val_predict`. + One can pass method names such as `predict_proba` to be used in the cross + validation framework instead of the default `predict`. + By :user:`Ori Ziv ` and :user:`Sears Merritt `. - - The training scores and time taken for training followed by scoring for - each search candidate are now available at the ``cv_results_`` dict. - See :ref:`model_selection_changes` for more information. - :issue:`7325` by :user:`Eugene Chen ` and `Raghav RV`_. 
+- The training scores and time taken for training followed by scoring for + each search candidate are now available at the ``cv_results_`` dict. + See :ref:`model_selection_changes` for more information. + :issue:`7325` by :user:`Eugene Chen ` and `Raghav RV`_. Metrics - - Added ``labels`` flag to :class:`metrics.log_loss` to explicitly provide - the labels when the number of classes in ``y_true`` and ``y_pred`` differ. - :issue:`7239` by :user:`Hong Guangguo ` with help from - :user:`Mads Jensen ` and :user:`Nelson Liu `. +- Added ``labels`` flag to :class:`metrics.log_loss` to explicitly provide + the labels when the number of classes in ``y_true`` and ``y_pred`` differ. + :issue:`7239` by :user:`Hong Guangguo ` with help from + :user:`Mads Jensen ` and :user:`Nelson Liu `. - - Support sparse contingency matrices in cluster evaluation - (:mod:`metrics.cluster.supervised`) to scale to a large number of - clusters. - :issue:`7419` by :user:`Gregory Stupp ` and `Joel Nothman`_. +- Support sparse contingency matrices in cluster evaluation + (:mod:`metrics.cluster.supervised`) to scale to a large number of + clusters. + :issue:`7419` by :user:`Gregory Stupp ` and `Joel Nothman`_. - - Add ``sample_weight`` parameter to :func:`metrics.matthews_corrcoef`. - By :user:`Jatin Shah ` and `Raghav RV`_. +- Add ``sample_weight`` parameter to :func:`metrics.matthews_corrcoef`. + By :user:`Jatin Shah ` and `Raghav RV`_. - - Speed up :func:`metrics.silhouette_score` by using vectorized operations. - By `Manoj Kumar`_. +- Speed up :func:`metrics.silhouette_score` by using vectorized operations. + By `Manoj Kumar`_. - - Add ``sample_weight`` parameter to :func:`metrics.confusion_matrix`. - By :user:`Bernardo Stein `. +- Add ``sample_weight`` parameter to :func:`metrics.confusion_matrix`. + By :user:`Bernardo Stein `. Miscellaneous - - Added ``n_jobs`` parameter to :class:`feature_selection.RFECV` to compute - the score on the test folds in parallel. 
By `Manoj Kumar`_ - - - Codebase does not contain C/C++ cython generated files: they are - generated during build. Distribution packages will still contain generated - C/C++ files. By :user:`Arthur Mensch `. - - - Reduce the memory usage for 32-bit float input arrays of - :func:`utils.sparse_func.mean_variance_axis` and - :func:`utils.sparse_func.incr_mean_variance_axis` by supporting cython - fused types. By :user:`YenChen Lin `. - - - The :func:`ignore_warnings` now accept a category argument to ignore only - the warnings of a specified type. By :user:`Thierry Guillemot `. - - - Added parameter ``return_X_y`` and return type ``(data, target) : tuple`` option to - :func:`load_iris` dataset - :issue:`7049`, - :func:`load_breast_cancer` dataset - :issue:`7152`, - :func:`load_digits` dataset, - :func:`load_diabetes` dataset, - :func:`load_linnerud` dataset, - :func:`load_boston` dataset - :issue:`7154` by - :user:`Manvendra Singh`. - - - Simplification of the ``clone`` function, deprecate support for estimators - that modify parameters in ``__init__``. :issue:`5540` by `Andreas Müller`_. - - - When unpickling a scikit-learn estimator in a different version than the one - the estimator was trained with, a ``UserWarning`` is raised, see :ref:`the documentation - on model persistence ` for more details. (:issue:`7248`) - By `Andreas Müller`_. +- Added ``n_jobs`` parameter to :class:`feature_selection.RFECV` to compute + the score on the test folds in parallel. By `Manoj Kumar`_ + +- Codebase does not contain C/C++ cython generated files: they are + generated during build. Distribution packages will still contain generated + C/C++ files. By :user:`Arthur Mensch `. + +- Reduce the memory usage for 32-bit float input arrays of + :func:`utils.sparse_func.mean_variance_axis` and + :func:`utils.sparse_func.incr_mean_variance_axis` by supporting cython + fused types. By :user:`YenChen Lin `. 
+ +- The :func:`ignore_warnings` now accept a category argument to ignore only + the warnings of a specified type. By :user:`Thierry Guillemot `. + +- Added parameter ``return_X_y`` and return type ``(data, target) : tuple`` option to + :func:`load_iris` dataset + :issue:`7049`, + :func:`load_breast_cancer` dataset + :issue:`7152`, + :func:`load_digits` dataset, + :func:`load_diabetes` dataset, + :func:`load_linnerud` dataset, + :func:`load_boston` dataset + :issue:`7154` by + :user:`Manvendra Singh`. + +- Simplification of the ``clone`` function, deprecate support for estimators + that modify parameters in ``__init__``. :issue:`5540` by `Andreas Müller`_. + +- When unpickling a scikit-learn estimator in a different version than the one + the estimator was trained with, a ``UserWarning`` is raised, see :ref:`the documentation + on model persistence ` for more details. (:issue:`7248`) + By `Andreas Müller`_. Bug fixes ......... Trees and ensembles - - Random forest, extra trees, decision trees and gradient boosting - won't accept anymore ``min_samples_split=1`` as at least 2 samples - are required to split a decision tree node. By `Arnaud Joly`_ +- Random forest, extra trees, decision trees and gradient boosting + won't accept anymore ``min_samples_split=1`` as at least 2 samples + are required to split a decision tree node. By `Arnaud Joly`_ - - :class:`ensemble.VotingClassifier` now raises ``NotFittedError`` if ``predict``, - ``transform`` or ``predict_proba`` are called on the non-fitted estimator. - by `Sebastian Raschka`_. +- :class:`ensemble.VotingClassifier` now raises ``NotFittedError`` if ``predict``, + ``transform`` or ``predict_proba`` are called on the non-fitted estimator. + by `Sebastian Raschka`_. - - Fix bug where :class:`ensemble.AdaBoostClassifier` and - :class:`ensemble.AdaBoostRegressor` would perform poorly if the - ``random_state`` was fixed - (:issue:`7411`). By `Joel Nothman`_. 
+- Fix bug where :class:`ensemble.AdaBoostClassifier` and + :class:`ensemble.AdaBoostRegressor` would perform poorly if the + ``random_state`` was fixed + (:issue:`7411`). By `Joel Nothman`_. - - Fix bug in ensembles with randomization where the ensemble would not - set ``random_state`` on base estimators in a pipeline or similar nesting. - (:issue:`7411`). Note, results for :class:`ensemble.BaggingClassifier` - :class:`ensemble.BaggingRegressor`, :class:`ensemble.AdaBoostClassifier` - and :class:`ensemble.AdaBoostRegressor` will now differ from previous - versions. By `Joel Nothman`_. +- Fix bug in ensembles with randomization where the ensemble would not + set ``random_state`` on base estimators in a pipeline or similar nesting. + (:issue:`7411`). Note, results for :class:`ensemble.BaggingClassifier`, + :class:`ensemble.BaggingRegressor`, :class:`ensemble.AdaBoostClassifier` + and :class:`ensemble.AdaBoostRegressor` will now differ from previous + versions. By `Joel Nothman`_. Linear, kernelized and related models - - Fixed incorrect gradient computation for ``loss='squared_epsilon_insensitive'`` in - :class:`linear_model.SGDClassifier` and :class:`linear_model.SGDRegressor` - (:issue:`6764`). By :user:`Wenhua Yang `. +- Fixed incorrect gradient computation for ``loss='squared_epsilon_insensitive'`` in + :class:`linear_model.SGDClassifier` and :class:`linear_model.SGDRegressor` + (:issue:`6764`). By :user:`Wenhua Yang `. - - Fix bug in :class:`linear_model.LogisticRegressionCV` where - ``solver='liblinear'`` did not accept ``class_weights='balanced``. - (:issue:`6817`). By `Tom Dupre la Tour`_. +- Fix bug in :class:`linear_model.LogisticRegressionCV` where + ``solver='liblinear'`` did not accept ``class_weight='balanced'``. + (:issue:`6817`). By `Tom Dupre la Tour`_. - - Fix bug in :class:`neighbors.RadiusNeighborsClassifier` where an error - occurred when there were outliers being labelled and a weight function - specified (:issue:`6902`). By - `LeonieBorne `_.
+- Fix bug in :class:`neighbors.RadiusNeighborsClassifier` where an error + occurred when there were outliers being labelled and a weight function + specified (:issue:`6902`). By + `LeonieBorne `_. - - Fix :class:`linear_model.ElasticNet` sparse decision function to match - output with dense in the multioutput case. +- Fix :class:`linear_model.ElasticNet` sparse decision function to match + output with dense in the multioutput case. Decomposition, manifold learning and clustering - - :class:`decomposition.RandomizedPCA` default number of `iterated_power` is 4 instead of 3. - :issue:`5141` by :user:`Giorgio Patrini `. +- :class:`decomposition.RandomizedPCA` default number of `iterated_power` is 4 instead of 3. + :issue:`5141` by :user:`Giorgio Patrini `. - - :func:`utils.extmath.randomized_svd` performs 4 power iterations by default, instead or 0. - In practice this is enough for obtaining a good approximation of the - true eigenvalues/vectors in the presence of noise. When `n_components` is - small (``< .1 * min(X.shape)``) `n_iter` is set to 7, unless the user specifies - a higher number. This improves precision with few components. - :issue:`5299` by :user:`Giorgio Patrini`. +- :func:`utils.extmath.randomized_svd` performs 4 power iterations by default, instead of 0. + In practice this is enough for obtaining a good approximation of the + true eigenvalues/vectors in the presence of noise. When `n_components` is + small (``< .1 * min(X.shape)``) `n_iter` is set to 7, unless the user specifies + a higher number. This improves precision with few components. + :issue:`5299` by :user:`Giorgio Patrini`. - - Whiten/non-whiten inconsistency between components of :class:`decomposition.PCA` - and :class:`decomposition.RandomizedPCA` (now factored into PCA, see the - New features) is fixed. `components_` are stored with no whitening. - :issue:`5299` by :user:`Giorgio Patrini `.
+- Whiten/non-whiten inconsistency between components of :class:`decomposition.PCA` + and :class:`decomposition.RandomizedPCA` (now factored into PCA, see the + New features) is fixed. `components_` are stored with no whitening. + :issue:`5299` by :user:`Giorgio Patrini `. - - Fixed bug in :func:`manifold.spectral_embedding` where diagonal of unnormalized - Laplacian matrix was incorrectly set to 1. :issue:`4995` by :user:`Peter Fischer `. +- Fixed bug in :func:`manifold.spectral_embedding` where diagonal of unnormalized + Laplacian matrix was incorrectly set to 1. :issue:`4995` by :user:`Peter Fischer `. - - Fixed incorrect initialization of :func:`utils.arpack.eigsh` on all - occurrences. Affects :class:`cluster.bicluster.SpectralBiclustering`, - :class:`decomposition.KernelPCA`, :class:`manifold.LocallyLinearEmbedding`, - and :class:`manifold.SpectralEmbedding` (:issue:`5012`). By - :user:`Peter Fischer `. +- Fixed incorrect initialization of :func:`utils.arpack.eigsh` on all + occurrences. Affects :class:`cluster.bicluster.SpectralBiclustering`, + :class:`decomposition.KernelPCA`, :class:`manifold.LocallyLinearEmbedding`, + and :class:`manifold.SpectralEmbedding` (:issue:`5012`). By + :user:`Peter Fischer `. - - Attribute ``explained_variance_ratio_`` calculated with the SVD solver - of :class:`discriminant_analysis.LinearDiscriminantAnalysis` now returns - correct results. By :user:`JPFrancoia ` +- Attribute ``explained_variance_ratio_`` calculated with the SVD solver + of :class:`discriminant_analysis.LinearDiscriminantAnalysis` now returns + correct results. By :user:`JPFrancoia ` Preprocessing and feature selection - - :func:`preprocessing.data._transform_selected` now always passes a copy - of ``X`` to transform function when ``copy=True`` (:issue:`7194`). By `Caio - Oliveira `_. +- :func:`preprocessing.data._transform_selected` now always passes a copy + of ``X`` to transform function when ``copy=True`` (:issue:`7194`). By `Caio + Oliveira `_. 
Model evaluation and meta-estimators - - :class:`model_selection.StratifiedKFold` now raises error if all n_labels - for individual classes is less than n_folds. - :issue:`6182` by :user:`Devashish Deshpande `. +- :class:`model_selection.StratifiedKFold` now raises error if all n_labels + for individual classes is less than n_folds. + :issue:`6182` by :user:`Devashish Deshpande `. - - Fixed bug in :class:`model_selection.StratifiedShuffleSplit` - where train and test sample could overlap in some edge cases, - see :issue:`6121` for - more details. By `Loic Esteve`_. +- Fixed bug in :class:`model_selection.StratifiedShuffleSplit` + where train and test sample could overlap in some edge cases, + see :issue:`6121` for + more details. By `Loic Esteve`_. - - Fix in :class:`sklearn.model_selection.StratifiedShuffleSplit` to - return splits of size ``train_size`` and ``test_size`` in all cases - (:issue:`6472`). By `Andreas Müller`_. +- Fix in :class:`sklearn.model_selection.StratifiedShuffleSplit` to + return splits of size ``train_size`` and ``test_size`` in all cases + (:issue:`6472`). By `Andreas Müller`_. - - Cross-validation of :class:`OneVsOneClassifier` and - :class:`OneVsRestClassifier` now works with precomputed kernels. - :issue:`7350` by :user:`Russell Smith `. +- Cross-validation of :class:`OneVsOneClassifier` and + :class:`OneVsRestClassifier` now works with precomputed kernels. + :issue:`7350` by :user:`Russell Smith `. - - Fix incomplete ``predict_proba`` method delegation from - :class:`model_selection.GridSearchCV` to - :class:`linear_model.SGDClassifier` (:issue:`7159`) - by `Yichuan Liu `_. +- Fix incomplete ``predict_proba`` method delegation from + :class:`model_selection.GridSearchCV` to + :class:`linear_model.SGDClassifier` (:issue:`7159`) + by `Yichuan Liu `_. Metrics - - Fix bug in :func:`metrics.silhouette_score` in which clusters of - size 1 were incorrectly scored. They should get a score of 0. - By `Joel Nothman`_. 
+- Fix bug in :func:`metrics.silhouette_score` in which clusters of + size 1 were incorrectly scored. They should get a score of 0. + By `Joel Nothman`_. - - Fix bug in :func:`metrics.silhouette_samples` so that it now works with - arbitrary labels, not just those ranging from 0 to n_clusters - 1. +- Fix bug in :func:`metrics.silhouette_samples` so that it now works with + arbitrary labels, not just those ranging from 0 to n_clusters - 1. - - Fix bug where expected and adjusted mutual information were incorrect if - cluster contingency cells exceeded ``2**16``. By `Joel Nothman`_. +- Fix bug where expected and adjusted mutual information were incorrect if + cluster contingency cells exceeded ``2**16``. By `Joel Nothman`_. - - :func:`metrics.pairwise.pairwise_distances` now converts arrays to - boolean arrays when required in ``scipy.spatial.distance``. - :issue:`5460` by `Tom Dupre la Tour`_. +- :func:`metrics.pairwise.pairwise_distances` now converts arrays to + boolean arrays when required in ``scipy.spatial.distance``. + :issue:`5460` by `Tom Dupre la Tour`_. - - Fix sparse input support in :func:`metrics.silhouette_score` as well as - example examples/text/document_clustering.py. By :user:`YenChen Lin `. +- Fix sparse input support in :func:`metrics.silhouette_score` as well as + example examples/text/document_clustering.py. By :user:`YenChen Lin `. - - :func:`metrics.roc_curve` and :func:`metrics.precision_recall_curve` no - longer round ``y_score`` values when creating ROC curves; this was causing - problems for users with very small differences in scores (:issue:`7353`). +- :func:`metrics.roc_curve` and :func:`metrics.precision_recall_curve` no + longer round ``y_score`` values when creating ROC curves; this was causing + problems for users with very small differences in scores (:issue:`7353`). 
Miscellaneous - - :func:`model_selection.tests._search._check_param_grid` now works correctly with all types - that extends/implements `Sequence` (except string), including range (Python 3.x) and xrange - (Python 2.x). :issue:`7323` by Viacheslav Kovalevskyi. +- :func:`model_selection.tests._search._check_param_grid` now works correctly with all types + that extends/implements `Sequence` (except string), including range (Python 3.x) and xrange + (Python 2.x). :issue:`7323` by Viacheslav Kovalevskyi. - - :func:`utils.extmath.randomized_range_finder` is more numerically stable when many - power iterations are requested, since it applies LU normalization by default. - If ``n_iter<2`` numerical issues are unlikely, thus no normalization is applied. - Other normalization options are available: ``'none', 'LU'`` and ``'QR'``. - :issue:`5141` by :user:`Giorgio Patrini `. +- :func:`utils.extmath.randomized_range_finder` is more numerically stable when many + power iterations are requested, since it applies LU normalization by default. + If ``n_iter<2`` numerical issues are unlikely, thus no normalization is applied. + Other normalization options are available: ``'none', 'LU'`` and ``'QR'``. + :issue:`5141` by :user:`Giorgio Patrini `. - - Fix a bug where some formats of ``scipy.sparse`` matrix, and estimators - with them as parameters, could not be passed to :func:`base.clone`. - By `Loic Esteve`_. +- Fix a bug where some formats of ``scipy.sparse`` matrix, and estimators + with them as parameters, could not be passed to :func:`base.clone`. + By `Loic Esteve`_. - - :func:`datasets.load_svmlight_file` now is able to read long int QID values. - :issue:`7101` by :user:`Ibraim Ganiev `. +- :func:`datasets.load_svmlight_file` now is able to read long int QID values. + :issue:`7101` by :user:`Ibraim Ganiev `. 
API changes summary @@ -1522,74 +1642,74 @@ API changes summary Linear, kernelized and related models - - ``residual_metric`` has been deprecated in :class:`linear_model.RANSACRegressor`. - Use ``loss`` instead. By `Manoj Kumar`_. +- ``residual_metric`` has been deprecated in :class:`linear_model.RANSACRegressor`. + Use ``loss`` instead. By `Manoj Kumar`_. - - Access to public attributes ``.X_`` and ``.y_`` has been deprecated in - :class:`isotonic.IsotonicRegression`. By :user:`Jonathan Arfa `. +- Access to public attributes ``.X_`` and ``.y_`` has been deprecated in + :class:`isotonic.IsotonicRegression`. By :user:`Jonathan Arfa `. Decomposition, manifold learning and clustering - - The old :class:`mixture.DPGMM` is deprecated in favor of the new - :class:`mixture.BayesianGaussianMixture` (with the parameter - ``weight_concentration_prior_type='dirichlet_process'``). - The new class solves the computational - problems of the old class and computes the Gaussian mixture with a - Dirichlet process prior faster than before. - :issue:`7295` by :user:`Wei Xue ` and :user:`Thierry Guillemot `. - - - The old :class:`mixture.VBGMM` is deprecated in favor of the new - :class:`mixture.BayesianGaussianMixture` (with the parameter - ``weight_concentration_prior_type='dirichlet_distribution'``). - The new class solves the computational - problems of the old class and computes the Variational Bayesian Gaussian - mixture faster than before. - :issue:`6651` by :user:`Wei Xue ` and :user:`Thierry Guillemot `. - - - The old :class:`mixture.GMM` is deprecated in favor of the new - :class:`mixture.GaussianMixture`. The new class computes the Gaussian mixture - faster than before and some of computational problems have been solved. - :issue:`6666` by :user:`Wei Xue ` and :user:`Thierry Guillemot `. 
+- The old :class:`mixture.DPGMM` is deprecated in favor of the new + :class:`mixture.BayesianGaussianMixture` (with the parameter + ``weight_concentration_prior_type='dirichlet_process'``). + The new class solves the computational + problems of the old class and computes the Gaussian mixture with a + Dirichlet process prior faster than before. + :issue:`7295` by :user:`Wei Xue ` and :user:`Thierry Guillemot `. + +- The old :class:`mixture.VBGMM` is deprecated in favor of the new + :class:`mixture.BayesianGaussianMixture` (with the parameter + ``weight_concentration_prior_type='dirichlet_distribution'``). + The new class solves the computational + problems of the old class and computes the Variational Bayesian Gaussian + mixture faster than before. + :issue:`6651` by :user:`Wei Xue ` and :user:`Thierry Guillemot `. + +- The old :class:`mixture.GMM` is deprecated in favor of the new + :class:`mixture.GaussianMixture`. The new class computes the Gaussian mixture + faster than before and some of computational problems have been solved. + :issue:`6666` by :user:`Wei Xue ` and :user:`Thierry Guillemot `. Model evaluation and meta-estimators - - The :mod:`sklearn.cross_validation`, :mod:`sklearn.grid_search` and - :mod:`sklearn.learning_curve` have been deprecated and the classes and - functions have been reorganized into the :mod:`sklearn.model_selection` - module. Ref :ref:`model_selection_changes` for more information. - :issue:`4294` by `Raghav RV`_. - - - The ``grid_scores_`` attribute of :class:`model_selection.GridSearchCV` - and :class:`model_selection.RandomizedSearchCV` is deprecated in favor of - the attribute ``cv_results_``. - Ref :ref:`model_selection_changes` for more information. - :issue:`6697` by `Raghav RV`_. - - - The parameters ``n_iter`` or ``n_folds`` in old CV splitters are replaced - by the new parameter ``n_splits`` since it can provide a consistent - and unambiguous interface to represent the number of train-test splits. 
- :issue:`7187` by :user:`YenChen Lin `. - - - ``classes`` parameter was renamed to ``labels`` in - :func:`metrics.hamming_loss`. :issue:`7260` by :user:`Sebastián Vanrell `. - - - The splitter classes ``LabelKFold``, ``LabelShuffleSplit``, - ``LeaveOneLabelOut`` and ``LeavePLabelsOut`` are renamed to - :class:`model_selection.GroupKFold`, - :class:`model_selection.GroupShuffleSplit`, - :class:`model_selection.LeaveOneGroupOut` - and :class:`model_selection.LeavePGroupsOut` respectively. - Also the parameter ``labels`` in the :func:`split` method of the newly - renamed splitters :class:`model_selection.LeaveOneGroupOut` and - :class:`model_selection.LeavePGroupsOut` is renamed to - ``groups``. Additionally in :class:`model_selection.LeavePGroupsOut`, - the parameter ``n_labels`` is renamed to ``n_groups``. - :issue:`6660` by `Raghav RV`_. - - - Error and loss names for ``scoring`` parameters are now prefixed by - ``'neg_'``, such as ``neg_mean_squared_error``. The unprefixed versions - are deprecated and will be removed in version 0.20. - :issue:`7261` by :user:`Tim Head `. +- The :mod:`sklearn.cross_validation`, :mod:`sklearn.grid_search` and + :mod:`sklearn.learning_curve` have been deprecated and the classes and + functions have been reorganized into the :mod:`sklearn.model_selection` + module. Ref :ref:`model_selection_changes` for more information. + :issue:`4294` by `Raghav RV`_. + +- The ``grid_scores_`` attribute of :class:`model_selection.GridSearchCV` + and :class:`model_selection.RandomizedSearchCV` is deprecated in favor of + the attribute ``cv_results_``. + Ref :ref:`model_selection_changes` for more information. + :issue:`6697` by `Raghav RV`_. + +- The parameters ``n_iter`` or ``n_folds`` in old CV splitters are replaced + by the new parameter ``n_splits`` since it can provide a consistent + and unambiguous interface to represent the number of train-test splits. + :issue:`7187` by :user:`YenChen Lin `. 
+ +- ``classes`` parameter was renamed to ``labels`` in + :func:`metrics.hamming_loss`. :issue:`7260` by :user:`Sebastián Vanrell `. + +- The splitter classes ``LabelKFold``, ``LabelShuffleSplit``, + ``LeaveOneLabelOut`` and ``LeavePLabelsOut`` are renamed to + :class:`model_selection.GroupKFold`, + :class:`model_selection.GroupShuffleSplit`, + :class:`model_selection.LeaveOneGroupOut` + and :class:`model_selection.LeavePGroupsOut` respectively. + Also the parameter ``labels`` in the :func:`split` method of the newly + renamed splitters :class:`model_selection.LeaveOneGroupOut` and + :class:`model_selection.LeavePGroupsOut` is renamed to + ``groups``. Additionally in :class:`model_selection.LeavePGroupsOut`, + the parameter ``n_labels`` is renamed to ``n_groups``. + :issue:`6660` by `Raghav RV`_. + +- Error and loss names for ``scoring`` parameters are now prefixed by + ``'neg_'``, such as ``neg_mean_squared_error``. The unprefixed versions + are deprecated and will be removed in version 0.20. + :issue:`7261` by :user:`Tim Head `. Code Contributors ----------------- @@ -1662,29 +1782,29 @@ Bug fixes ......... - - Upgrade vendored joblib to version 0.9.4 that fixes an important bug in - ``joblib.Parallel`` that can silently yield to wrong results when working - on datasets larger than 1MB: - https://github.com/joblib/joblib/blob/0.9.4/CHANGES.rst +- Upgrade vendored joblib to version 0.9.4 that fixes an important bug in + ``joblib.Parallel`` that can silently yield to wrong results when working + on datasets larger than 1MB: + https://github.com/joblib/joblib/blob/0.9.4/CHANGES.rst - - Fixed reading of Bunch pickles generated with scikit-learn - version <= 0.16. This can affect users who have already - downloaded a dataset with scikit-learn 0.16 and are loading it - with scikit-learn 0.17. See :issue:`6196` for - how this affected :func:`datasets.fetch_20newsgroups`. By `Loic - Esteve`_. 
+- Fixed reading of Bunch pickles generated with scikit-learn + version <= 0.16. This can affect users who have already + downloaded a dataset with scikit-learn 0.16 and are loading it + with scikit-learn 0.17. See :issue:`6196` for + how this affected :func:`datasets.fetch_20newsgroups`. By `Loic + Esteve`_. - - Fixed a bug that prevented using ROC AUC score to perform grid search on - several CPU / cores on large arrays. See :issue:`6147` - By `Olivier Grisel`_. +- Fixed a bug that prevented using ROC AUC score to perform grid search on + several CPU / cores on large arrays. See :issue:`6147` + By `Olivier Grisel`_. - - Fixed a bug that prevented to properly set the ``presort`` parameter - in :class:`ensemble.GradientBoostingRegressor`. See :issue:`5857` - By Andrew McCulloh. +- Fixed a bug that prevented to properly set the ``presort`` parameter + in :class:`ensemble.GradientBoostingRegressor`. See :issue:`5857` + By Andrew McCulloh. - - Fixed a joblib error when evaluating the perplexity of a - :class:`decomposition.LatentDirichletAllocation` model. See :issue:`6258` - By Chyi-Kwei Yau. +- Fixed a joblib error when evaluating the perplexity of a + :class:`decomposition.LatentDirichletAllocation` model. See :issue:`6258` + By Chyi-Kwei Yau. .. _changes_0_17: @@ -1700,425 +1820,425 @@ Changelog New features ............ - - All the Scaler classes but :class:`preprocessing.RobustScaler` can be fitted online by - calling `partial_fit`. By :user:`Giorgio Patrini `. - - - The new class :class:`ensemble.VotingClassifier` implements a - "majority rule" / "soft voting" ensemble classifier to combine - estimators for classification. By `Sebastian Raschka`_. - - - The new class :class:`preprocessing.RobustScaler` provides an - alternative to :class:`preprocessing.StandardScaler` for feature-wise - centering and range normalization that is robust to outliers. - By :user:`Thomas Unterthiner `. 
- - - The new class :class:`preprocessing.MaxAbsScaler` provides an - alternative to :class:`preprocessing.MinMaxScaler` for feature-wise - range normalization when the data is already centered or sparse. - By :user:`Thomas Unterthiner `. - - - The new class :class:`preprocessing.FunctionTransformer` turns a Python - function into a ``Pipeline``-compatible transformer object. - By Joe Jevnik. - - - The new classes :class:`cross_validation.LabelKFold` and - :class:`cross_validation.LabelShuffleSplit` generate train-test folds, - respectively similar to :class:`cross_validation.KFold` and - :class:`cross_validation.ShuffleSplit`, except that the folds are - conditioned on a label array. By `Brian McFee`_, :user:`Jean - Kossaifi ` and `Gilles Louppe`_. - - - :class:`decomposition.LatentDirichletAllocation` implements the Latent - Dirichlet Allocation topic model with online variational - inference. By :user:`Chyi-Kwei Yau `, with code based on an implementation - by Matt Hoffman. (:issue:`3659`) - - - The new solver ``sag`` implements a Stochastic Average Gradient descent - and is available in both :class:`linear_model.LogisticRegression` and - :class:`linear_model.Ridge`. This solver is very efficient for large - datasets. By :user:`Danny Sullivan ` and `Tom Dupre la Tour`_. - (:issue:`4738`) - - - The new solver ``cd`` implements a Coordinate Descent in - :class:`decomposition.NMF`. Previous solver based on Projected Gradient is - still available setting new parameter ``solver`` to ``pg``, but is - deprecated and will be removed in 0.19, along with - :class:`decomposition.ProjectedGradientNMF` and parameters ``sparseness``, - ``eta``, ``beta`` and ``nls_max_iter``. New parameters ``alpha`` and - ``l1_ratio`` control L1 and L2 regularization, and ``shuffle`` adds a - shuffling step in the ``cd`` solver. - By `Tom Dupre la Tour`_ and `Mathieu Blondel`_. +- All the Scaler classes but :class:`preprocessing.RobustScaler` can be fitted online by + calling `partial_fit`. 
By :user:`Giorgio Patrini `. + +- The new class :class:`ensemble.VotingClassifier` implements a + "majority rule" / "soft voting" ensemble classifier to combine + estimators for classification. By `Sebastian Raschka`_. + +- The new class :class:`preprocessing.RobustScaler` provides an + alternative to :class:`preprocessing.StandardScaler` for feature-wise + centering and range normalization that is robust to outliers. + By :user:`Thomas Unterthiner `. + +- The new class :class:`preprocessing.MaxAbsScaler` provides an + alternative to :class:`preprocessing.MinMaxScaler` for feature-wise + range normalization when the data is already centered or sparse. + By :user:`Thomas Unterthiner `. + +- The new class :class:`preprocessing.FunctionTransformer` turns a Python + function into a ``Pipeline``-compatible transformer object. + By Joe Jevnik. + +- The new classes :class:`cross_validation.LabelKFold` and + :class:`cross_validation.LabelShuffleSplit` generate train-test folds, + respectively similar to :class:`cross_validation.KFold` and + :class:`cross_validation.ShuffleSplit`, except that the folds are + conditioned on a label array. By `Brian McFee`_, :user:`Jean + Kossaifi ` and `Gilles Louppe`_. + +- :class:`decomposition.LatentDirichletAllocation` implements the Latent + Dirichlet Allocation topic model with online variational + inference. By :user:`Chyi-Kwei Yau `, with code based on an implementation + by Matt Hoffman. (:issue:`3659`) + +- The new solver ``sag`` implements a Stochastic Average Gradient descent + and is available in both :class:`linear_model.LogisticRegression` and + :class:`linear_model.Ridge`. This solver is very efficient for large + datasets. By :user:`Danny Sullivan ` and `Tom Dupre la Tour`_. + (:issue:`4738`) + +- The new solver ``cd`` implements a Coordinate Descent in + :class:`decomposition.NMF`. 
Previous solver based on Projected Gradient is + still available setting new parameter ``solver`` to ``pg``, but is + deprecated and will be removed in 0.19, along with + :class:`decomposition.ProjectedGradientNMF` and parameters ``sparseness``, + ``eta``, ``beta`` and ``nls_max_iter``. New parameters ``alpha`` and + ``l1_ratio`` control L1 and L2 regularization, and ``shuffle`` adds a + shuffling step in the ``cd`` solver. + By `Tom Dupre la Tour`_ and `Mathieu Blondel`_. Enhancements ............ - - :class:`manifold.TSNE` now supports approximate optimization via the - Barnes-Hut method, leading to much faster fitting. By Christopher Erick Moody. - (:issue:`4025`) +- :class:`manifold.TSNE` now supports approximate optimization via the + Barnes-Hut method, leading to much faster fitting. By Christopher Erick Moody. + (:issue:`4025`) - - :class:`cluster.mean_shift_.MeanShift` now supports parallel execution, - as implemented in the ``mean_shift`` function. By :user:`Martino - Sorbaro `. +- :class:`cluster.mean_shift_.MeanShift` now supports parallel execution, + as implemented in the ``mean_shift`` function. By :user:`Martino + Sorbaro `. - - :class:`naive_bayes.GaussianNB` now supports fitting with ``sample_weight``. - By `Jan Hendrik Metzen`_. +- :class:`naive_bayes.GaussianNB` now supports fitting with ``sample_weight``. + By `Jan Hendrik Metzen`_. - - :class:`dummy.DummyClassifier` now supports a prior fitting strategy. - By `Arnaud Joly`_. +- :class:`dummy.DummyClassifier` now supports a prior fitting strategy. + By `Arnaud Joly`_. - - Added a ``fit_predict`` method for :class:`mixture.GMM` and subclasses. - By :user:`Cory Lorenz `. +- Added a ``fit_predict`` method for :class:`mixture.GMM` and subclasses. + By :user:`Cory Lorenz `. - - Added the :func:`metrics.label_ranking_loss` metric. - By `Arnaud Joly`_. +- Added the :func:`metrics.label_ranking_loss` metric. + By `Arnaud Joly`_. - - Added the :func:`metrics.cohen_kappa_score` metric. 
+- Added the :func:`metrics.cohen_kappa_score` metric. - - Added a ``warm_start`` constructor parameter to the bagging ensemble - models to increase the size of the ensemble. By :user:`Tim Head `. +- Added a ``warm_start`` constructor parameter to the bagging ensemble + models to increase the size of the ensemble. By :user:`Tim Head `. - - Added option to use multi-output regression metrics without averaging. - By Konstantin Shmelkov and :user:`Michael Eickenberg`. +- Added option to use multi-output regression metrics without averaging. + By Konstantin Shmelkov and :user:`Michael Eickenberg`. - - Added ``stratify`` option to :func:`cross_validation.train_test_split` - for stratified splitting. By Miroslav Batchkarov. +- Added ``stratify`` option to :func:`cross_validation.train_test_split` + for stratified splitting. By Miroslav Batchkarov. - - The :func:`tree.export_graphviz` function now supports aesthetic - improvements for :class:`tree.DecisionTreeClassifier` and - :class:`tree.DecisionTreeRegressor`, including options for coloring nodes - by their majority class or impurity, showing variable names, and using - node proportions instead of raw sample counts. By `Trevor Stephens`_. +- The :func:`tree.export_graphviz` function now supports aesthetic + improvements for :class:`tree.DecisionTreeClassifier` and + :class:`tree.DecisionTreeRegressor`, including options for coloring nodes + by their majority class or impurity, showing variable names, and using + node proportions instead of raw sample counts. By `Trevor Stephens`_. - - Improved speed of ``newton-cg`` solver in - :class:`linear_model.LogisticRegression`, by avoiding loss computation. - By `Mathieu Blondel`_ and `Tom Dupre la Tour`_. +- Improved speed of ``newton-cg`` solver in + :class:`linear_model.LogisticRegression`, by avoiding loss computation. + By `Mathieu Blondel`_ and `Tom Dupre la Tour`_. 
- - The ``class_weight="auto"`` heuristic in classifiers supporting - ``class_weight`` was deprecated and replaced by the ``class_weight="balanced"`` - option, which has a simpler formula and interpretation. - By `Hanna Wallach`_ and `Andreas Müller`_. +- The ``class_weight="auto"`` heuristic in classifiers supporting + ``class_weight`` was deprecated and replaced by the ``class_weight="balanced"`` + option, which has a simpler formula and interpretation. + By `Hanna Wallach`_ and `Andreas Müller`_. - - Add ``class_weight`` parameter to automatically weight samples by class - frequency for :class:`linear_model.PassiveAgressiveClassifier`. By - `Trevor Stephens`_. +- Add ``class_weight`` parameter to automatically weight samples by class + frequency for :class:`linear_model.PassiveAggressiveClassifier`. By + `Trevor Stephens`_. - - Added backlinks from the API reference pages to the user guide. By - `Andreas Müller`_. +- Added backlinks from the API reference pages to the user guide. By + `Andreas Müller`_. - - The ``labels`` parameter to :func:`sklearn.metrics.f1_score`, - :func:`sklearn.metrics.fbeta_score`, - :func:`sklearn.metrics.recall_score` and - :func:`sklearn.metrics.precision_score` has been extended. - It is now possible to ignore one or more labels, such as where - a multiclass problem has a majority class to ignore. By `Joel Nothman`_. +- The ``labels`` parameter to :func:`sklearn.metrics.f1_score`, + :func:`sklearn.metrics.fbeta_score`, + :func:`sklearn.metrics.recall_score` and + :func:`sklearn.metrics.precision_score` has been extended. + It is now possible to ignore one or more labels, such as where + a multiclass problem has a majority class to ignore. By `Joel Nothman`_. - - Add ``sample_weight`` support to :class:`linear_model.RidgeClassifier`. - By `Trevor Stephens`_. +- Add ``sample_weight`` support to :class:`linear_model.RidgeClassifier`. + By `Trevor Stephens`_.
- - Provide an option for sparse output from - :func:`sklearn.metrics.pairwise.cosine_similarity`. By - :user:`Jaidev Deshpande `. +- Provide an option for sparse output from + :func:`sklearn.metrics.pairwise.cosine_similarity`. By + :user:`Jaidev Deshpande `. - - Add :func:`minmax_scale` to provide a function interface for - :class:`MinMaxScaler`. By :user:`Thomas Unterthiner `. +- Add :func:`minmax_scale` to provide a function interface for + :class:`MinMaxScaler`. By :user:`Thomas Unterthiner `. - - ``dump_svmlight_file`` now handles multi-label datasets. - By Chih-Wei Chang. +- ``dump_svmlight_file`` now handles multi-label datasets. + By Chih-Wei Chang. - - RCV1 dataset loader (:func:`sklearn.datasets.fetch_rcv1`). - By `Tom Dupre la Tour`_. +- RCV1 dataset loader (:func:`sklearn.datasets.fetch_rcv1`). + By `Tom Dupre la Tour`_. - - The "Wisconsin Breast Cancer" classical two-class classification dataset - is now included in scikit-learn, available with - :func:`sklearn.dataset.load_breast_cancer`. +- The "Wisconsin Breast Cancer" classical two-class classification dataset + is now included in scikit-learn, available with + :func:`sklearn.datasets.load_breast_cancer`. - - Upgraded to joblib 0.9.3 to benefit from the new automatic batching of - short tasks. This makes it possible for scikit-learn to benefit from - parallelism when many very short tasks are executed in parallel, for - instance by the :class:`grid_search.GridSearchCV` meta-estimator - with ``n_jobs > 1`` used with a large grid of parameters on a small - dataset. By `Vlad Niculae`_, `Olivier Grisel`_ and `Loic Esteve`_. +- Upgraded to joblib 0.9.3 to benefit from the new automatic batching of + short tasks. This makes it possible for scikit-learn to benefit from + parallelism when many very short tasks are executed in parallel, for + instance by the :class:`grid_search.GridSearchCV` meta-estimator + with ``n_jobs > 1`` used with a large grid of parameters on a small + dataset.
By `Vlad Niculae`_, `Olivier Grisel`_ and `Loic Esteve`_. - - For more details about changes in joblib 0.9.3 see the release notes: - https://github.com/joblib/joblib/blob/master/CHANGES.rst#release-093 +- For more details about changes in joblib 0.9.3 see the release notes: + https://github.com/joblib/joblib/blob/master/CHANGES.rst#release-093 - - Improved speed (3 times per iteration) of - :class:`decomposition.DictLearning` with coordinate descent method - from :class:`linear_model.Lasso`. By :user:`Arthur Mensch `. +- Improved speed (3 times per iteration) of + :class:`decomposition.DictLearning` with coordinate descent method + from :class:`linear_model.Lasso`. By :user:`Arthur Mensch `. - - Parallel processing (threaded) for queries of nearest neighbors - (using the ball-tree) by Nikolay Mayorov. +- Parallel processing (threaded) for queries of nearest neighbors + (using the ball-tree) by Nikolay Mayorov. - - Allow :func:`datasets.make_multilabel_classification` to output - a sparse ``y``. By Kashif Rasul. +- Allow :func:`datasets.make_multilabel_classification` to output + a sparse ``y``. By Kashif Rasul. - - :class:`cluster.DBSCAN` now accepts a sparse matrix of precomputed - distances, allowing memory-efficient distance precomputation. By - `Joel Nothman`_. +- :class:`cluster.DBSCAN` now accepts a sparse matrix of precomputed + distances, allowing memory-efficient distance precomputation. By + `Joel Nothman`_. - - :class:`tree.DecisionTreeClassifier` now exposes an ``apply`` method - for retrieving the leaf indices samples are predicted as. By - :user:`Daniel Galvez ` and `Gilles Louppe`_. +- :class:`tree.DecisionTreeClassifier` now exposes an ``apply`` method + for retrieving the leaf indices samples are predicted as. By + :user:`Daniel Galvez ` and `Gilles Louppe`_. 
- - Speed up decision tree regressors, random forest regressors, extra trees - regressors and gradient boosting estimators by computing a proxy - of the impurity improvement during the tree growth. The proxy quantity is - such that the split that maximizes this value also maximizes the impurity - improvement. By `Arnaud Joly`_, :user:`Jacob Schreiber ` - and `Gilles Louppe`_. +- Speed up decision tree regressors, random forest regressors, extra trees + regressors and gradient boosting estimators by computing a proxy + of the impurity improvement during the tree growth. The proxy quantity is + such that the split that maximizes this value also maximizes the impurity + improvement. By `Arnaud Joly`_, :user:`Jacob Schreiber ` + and `Gilles Louppe`_. - - Speed up tree based methods by reducing the number of computations needed - when computing the impurity measure taking into account linear - relationship of the computed statistics. The effect is particularly - visible with extra trees and on datasets with categorical or sparse - features. By `Arnaud Joly`_. +- Speed up tree based methods by reducing the number of computations needed + when computing the impurity measure taking into account linear + relationship of the computed statistics. The effect is particularly + visible with extra trees and on datasets with categorical or sparse + features. By `Arnaud Joly`_. - - :class:`ensemble.GradientBoostingRegressor` and - :class:`ensemble.GradientBoostingClassifier` now expose an ``apply`` - method for retrieving the leaf indices each sample ends up in under - each try. By :user:`Jacob Schreiber `. +- :class:`ensemble.GradientBoostingRegressor` and + :class:`ensemble.GradientBoostingClassifier` now expose an ``apply`` + method for retrieving the leaf indices each sample ends up in under + each tree. By :user:`Jacob Schreiber `. - - Add ``sample_weight`` support to :class:`linear_model.LinearRegression`. - By Sonny Hu.
(:issue:`#4881`) +- Add ``sample_weight`` support to :class:`linear_model.LinearRegression`. + By Sonny Hu. (:issue:`4881`) - - Add ``n_iter_without_progress`` to :class:`manifold.TSNE` to control - the stopping criterion. By Santi Villalba. (:issue:`5186`) +- Add ``n_iter_without_progress`` to :class:`manifold.TSNE` to control + the stopping criterion. By Santi Villalba. (:issue:`5186`) - - Added optional parameter ``random_state`` in :class:`linear_model.Ridge` - , to set the seed of the pseudo random generator used in ``sag`` solver. By `Tom Dupre la Tour`_. +- Added optional parameter ``random_state`` in :class:`linear_model.Ridge`, + to set the seed of the pseudo random generator used in the ``sag`` solver. By `Tom Dupre la Tour`_. - - Added optional parameter ``warm_start`` in - :class:`linear_model.LogisticRegression`. If set to True, the solvers - ``lbfgs``, ``newton-cg`` and ``sag`` will be initialized with the - coefficients computed in the previous fit. By `Tom Dupre la Tour`_. +- Added optional parameter ``warm_start`` in + :class:`linear_model.LogisticRegression`. If set to True, the solvers + ``lbfgs``, ``newton-cg`` and ``sag`` will be initialized with the + coefficients computed in the previous fit. By `Tom Dupre la Tour`_. - - Added ``sample_weight`` support to :class:`linear_model.LogisticRegression` for - the ``lbfgs``, ``newton-cg``, and ``sag`` solvers. By `Valentin Stolbunov`_. - Support added to the ``liblinear`` solver. By `Manoj Kumar`_. +- Added ``sample_weight`` support to :class:`linear_model.LogisticRegression` for + the ``lbfgs``, ``newton-cg``, and ``sag`` solvers. By `Valentin Stolbunov`_. + Support added to the ``liblinear`` solver. By `Manoj Kumar`_. - - Added optional parameter ``presort`` to :class:`ensemble.GradientBoostingRegressor` - and :class:`ensemble.GradientBoostingClassifier`, keeping default behavior - the same. This allows gradient boosters to turn off presorting when building - deep trees or using sparse data.
By :user:`Jacob Schreiber `. +- Added optional parameter ``presort`` to :class:`ensemble.GradientBoostingRegressor` + and :class:`ensemble.GradientBoostingClassifier`, keeping default behavior + the same. This allows gradient boosters to turn off presorting when building + deep trees or using sparse data. By :user:`Jacob Schreiber `. - - Altered :func:`metrics.roc_curve` to drop unnecessary thresholds by - default. By :user:`Graham Clenaghan `. +- Altered :func:`metrics.roc_curve` to drop unnecessary thresholds by + default. By :user:`Graham Clenaghan `. - - Added :class:`feature_selection.SelectFromModel` meta-transformer which can - be used along with estimators that have `coef_` or `feature_importances_` - attribute to select important features of the input data. By - :user:`Maheshakya Wijewardena `, `Joel Nothman`_ and `Manoj Kumar`_. +- Added :class:`feature_selection.SelectFromModel` meta-transformer which can + be used along with estimators that have `coef_` or `feature_importances_` + attribute to select important features of the input data. By + :user:`Maheshakya Wijewardena `, `Joel Nothman`_ and `Manoj Kumar`_. - - Added :func:`metrics.pairwise.laplacian_kernel`. By `Clyde Fare `_. +- Added :func:`metrics.pairwise.laplacian_kernel`. By `Clyde Fare `_. - - :class:`covariance.GraphLasso` allows separate control of the convergence criterion - for the Elastic-Net subproblem via the ``enet_tol`` parameter. +- :class:`covariance.GraphLasso` allows separate control of the convergence criterion + for the Elastic-Net subproblem via the ``enet_tol`` parameter. - - Improved verbosity in :class:`decomposition.DictionaryLearning`. +- Improved verbosity in :class:`decomposition.DictionaryLearning`. - - :class:`ensemble.RandomForestClassifier` and - :class:`ensemble.RandomForestRegressor` no longer explicitly store the - samples used in bagging, resulting in a much reduced memory footprint for - storing random forest models. 
+- :class:`ensemble.RandomForestClassifier` and + :class:`ensemble.RandomForestRegressor` no longer explicitly store the + samples used in bagging, resulting in a much reduced memory footprint for + storing random forest models. - - Added ``positive`` option to :class:`linear_model.Lars` and - :func:`linear_model.lars_path` to force coefficients to be positive. - (:issue:`5131`) +- Added ``positive`` option to :class:`linear_model.Lars` and + :func:`linear_model.lars_path` to force coefficients to be positive. + (:issue:`5131`) - - Added the ``X_norm_squared`` parameter to :func:`metrics.pairwise.euclidean_distances` - to provide precomputed squared norms for ``X``. +- Added the ``X_norm_squared`` parameter to :func:`metrics.pairwise.euclidean_distances` + to provide precomputed squared norms for ``X``. - - Added the ``fit_predict`` method to :class:`pipeline.Pipeline`. +- Added the ``fit_predict`` method to :class:`pipeline.Pipeline`. - - Added the :func:`preprocessing.min_max_scale` function. +- Added the :func:`preprocessing.min_max_scale` function. Bug fixes ......... - - Fixed non-determinism in :class:`dummy.DummyClassifier` with sparse - multi-label output. By `Andreas Müller`_. +- Fixed non-determinism in :class:`dummy.DummyClassifier` with sparse + multi-label output. By `Andreas Müller`_. - - Fixed the output shape of :class:`linear_model.RANSACRegressor` to - ``(n_samples, )``. By `Andreas Müller`_. +- Fixed the output shape of :class:`linear_model.RANSACRegressor` to + ``(n_samples, )``. By `Andreas Müller`_. - - Fixed bug in :class:`decomposition.DictLearning` when ``n_jobs < 0``. By - `Andreas Müller`_. +- Fixed bug in :class:`decomposition.DictLearning` when ``n_jobs < 0``. By + `Andreas Müller`_. - - Fixed bug where :class:`grid_search.RandomizedSearchCV` could consume a - lot of memory for large discrete grids. By `Joel Nothman`_. +- Fixed bug where :class:`grid_search.RandomizedSearchCV` could consume a + lot of memory for large discrete grids. 
By `Joel Nothman`_. - - Fixed bug in :class:`linear_model.LogisticRegressionCV` where `penalty` was ignored - in the final fit. By `Manoj Kumar`_. +- Fixed bug in :class:`linear_model.LogisticRegressionCV` where `penalty` was ignored + in the final fit. By `Manoj Kumar`_. - - Fixed bug in :class:`ensemble.forest.ForestClassifier` while computing - oob_score and X is a sparse.csc_matrix. By :user:`Ankur Ankan `. +- Fixed bug in :class:`ensemble.forest.ForestClassifier` while computing + oob_score and X is a sparse.csc_matrix. By :user:`Ankur Ankan `. - - All regressors now consistently handle and warn when given ``y`` that is of - shape ``(n_samples, 1)``. By `Andreas Müller`_ and Henry Lin. - (:issue:`5431`) +- All regressors now consistently handle and warn when given ``y`` that is of + shape ``(n_samples, 1)``. By `Andreas Müller`_ and Henry Lin. + (:issue:`5431`) - - Fix in :class:`cluster.KMeans` cluster reassignment for sparse input by - `Lars Buitinck`_. +- Fix in :class:`cluster.KMeans` cluster reassignment for sparse input by + `Lars Buitinck`_. - - Fixed a bug in :class:`lda.LDA` that could cause asymmetric covariance - matrices when using shrinkage. By `Martin Billinger`_. +- Fixed a bug in :class:`lda.LDA` that could cause asymmetric covariance + matrices when using shrinkage. By `Martin Billinger`_. - - Fixed :func:`cross_validation.cross_val_predict` for estimators with - sparse predictions. By Buddha Prakash. +- Fixed :func:`cross_validation.cross_val_predict` for estimators with + sparse predictions. By Buddha Prakash. - - Fixed the ``predict_proba`` method of :class:`linear_model.LogisticRegression` - to use soft-max instead of one-vs-rest normalization. By `Manoj Kumar`_. - (:issue:`5182`) +- Fixed the ``predict_proba`` method of :class:`linear_model.LogisticRegression` + to use soft-max instead of one-vs-rest normalization. By `Manoj Kumar`_. 
+ (:issue:`5182`) - - Fixed the :func:`partial_fit` method of :class:`linear_model.SGDClassifier` - when called with ``average=True``. By :user:`Andrew Lamb `. - (:issue:`5282`) +- Fixed the :func:`partial_fit` method of :class:`linear_model.SGDClassifier` + when called with ``average=True``. By :user:`Andrew Lamb `. + (:issue:`5282`) - - Dataset fetchers use different filenames under Python 2 and Python 3 to - avoid pickling compatibility issues. By `Olivier Grisel`_. - (:issue:`5355`) +- Dataset fetchers use different filenames under Python 2 and Python 3 to + avoid pickling compatibility issues. By `Olivier Grisel`_. + (:issue:`5355`) - - Fixed a bug in :class:`naive_bayes.GaussianNB` which caused classification - results to depend on scale. By `Jake Vanderplas`_. +- Fixed a bug in :class:`naive_bayes.GaussianNB` which caused classification + results to depend on scale. By `Jake Vanderplas`_. - - Fixed temporarily :class:`linear_model.Ridge`, which was incorrect - when fitting the intercept in the case of sparse data. The fix - automatically changes the solver to 'sag' in this case. - :issue:`5360` by `Tom Dupre la Tour`_. +- Fixed temporarily :class:`linear_model.Ridge`, which was incorrect + when fitting the intercept in the case of sparse data. The fix + automatically changes the solver to 'sag' in this case. + :issue:`5360` by `Tom Dupre la Tour`_. - - Fixed a performance bug in :class:`decomposition.RandomizedPCA` on data - with a large number of features and fewer samples. (:issue:`4478`) - By `Andreas Müller`_, `Loic Esteve`_ and :user:`Giorgio Patrini `. +- Fixed a performance bug in :class:`decomposition.RandomizedPCA` on data + with a large number of features and fewer samples. (:issue:`4478`) + By `Andreas Müller`_, `Loic Esteve`_ and :user:`Giorgio Patrini `. - - Fixed bug in :class:`cross_decomposition.PLS` that yielded unstable and - platform dependent output, and failed on `fit_transform`. - By :user:`Arthur Mensch `. 
+- Fixed bug in :class:`cross_decomposition.PLS` that yielded unstable and + platform dependent output, and failed on `fit_transform`. + By :user:`Arthur Mensch `. - - Fixes to the ``Bunch`` class used to store datasets. +- Fixes to the ``Bunch`` class used to store datasets. - - Fixed :func:`ensemble.plot_partial_dependence` ignoring the - ``percentiles`` parameter. +- Fixed :func:`ensemble.plot_partial_dependence` ignoring the + ``percentiles`` parameter. - - Providing a ``set`` as vocabulary in ``CountVectorizer`` no longer - leads to inconsistent results when pickling. +- Providing a ``set`` as vocabulary in ``CountVectorizer`` no longer + leads to inconsistent results when pickling. - - Fixed the conditions on when a precomputed Gram matrix needs to - be recomputed in :class:`linear_model.LinearRegression`, - :class:`linear_model.OrthogonalMatchingPursuit`, - :class:`linear_model.Lasso` and :class:`linear_model.ElasticNet`. +- Fixed the conditions on when a precomputed Gram matrix needs to + be recomputed in :class:`linear_model.LinearRegression`, + :class:`linear_model.OrthogonalMatchingPursuit`, + :class:`linear_model.Lasso` and :class:`linear_model.ElasticNet`. - - Fixed inconsistent memory layout in the coordinate descent solver - that affected :class:`linear_model.DictionaryLearning` and - :class:`covariance.GraphLasso`. (:issue:`5337`) - By `Olivier Grisel`_. +- Fixed inconsistent memory layout in the coordinate descent solver + that affected :class:`linear_model.DictionaryLearning` and + :class:`covariance.GraphLasso`. (:issue:`5337`) + By `Olivier Grisel`_. - - :class:`manifold.LocallyLinearEmbedding` no longer ignores the ``reg`` - parameter. +- :class:`manifold.LocallyLinearEmbedding` no longer ignores the ``reg`` + parameter. - - Nearest Neighbor estimators with custom distance metrics can now be pickled. - (:issue:`4362`) +- Nearest Neighbor estimators with custom distance metrics can now be pickled. 
+ (:issue:`4362`) - - Fixed a bug in :class:`pipeline.FeatureUnion` where ``transformer_weights`` - were not properly handled when performing grid-searches. +- Fixed a bug in :class:`pipeline.FeatureUnion` where ``transformer_weights`` + were not properly handled when performing grid-searches. - - Fixed a bug in :class:`linear_model.LogisticRegression` and - :class:`linear_model.LogisticRegressionCV` when using - ``class_weight='balanced'```or ``class_weight='auto'``. - By `Tom Dupre la Tour`_. +- Fixed a bug in :class:`linear_model.LogisticRegression` and + :class:`linear_model.LogisticRegressionCV` when using + ``class_weight='balanced'`` or ``class_weight='auto'``. + By `Tom Dupre la Tour`_. - - Fixed bug :issue:`5495` when - doing OVR(SVC(decision_function_shape="ovr")). Fixed by - :user:`Elvis Dohmatob `. +- Fixed bug :issue:`5495` when + doing OVR(SVC(decision_function_shape="ovr")). Fixed by + :user:`Elvis Dohmatob `. API changes summary ------------------- - - Attribute `data_min`, `data_max` and `data_range` in - :class:`preprocessing.MinMaxScaler` are deprecated and won't be available - from 0.19. Instead, the class now exposes `data_min_`, `data_max_` - and `data_range_`. By :user:`Giorgio Patrini `. +- Attributes `data_min`, `data_max` and `data_range` in + :class:`preprocessing.MinMaxScaler` are deprecated and won't be available + from 0.19. Instead, the class now exposes `data_min_`, `data_max_` + and `data_range_`. By :user:`Giorgio Patrini `. - - All Scaler classes now have an `scale_` attribute, the feature-wise - rescaling applied by their `transform` methods. The old attribute `std_` - in :class:`preprocessing.StandardScaler` is deprecated and superseded - by `scale_`; it won't be available in 0.19. By :user:`Giorgio Patrini `. +- All Scaler classes now have a `scale_` attribute, the feature-wise + rescaling applied by their `transform` methods.
The old attribute `std_` + in :class:`preprocessing.StandardScaler` is deprecated and superseded + by `scale_`; it won't be available in 0.19. By :user:`Giorgio Patrini `. - - :class:`svm.SVC`` and :class:`svm.NuSVC` now have an ``decision_function_shape`` - parameter to make their decision function of shape ``(n_samples, n_classes)`` - by setting ``decision_function_shape='ovr'``. This will be the default behavior - starting in 0.19. By `Andreas Müller`_. +- :class:`svm.SVC` and :class:`svm.NuSVC` now have a ``decision_function_shape`` + parameter to make their decision function of shape ``(n_samples, n_classes)`` + by setting ``decision_function_shape='ovr'``. This will be the default behavior + starting in 0.19. By `Andreas Müller`_. - - Passing 1D data arrays as input to estimators is now deprecated as it - caused confusion in how the array elements should be interpreted - as features or as samples. All data arrays are now expected - to be explicitly shaped ``(n_samples, n_features)``. - By :user:`Vighnesh Birodkar `. +- Passing 1D data arrays as input to estimators is now deprecated as it + caused confusion in how the array elements should be interpreted + as features or as samples. All data arrays are now expected + to be explicitly shaped ``(n_samples, n_features)``. + By :user:`Vighnesh Birodkar `. - - :class:`lda.LDA` and :class:`qda.QDA` have been moved to - :class:`discriminant_analysis.LinearDiscriminantAnalysis` and - :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`. +- :class:`lda.LDA` and :class:`qda.QDA` have been moved to + :class:`discriminant_analysis.LinearDiscriminantAnalysis` and + :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`.
- - The ``store_covariance`` and ``tol`` parameters have been moved from - the fit method to the constructor in - :class:`discriminant_analysis.LinearDiscriminantAnalysis` and the - ``store_covariances`` and ``tol`` parameters have been moved from the - fit method to the constructor in - :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`. +- The ``store_covariance`` and ``tol`` parameters have been moved from + the fit method to the constructor in + :class:`discriminant_analysis.LinearDiscriminantAnalysis` and the + ``store_covariances`` and ``tol`` parameters have been moved from the + fit method to the constructor in + :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`. - - Models inheriting from ``_LearntSelectorMixin`` will no longer support the - transform methods. (i.e, RandomForests, GradientBoosting, LogisticRegression, - DecisionTrees, SVMs and SGD related models). Wrap these models around the - metatransfomer :class:`feature_selection.SelectFromModel` to remove - features (according to `coefs_` or `feature_importances_`) - which are below a certain threshold value instead. +- Models inheriting from ``_LearntSelectorMixin`` will no longer support the + transform methods (i.e., RandomForests, GradientBoosting, LogisticRegression, + DecisionTrees, SVMs and SGD related models). Wrap these models with the + meta-transformer :class:`feature_selection.SelectFromModel` to remove + features (according to `coef_` or `feature_importances_`) + which are below a certain threshold value instead. - - :class:`cluster.KMeans` re-runs cluster-assignments in case of non-convergence, - to ensure consistency of ``predict(X)`` and ``labels_``. By - :user:`Vighnesh Birodkar `. +- :class:`cluster.KMeans` re-runs cluster-assignments in case of non-convergence, + to ensure consistency of ``predict(X)`` and ``labels_``. By + :user:`Vighnesh Birodkar `. - - Classifier and Regressor models are now tagged as such using the - ``_estimator_type`` attribute.
+- Classifier and Regressor models are now tagged as such using the + ``_estimator_type`` attribute. - - Cross-validation iterators always provide indices into training and test set, - not boolean masks. +- Cross-validation iterators always provide indices into training and test set, + not boolean masks. - - The ``decision_function`` on all regressors was deprecated and will be - removed in 0.19. Use ``predict`` instead. +- The ``decision_function`` on all regressors was deprecated and will be + removed in 0.19. Use ``predict`` instead. - - :func:`datasets.load_lfw_pairs` is deprecated and will be removed in 0.19. - Use :func:`datasets.fetch_lfw_pairs` instead. +- :func:`datasets.load_lfw_pairs` is deprecated and will be removed in 0.19. + Use :func:`datasets.fetch_lfw_pairs` instead. - - The deprecated ``hmm`` module was removed. +- The deprecated ``hmm`` module was removed. - - The deprecated ``Bootstrap`` cross-validation iterator was removed. +- The deprecated ``Bootstrap`` cross-validation iterator was removed. - - The deprecated ``Ward`` and ``WardAgglomerative`` classes have been removed. - Use :class:`clustering.AgglomerativeClustering` instead. +- The deprecated ``Ward`` and ``WardAgglomerative`` classes have been removed. + Use :class:`clustering.AgglomerativeClustering` instead. - - :func:`cross_validation.check_cv` is now a public function. +- :func:`cross_validation.check_cv` is now a public function. - - The property ``residues_`` of :class:`linear_model.LinearRegression` is deprecated - and will be removed in 0.19. +- The property ``residues_`` of :class:`linear_model.LinearRegression` is deprecated + and will be removed in 0.19. - - The deprecated ``n_jobs`` parameter of :class:`linear_model.LinearRegression` has been moved - to the constructor. +- The deprecated ``n_jobs`` parameter of :class:`linear_model.LinearRegression` has been moved + to the constructor. 
- - Removed deprecated ``class_weight`` parameter from :class:`linear_model.SGDClassifier`'s ``fit`` - method. Use the construction parameter instead. +- Removed deprecated ``class_weight`` parameter from :class:`linear_model.SGDClassifier`'s ``fit`` + method. Use the construction parameter instead. - - The deprecated support for the sequence of sequences (or list of lists) multilabel - format was removed. To convert to and from the supported binary - indicator matrix format, use - :class:`MultiLabelBinarizer `. +- The deprecated support for the sequence of sequences (or list of lists) multilabel + format was removed. To convert to and from the supported binary + indicator matrix format, use + :class:`MultiLabelBinarizer `. - - The behavior of calling the ``inverse_transform`` method of ``Pipeline.pipeline`` will - change in 0.19. It will no longer reshape one-dimensional input to two-dimensional input. +- The behavior of calling the ``inverse_transform`` method of ``Pipeline.pipeline`` will + change in 0.19. It will no longer reshape one-dimensional input to two-dimensional input. - - The deprecated attributes ``indicator_matrix_``, ``multilabel_`` and ``classes_`` of - :class:`preprocessing.LabelBinarizer` were removed. +- The deprecated attributes ``indicator_matrix_``, ``multilabel_`` and ``classes_`` of + :class:`preprocessing.LabelBinarizer` were removed. - - Using ``gamma=0`` in :class:`svm.SVC` and :class:`svm.SVR` to automatically set the - gamma to ``1. / n_features`` is deprecated and will be removed in 0.19. - Use ``gamma="auto"`` instead. +- Using ``gamma=0`` in :class:`svm.SVC` and :class:`svm.SVR` to automatically set the + gamma to ``1. / n_features`` is deprecated and will be removed in 0.19. + Use ``gamma="auto"`` instead. Code Contributors ----------------- @@ -2168,26 +2288,26 @@ Changelog Bug fixes ......... - - Allow input data larger than ``block_size`` in - :class:`covariance.LedoitWolf` by `Andreas Müller`_. 
+- Allow input data larger than ``block_size`` in + :class:`covariance.LedoitWolf` by `Andreas Müller`_. - - Fix a bug in :class:`isotonic.IsotonicRegression` deduplication that - caused unstable result in :class:`calibration.CalibratedClassifierCV` by - `Jan Hendrik Metzen`_. +- Fix a bug in :class:`isotonic.IsotonicRegression` deduplication that + caused unstable result in :class:`calibration.CalibratedClassifierCV` by + `Jan Hendrik Metzen`_. - - Fix sorting of labels in func:`preprocessing.label_binarize` by Michael Heilman. +- Fix sorting of labels in :func:`preprocessing.label_binarize` by Michael Heilman. - - Fix several stability and convergence issues in - :class:`cross_decomposition.CCA` and - :class:`cross_decomposition.PLSCanonical` by `Andreas Müller`_ +- Fix several stability and convergence issues in + :class:`cross_decomposition.CCA` and + :class:`cross_decomposition.PLSCanonical` by `Andreas Müller`_ - - Fix a bug in :class:`cluster.KMeans` when ``precompute_distances=False`` - on fortran-ordered data. +- Fix a bug in :class:`cluster.KMeans` when ``precompute_distances=False`` + on fortran-ordered data. - - Fix a speed regression in :class:`ensemble.RandomForestClassifier`'s ``predict`` - and ``predict_proba`` by `Andreas Müller`_. +- Fix a speed regression in :class:`ensemble.RandomForestClassifier`'s ``predict`` + and ``predict_proba`` by `Andreas Müller`_. - - Fix a regression where ``utils.shuffle`` converted lists and dataframes to arrays, by `Olivier Grisel`_ +- Fix a regression where ``utils.shuffle`` converted lists and dataframes to arrays, by `Olivier Grisel`_ .. _changes_0_16: @@ -2199,25 +2319,25 @@ Version 0.16 Highlights ----------- - - Speed improvements (notably in :class:`cluster.DBSCAN`), reduced memory - requirements, bug-fixes and better default settings. +- Speed improvements (notably in :class:`cluster.DBSCAN`), reduced memory + requirements, bug-fixes and better default settings. 
- - Multinomial Logistic regression and a path algorithm in - :class:`linear_model.LogisticRegressionCV`. +- Multinomial Logistic regression and a path algorithm in + :class:`linear_model.LogisticRegressionCV`. - - Out-of core learning of PCA via :class:`decomposition.IncrementalPCA`. +- Out-of-core learning of PCA via :class:`decomposition.IncrementalPCA`. - - Probability callibration of classifiers using - :class:`calibration.CalibratedClassifierCV`. +- Probability calibration of classifiers using + :class:`calibration.CalibratedClassifierCV`. - - :class:`cluster.Birch` clustering method for large-scale datasets. +- :class:`cluster.Birch` clustering method for large-scale datasets. - - Scalable approximate nearest neighbors search with Locality-sensitive - hashing forests in :class:`neighbors.LSHForest`. +- Scalable approximate nearest neighbors search with Locality-sensitive + hashing forests in :class:`neighbors.LSHForest`. - - Improved error messages and better validation when using malformed input data. +- Improved error messages and better validation when using malformed input data. - - More robust integration with pandas dataframes. +- More robust integration with pandas dataframes. Changelog --------- @@ -2225,438 +2345,438 @@ Changelog New features ............ - - The new :class:`neighbors.LSHForest` implements locality-sensitive hashing - for approximate nearest neighbors search. By :user:`Maheshakya Wijewardena`. +- The new :class:`neighbors.LSHForest` implements locality-sensitive hashing + for approximate nearest neighbors search. By :user:`Maheshakya Wijewardena`. - - Added :class:`svm.LinearSVR`. This class uses the liblinear implementation - of Support Vector Regression which is much faster for large - sample sizes than :class:`svm.SVR` with linear kernel. By - `Fabian Pedregosa`_ and Qiang Luo. +- Added :class:`svm.LinearSVR`. 
This class uses the liblinear implementation + of Support Vector Regression which is much faster for large + sample sizes than :class:`svm.SVR` with linear kernel. By + `Fabian Pedregosa`_ and Qiang Luo. - - Incremental fit for :class:`GaussianNB `. +- Incremental fit for :class:`GaussianNB `. - - Added ``sample_weight`` support to :class:`dummy.DummyClassifier` and - :class:`dummy.DummyRegressor`. By `Arnaud Joly`_. +- Added ``sample_weight`` support to :class:`dummy.DummyClassifier` and + :class:`dummy.DummyRegressor`. By `Arnaud Joly`_. - - Added the :func:`metrics.label_ranking_average_precision_score` metrics. - By `Arnaud Joly`_. +- Added the :func:`metrics.label_ranking_average_precision_score` metrics. + By `Arnaud Joly`_. - - Add the :func:`metrics.coverage_error` metrics. By `Arnaud Joly`_. +- Add the :func:`metrics.coverage_error` metrics. By `Arnaud Joly`_. - - Added :class:`linear_model.LogisticRegressionCV`. By - `Manoj Kumar`_, `Fabian Pedregosa`_, `Gael Varoquaux`_ - and `Alexandre Gramfort`_. +- Added :class:`linear_model.LogisticRegressionCV`. By + `Manoj Kumar`_, `Fabian Pedregosa`_, `Gael Varoquaux`_ + and `Alexandre Gramfort`_. - - Added ``warm_start`` constructor parameter to make it possible for any - trained forest model to grow additional trees incrementally. By - :user:`Laurent Direr`. +- Added ``warm_start`` constructor parameter to make it possible for any + trained forest model to grow additional trees incrementally. By + :user:`Laurent Direr`. - - Added ``sample_weight`` support to :class:`ensemble.GradientBoostingClassifier` and - :class:`ensemble.GradientBoostingRegressor`. By `Peter Prettenhofer`_. +- Added ``sample_weight`` support to :class:`ensemble.GradientBoostingClassifier` and + :class:`ensemble.GradientBoostingRegressor`. By `Peter Prettenhofer`_. - - Added :class:`decomposition.IncrementalPCA`, an implementation of the PCA - algorithm that supports out-of-core learning with a ``partial_fit`` - method. By `Kyle Kastner`_. 
+- Added :class:`decomposition.IncrementalPCA`, an implementation of the PCA + algorithm that supports out-of-core learning with a ``partial_fit`` + method. By `Kyle Kastner`_. - - Averaged SGD for :class:`SGDClassifier ` - and :class:`SGDRegressor ` By - :user:`Danny Sullivan `. +- Averaged SGD for :class:`SGDClassifier ` + and :class:`SGDRegressor ` By + :user:`Danny Sullivan `. - - Added :func:`cross_val_predict ` - function which computes cross-validated estimates. By `Luis Pedro Coelho`_ +- Added :func:`cross_val_predict ` + function which computes cross-validated estimates. By `Luis Pedro Coelho`_ - - Added :class:`linear_model.TheilSenRegressor`, a robust - generalized-median-based estimator. By :user:`Florian Wilhelm `. +- Added :class:`linear_model.TheilSenRegressor`, a robust + generalized-median-based estimator. By :user:`Florian Wilhelm `. - - Added :func:`metrics.median_absolute_error`, a robust metric. - By `Gael Varoquaux`_ and :user:`Florian Wilhelm `. +- Added :func:`metrics.median_absolute_error`, a robust metric. + By `Gael Varoquaux`_ and :user:`Florian Wilhelm `. - - Add :class:`cluster.Birch`, an online clustering algorithm. By - `Manoj Kumar`_, `Alexandre Gramfort`_ and `Joel Nothman`_. +- Add :class:`cluster.Birch`, an online clustering algorithm. By + `Manoj Kumar`_, `Alexandre Gramfort`_ and `Joel Nothman`_. - - Added shrinkage support to :class:`discriminant_analysis.LinearDiscriminantAnalysis` - using two new solvers. By :user:`Clemens Brunner ` and `Martin Billinger`_. +- Added shrinkage support to :class:`discriminant_analysis.LinearDiscriminantAnalysis` + using two new solvers. By :user:`Clemens Brunner ` and `Martin Billinger`_. - - Added :class:`kernel_ridge.KernelRidge`, an implementation of - kernelized ridge regression. - By `Mathieu Blondel`_ and `Jan Hendrik Metzen`_. +- Added :class:`kernel_ridge.KernelRidge`, an implementation of + kernelized ridge regression. + By `Mathieu Blondel`_ and `Jan Hendrik Metzen`_. 
- - All solvers in :class:`linear_model.Ridge` now support `sample_weight`. - By `Mathieu Blondel`_. +- All solvers in :class:`linear_model.Ridge` now support `sample_weight`. + By `Mathieu Blondel`_. - - Added :class:`cross_validation.PredefinedSplit` cross-validation - for fixed user-provided cross-validation folds. - By :user:`Thomas Unterthiner `. +- Added :class:`cross_validation.PredefinedSplit` cross-validation + for fixed user-provided cross-validation folds. + By :user:`Thomas Unterthiner `. - - Added :class:`calibration.CalibratedClassifierCV`, an approach for - calibrating the predicted probabilities of a classifier. - By `Alexandre Gramfort`_, `Jan Hendrik Metzen`_, `Mathieu Blondel`_ - and :user:`Balazs Kegl `. +- Added :class:`calibration.CalibratedClassifierCV`, an approach for + calibrating the predicted probabilities of a classifier. + By `Alexandre Gramfort`_, `Jan Hendrik Metzen`_, `Mathieu Blondel`_ + and :user:`Balazs Kegl `. Enhancements ............ - - Add option ``return_distance`` in :func:`hierarchical.ward_tree` - to return distances between nodes for both structured and unstructured - versions of the algorithm. By `Matteo Visconti di Oleggio Castello`_. - The same option was added in :func:`hierarchical.linkage_tree`. - By `Manoj Kumar`_ +- Add option ``return_distance`` in :func:`hierarchical.ward_tree` + to return distances between nodes for both structured and unstructured + versions of the algorithm. By `Matteo Visconti di Oleggio Castello`_. + The same option was added in :func:`hierarchical.linkage_tree`. + By `Manoj Kumar`_ - - Add support for sample weights in scorer objects. Metrics with sample - weight support will automatically benefit from it. By `Noel Dawe`_ and - `Vlad Niculae`_. +- Add support for sample weights in scorer objects. Metrics with sample + weight support will automatically benefit from it. By `Noel Dawe`_ and + `Vlad Niculae`_. 
- - Added ``newton-cg`` and `lbfgs` solver support in - :class:`linear_model.LogisticRegression`. By `Manoj Kumar`_. +- Added ``newton-cg`` and `lbfgs` solver support in + :class:`linear_model.LogisticRegression`. By `Manoj Kumar`_. - - Add ``selection="random"`` parameter to implement stochastic coordinate - descent for :class:`linear_model.Lasso`, :class:`linear_model.ElasticNet` - and related. By `Manoj Kumar`_. +- Add ``selection="random"`` parameter to implement stochastic coordinate + descent for :class:`linear_model.Lasso`, :class:`linear_model.ElasticNet` + and related. By `Manoj Kumar`_. - - Add ``sample_weight`` parameter to - :func:`metrics.jaccard_similarity_score` and :func:`metrics.log_loss`. - By :user:`Jatin Shah `. +- Add ``sample_weight`` parameter to + :func:`metrics.jaccard_similarity_score` and :func:`metrics.log_loss`. + By :user:`Jatin Shah `. - - Support sparse multilabel indicator representation in - :class:`preprocessing.LabelBinarizer` and - :class:`multiclass.OneVsRestClassifier` (by :user:`Hamzeh Alsalhi ` with thanks - to Rohit Sivaprasad), as well as evaluation metrics (by - `Joel Nothman`_). +- Support sparse multilabel indicator representation in + :class:`preprocessing.LabelBinarizer` and + :class:`multiclass.OneVsRestClassifier` (by :user:`Hamzeh Alsalhi ` with thanks + to Rohit Sivaprasad), as well as evaluation metrics (by + `Joel Nothman`_). - - Add ``sample_weight`` parameter to `metrics.jaccard_similarity_score`. - By `Jatin Shah`. +- Add ``sample_weight`` parameter to `metrics.jaccard_similarity_score`. + By `Jatin Shah`. - - Add support for multiclass in `metrics.hinge_loss`. Added ``labels=None`` - as optional parameter. By `Saurabh Jha`. +- Add support for multiclass in `metrics.hinge_loss`. Added ``labels=None`` + as optional parameter. By `Saurabh Jha`. - - Add ``sample_weight`` parameter to `metrics.hinge_loss`. - By `Saurabh Jha`. +- Add ``sample_weight`` parameter to `metrics.hinge_loss`. + By `Saurabh Jha`. 
- - Add ``multi_class="multinomial"`` option in - :class:`linear_model.LogisticRegression` to implement a Logistic - Regression solver that minimizes the cross-entropy or multinomial loss - instead of the default One-vs-Rest setting. Supports `lbfgs` and - `newton-cg` solvers. By `Lars Buitinck`_ and `Manoj Kumar`_. Solver option - `newton-cg` by Simon Wu. +- Add ``multi_class="multinomial"`` option in + :class:`linear_model.LogisticRegression` to implement a Logistic + Regression solver that minimizes the cross-entropy or multinomial loss + instead of the default One-vs-Rest setting. Supports `lbfgs` and + `newton-cg` solvers. By `Lars Buitinck`_ and `Manoj Kumar`_. Solver option + `newton-cg` by Simon Wu. - - ``DictVectorizer`` can now perform ``fit_transform`` on an iterable in a - single pass, when giving the option ``sort=False``. By :user:`Dan - Blanchard `. +- ``DictVectorizer`` can now perform ``fit_transform`` on an iterable in a + single pass, when giving the option ``sort=False``. By :user:`Dan + Blanchard `. - - :class:`GridSearchCV` and :class:`RandomizedSearchCV` can now be - configured to work with estimators that may fail and raise errors on - individual folds. This option is controlled by the `error_score` - parameter. This does not affect errors raised on re-fit. By - :user:`Michal Romaniuk `. +- :class:`GridSearchCV` and :class:`RandomizedSearchCV` can now be + configured to work with estimators that may fail and raise errors on + individual folds. This option is controlled by the `error_score` + parameter. This does not affect errors raised on re-fit. By + :user:`Michal Romaniuk `. - - Add ``digits`` parameter to `metrics.classification_report` to allow - report to show different precision of floating point numbers. By - :user:`Ian Gilmore `. +- Add ``digits`` parameter to `metrics.classification_report` to allow + report to show different precision of floating point numbers. By + :user:`Ian Gilmore `. 
- - Add a quantile prediction strategy to the :class:`dummy.DummyRegressor`. - By :user:`Aaron Staple `. +- Add a quantile prediction strategy to the :class:`dummy.DummyRegressor`. + By :user:`Aaron Staple `. - - Add ``handle_unknown`` option to :class:`preprocessing.OneHotEncoder` to - handle unknown categorical features more gracefully during transform. - By `Manoj Kumar`_. +- Add ``handle_unknown`` option to :class:`preprocessing.OneHotEncoder` to + handle unknown categorical features more gracefully during transform. + By `Manoj Kumar`_. - - Added support for sparse input data to decision trees and their ensembles. - By `Fares Hedyati`_ and `Arnaud Joly`_. +- Added support for sparse input data to decision trees and their ensembles. + By `Fares Hedyati`_ and `Arnaud Joly`_. - - Optimized :class:`cluster.AffinityPropagation` by reducing the number of - memory allocations of large temporary data-structures. By `Antony Lee`_. +- Optimized :class:`cluster.AffinityPropagation` by reducing the number of + memory allocations of large temporary data-structures. By `Antony Lee`_. - - Parellization of the computation of feature importances in random forest. - By `Olivier Grisel`_ and `Arnaud Joly`_. +- Parallelization of the computation of feature importances in random forest. + By `Olivier Grisel`_ and `Arnaud Joly`_. - - Add ``n_iter_`` attribute to estimators that accept a ``max_iter`` attribute - in their constructor. By `Manoj Kumar`_. +- Add ``n_iter_`` attribute to estimators that accept a ``max_iter`` attribute + in their constructor. By `Manoj Kumar`_. - - Added decision function for :class:`multiclass.OneVsOneClassifier` - By `Raghav RV`_ and :user:`Kyle Beauchamp `. +- Added decision function for :class:`multiclass.OneVsOneClassifier` + By `Raghav RV`_ and :user:`Kyle Beauchamp `. - - :func:`neighbors.kneighbors_graph` and :func:`radius_neighbors_graph` - support non-Euclidean metrics. 
By `Manoj Kumar`_ +- :func:`neighbors.kneighbors_graph` and :func:`radius_neighbors_graph` + support non-Euclidean metrics. By `Manoj Kumar`_ - - Parameter ``connectivity`` in :class:`cluster.AgglomerativeClustering` - and family now accept callables that return a connectivity matrix. - By `Manoj Kumar`_. +- Parameter ``connectivity`` in :class:`cluster.AgglomerativeClustering` + and family now accept callables that return a connectivity matrix. + By `Manoj Kumar`_. - - Sparse support for :func:`paired_distances`. By `Joel Nothman`_. +- Sparse support for :func:`paired_distances`. By `Joel Nothman`_. - - :class:`cluster.DBSCAN` now supports sparse input and sample weights and - has been optimized: the inner loop has been rewritten in Cython and - radius neighbors queries are now computed in batch. By `Joel Nothman`_ - and `Lars Buitinck`_. +- :class:`cluster.DBSCAN` now supports sparse input and sample weights and + has been optimized: the inner loop has been rewritten in Cython and + radius neighbors queries are now computed in batch. By `Joel Nothman`_ + and `Lars Buitinck`_. - - Add ``class_weight`` parameter to automatically weight samples by class - frequency for :class:`ensemble.RandomForestClassifier`, - :class:`tree.DecisionTreeClassifier`, :class:`ensemble.ExtraTreesClassifier` - and :class:`tree.ExtraTreeClassifier`. By `Trevor Stephens`_. +- Add ``class_weight`` parameter to automatically weight samples by class + frequency for :class:`ensemble.RandomForestClassifier`, + :class:`tree.DecisionTreeClassifier`, :class:`ensemble.ExtraTreesClassifier` + and :class:`tree.ExtraTreeClassifier`. By `Trevor Stephens`_. - - :class:`grid_search.RandomizedSearchCV` now does sampling without - replacement if all parameters are given as lists. By `Andreas Müller`_. +- :class:`grid_search.RandomizedSearchCV` now does sampling without + replacement if all parameters are given as lists. By `Andreas Müller`_. 
- - Parallelized calculation of :func:`pairwise_distances` is now supported - for scipy metrics and custom callables. By `Joel Nothman`_. +- Parallelized calculation of :func:`pairwise_distances` is now supported + for scipy metrics and custom callables. By `Joel Nothman`_. - - Allow the fitting and scoring of all clustering algorithms in - :class:`pipeline.Pipeline`. By `Andreas Müller`_. +- Allow the fitting and scoring of all clustering algorithms in + :class:`pipeline.Pipeline`. By `Andreas Müller`_. - - More robust seeding and improved error messages in :class:`cluster.MeanShift` - by `Andreas Müller`_. +- More robust seeding and improved error messages in :class:`cluster.MeanShift` + by `Andreas Müller`_. - - Make the stopping criterion for :class:`mixture.GMM`, - :class:`mixture.DPGMM` and :class:`mixture.VBGMM` less dependent on the - number of samples by thresholding the average log-likelihood change - instead of its sum over all samples. By `Hervé Bredin`_. +- Make the stopping criterion for :class:`mixture.GMM`, + :class:`mixture.DPGMM` and :class:`mixture.VBGMM` less dependent on the + number of samples by thresholding the average log-likelihood change + instead of its sum over all samples. By `Hervé Bredin`_. - - The outcome of :func:`manifold.spectral_embedding` was made deterministic - by flipping the sign of eigenvectors. By :user:`Hasil Sharma `. +- The outcome of :func:`manifold.spectral_embedding` was made deterministic + by flipping the sign of eigenvectors. By :user:`Hasil Sharma `. - - Significant performance and memory usage improvements in - :class:`preprocessing.PolynomialFeatures`. By `Eric Martin`_. +- Significant performance and memory usage improvements in + :class:`preprocessing.PolynomialFeatures`. By `Eric Martin`_. - - Numerical stability improvements for :class:`preprocessing.StandardScaler` - and :func:`preprocessing.scale`. 
By `Nicolas Goix`_ +- Numerical stability improvements for :class:`preprocessing.StandardScaler` + and :func:`preprocessing.scale`. By `Nicolas Goix`_ - - :class:`svm.SVC` fitted on sparse input now implements ``decision_function``. - By `Rob Zinkov`_ and `Andreas Müller`_. +- :class:`svm.SVC` fitted on sparse input now implements ``decision_function``. + By `Rob Zinkov`_ and `Andreas Müller`_. - - :func:`cross_validation.train_test_split` now preserves the input type, - instead of converting to numpy arrays. +- :func:`cross_validation.train_test_split` now preserves the input type, + instead of converting to numpy arrays. Documentation improvements .......................... - - Added example of using :class:`FeatureUnion` for heterogeneous input. - By :user:`Matt Terry ` +- Added example of using :class:`FeatureUnion` for heterogeneous input. + By :user:`Matt Terry ` - - Documentation on scorers was improved, to highlight the handling of loss - functions. By :user:`Matt Pico `. +- Documentation on scorers was improved, to highlight the handling of loss + functions. By :user:`Matt Pico `. - - A discrepancy between liblinear output and scikit-learn's wrappers - is now noted. By `Manoj Kumar`_. +- A discrepancy between liblinear output and scikit-learn's wrappers + is now noted. By `Manoj Kumar`_. - - Improved documentation generation: examples referring to a class or - function are now shown in a gallery on the class/function's API reference - page. By `Joel Nothman`_. +- Improved documentation generation: examples referring to a class or + function are now shown in a gallery on the class/function's API reference + page. By `Joel Nothman`_. - - More explicit documentation of sample generators and of data - transformation. By `Joel Nothman`_. +- More explicit documentation of sample generators and of data + transformation. By `Joel Nothman`_. 
- - :class:`sklearn.neighbors.BallTree` and :class:`sklearn.neighbors.KDTree` - used to point to empty pages stating that they are aliases of BinaryTree. - This has been fixed to show the correct class docs. By `Manoj Kumar`_. +- :class:`sklearn.neighbors.BallTree` and :class:`sklearn.neighbors.KDTree` + used to point to empty pages stating that they are aliases of BinaryTree. + This has been fixed to show the correct class docs. By `Manoj Kumar`_. - - Added silhouette plots for analysis of KMeans clustering using - :func:`metrics.silhouette_samples` and :func:`metrics.silhouette_score`. - See :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py` +- Added silhouette plots for analysis of KMeans clustering using + :func:`metrics.silhouette_samples` and :func:`metrics.silhouette_score`. + See :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py` Bug fixes ......... - - Metaestimators now support ducktyping for the presence of ``decision_function``, - ``predict_proba`` and other methods. This fixes behavior of - :class:`grid_search.GridSearchCV`, - :class:`grid_search.RandomizedSearchCV`, :class:`pipeline.Pipeline`, - :class:`feature_selection.RFE`, :class:`feature_selection.RFECV` when nested. - By `Joel Nothman`_ - - - The ``scoring`` attribute of grid-search and cross-validation methods is no longer - ignored when a :class:`grid_search.GridSearchCV` is given as a base estimator or - the base estimator doesn't have predict. - - - The function :func:`hierarchical.ward_tree` now returns the children in - the same order for both the structured and unstructured versions. By - `Matteo Visconti di Oleggio Castello`_. - - - :class:`feature_selection.RFECV` now correctly handles cases when - ``step`` is not equal to 1. By :user:`Nikolay Mayorov ` - - - The :class:`decomposition.PCA` now undoes whitening in its - ``inverse_transform``. Also, its ``components_`` now always have unit - length. By :user:`Michael Eickenberg `. 
- - - Fix incomplete download of the dataset when - :func:`datasets.download_20newsgroups` is called. By `Manoj Kumar`_. - - - Various fixes to the Gaussian processes subpackage by Vincent Dubourg - and Jan Hendrik Metzen. - - - Calling ``partial_fit`` with ``class_weight=='auto'`` throws an - appropriate error message and suggests a work around. - By :user:`Danny Sullivan `. - - - :class:`RBFSampler ` with ``gamma=g`` - formerly approximated :func:`rbf_kernel ` - with ``gamma=g/2.``; the definition of ``gamma`` is now consistent, - which may substantially change your results if you use a fixed value. - (If you cross-validated over ``gamma``, it probably doesn't matter - too much.) By :user:`Dougal Sutherland `. - - - Pipeline object delegate the ``classes_`` attribute to the underlying - estimator. It allows, for instance, to make bagging of a pipeline object. - By `Arnaud Joly`_ - - - :class:`neighbors.NearestCentroid` now uses the median as the centroid - when metric is set to ``manhattan``. It was using the mean before. - By `Manoj Kumar`_ - - - Fix numerical stability issues in :class:`linear_model.SGDClassifier` - and :class:`linear_model.SGDRegressor` by clipping large gradients and - ensuring that weight decay rescaling is always positive (for large - l2 regularization and large learning rate values). - By `Olivier Grisel`_ - - - When `compute_full_tree` is set to "auto", the full tree is - built when n_clusters is high and is early stopped when n_clusters is - low, while the behavior should be vice-versa in - :class:`cluster.AgglomerativeClustering` (and friends). - This has been fixed By `Manoj Kumar`_ - - - Fix lazy centering of data in :func:`linear_model.enet_path` and - :func:`linear_model.lasso_path`. It was centered around one. It has - been changed to be centered around the origin. By `Manoj Kumar`_ - - - Fix handling of precomputed affinity matrices in - :class:`cluster.AgglomerativeClustering` when using connectivity - constraints. 
By :user:`Cathy Deng ` - - - Correct ``partial_fit`` handling of ``class_prior`` for - :class:`sklearn.naive_bayes.MultinomialNB` and - :class:`sklearn.naive_bayes.BernoulliNB`. By `Trevor Stephens`_. - - - Fixed a crash in :func:`metrics.precision_recall_fscore_support` - when using unsorted ``labels`` in the multi-label setting. - By `Andreas Müller`_. - - - Avoid skipping the first nearest neighbor in the methods ``radius_neighbors``, - ``kneighbors``, ``kneighbors_graph`` and ``radius_neighbors_graph`` in - :class:`sklearn.neighbors.NearestNeighbors` and family, when the query - data is not the same as fit data. By `Manoj Kumar`_. - - - Fix log-density calculation in the :class:`mixture.GMM` with - tied covariance. By `Will Dawson`_ - - - Fixed a scaling error in :class:`feature_selection.SelectFdr` - where a factor ``n_features`` was missing. By `Andrew Tulloch`_ - - - Fix zero division in :class:`neighbors.KNeighborsRegressor` and related - classes when using distance weighting and having identical data points. - By `Garret-R `_. - - - Fixed round off errors with non positive-definite covariance matrices - in GMM. By :user:`Alexis Mignon `. - - - Fixed a error in the computation of conditional probabilities in - :class:`naive_bayes.BernoulliNB`. By `Hanna Wallach`_. - - - Make the method ``radius_neighbors`` of - :class:`neighbors.NearestNeighbors` return the samples lying on the - boundary for ``algorithm='brute'``. By `Yan Yi`_. - - - Flip sign of ``dual_coef_`` of :class:`svm.SVC` - to make it consistent with the documentation and - ``decision_function``. By Artem Sobolev. +- Metaestimators now support ducktyping for the presence of ``decision_function``, + ``predict_proba`` and other methods. This fixes behavior of + :class:`grid_search.GridSearchCV`, + :class:`grid_search.RandomizedSearchCV`, :class:`pipeline.Pipeline`, + :class:`feature_selection.RFE`, :class:`feature_selection.RFECV` when nested. 
+ By `Joel Nothman`_ + +- The ``scoring`` attribute of grid-search and cross-validation methods is no longer + ignored when a :class:`grid_search.GridSearchCV` is given as a base estimator or + the base estimator doesn't have predict. + +- The function :func:`hierarchical.ward_tree` now returns the children in + the same order for both the structured and unstructured versions. By + `Matteo Visconti di Oleggio Castello`_. + +- :class:`feature_selection.RFECV` now correctly handles cases when + ``step`` is not equal to 1. By :user:`Nikolay Mayorov ` + +- The :class:`decomposition.PCA` now undoes whitening in its + ``inverse_transform``. Also, its ``components_`` now always have unit + length. By :user:`Michael Eickenberg `. + +- Fix incomplete download of the dataset when + :func:`datasets.download_20newsgroups` is called. By `Manoj Kumar`_. + +- Various fixes to the Gaussian processes subpackage by Vincent Dubourg + and Jan Hendrik Metzen. + +- Calling ``partial_fit`` with ``class_weight=='auto'`` throws an + appropriate error message and suggests a work around. + By :user:`Danny Sullivan `. + +- :class:`RBFSampler ` with ``gamma=g`` + formerly approximated :func:`rbf_kernel ` + with ``gamma=g/2.``; the definition of ``gamma`` is now consistent, + which may substantially change your results if you use a fixed value. + (If you cross-validated over ``gamma``, it probably doesn't matter + too much.) By :user:`Dougal Sutherland `. + +- Pipeline object delegate the ``classes_`` attribute to the underlying + estimator. It allows, for instance, to make bagging of a pipeline object. + By `Arnaud Joly`_ + +- :class:`neighbors.NearestCentroid` now uses the median as the centroid + when metric is set to ``manhattan``. It was using the mean before. 
+ By `Manoj Kumar`_ + +- Fix numerical stability issues in :class:`linear_model.SGDClassifier` + and :class:`linear_model.SGDRegressor` by clipping large gradients and + ensuring that weight decay rescaling is always positive (for large + l2 regularization and large learning rate values). + By `Olivier Grisel`_ + +- When `compute_full_tree` is set to "auto", the full tree is + built when n_clusters is high and is early stopped when n_clusters is + low, while the behavior should be vice-versa in + :class:`cluster.AgglomerativeClustering` (and friends). + This has been fixed By `Manoj Kumar`_ + +- Fix lazy centering of data in :func:`linear_model.enet_path` and + :func:`linear_model.lasso_path`. It was centered around one. It has + been changed to be centered around the origin. By `Manoj Kumar`_ + +- Fix handling of precomputed affinity matrices in + :class:`cluster.AgglomerativeClustering` when using connectivity + constraints. By :user:`Cathy Deng ` + +- Correct ``partial_fit`` handling of ``class_prior`` for + :class:`sklearn.naive_bayes.MultinomialNB` and + :class:`sklearn.naive_bayes.BernoulliNB`. By `Trevor Stephens`_. + +- Fixed a crash in :func:`metrics.precision_recall_fscore_support` + when using unsorted ``labels`` in the multi-label setting. + By `Andreas Müller`_. + +- Avoid skipping the first nearest neighbor in the methods ``radius_neighbors``, + ``kneighbors``, ``kneighbors_graph`` and ``radius_neighbors_graph`` in + :class:`sklearn.neighbors.NearestNeighbors` and family, when the query + data is not the same as fit data. By `Manoj Kumar`_. + +- Fix log-density calculation in the :class:`mixture.GMM` with + tied covariance. By `Will Dawson`_ + +- Fixed a scaling error in :class:`feature_selection.SelectFdr` + where a factor ``n_features`` was missing. By `Andrew Tulloch`_ + +- Fix zero division in :class:`neighbors.KNeighborsRegressor` and related + classes when using distance weighting and having identical data points. + By `Garret-R `_. 
+ +- Fixed round off errors with non positive-definite covariance matrices + in GMM. By :user:`Alexis Mignon `. + +- Fixed an error in the computation of conditional probabilities in + :class:`naive_bayes.BernoulliNB`. By `Hanna Wallach`_. + +- Make the method ``radius_neighbors`` of + :class:`neighbors.NearestNeighbors` return the samples lying on the + boundary for ``algorithm='brute'``. By `Yan Yi`_. + +- Flip sign of ``dual_coef_`` of :class:`svm.SVC` + to make it consistent with the documentation and + ``decision_function``. By Artem Sobolev. - - Fixed handling of ties in :class:`isotonic.IsotonicRegression`. - We now use the weighted average of targets (secondary method). By - `Andreas Müller`_ and `Michael Bommarito `_. +- Fixed handling of ties in :class:`isotonic.IsotonicRegression`. + We now use the weighted average of targets (secondary method). By + `Andreas Müller`_ and `Michael Bommarito `_. API changes summary ------------------- - - :class:`GridSearchCV ` and - :func:`cross_val_score ` and other - meta-estimators don't convert pandas DataFrames into arrays any more, - allowing DataFrame specific operations in custom estimators. +- :class:`GridSearchCV ` and + :func:`cross_val_score ` and other + meta-estimators don't convert pandas DataFrames into arrays any more, + allowing DataFrame specific operations in custom estimators. - - :func:`multiclass.fit_ovr`, :func:`multiclass.predict_ovr`, - :func:`predict_proba_ovr`, - :func:`multiclass.fit_ovo`, :func:`multiclass.predict_ovo`, - :func:`multiclass.fit_ecoc` and :func:`multiclass.predict_ecoc` - are deprecated. Use the underlying estimators instead. +- :func:`multiclass.fit_ovr`, :func:`multiclass.predict_ovr`, + :func:`predict_proba_ovr`, + :func:`multiclass.fit_ovo`, :func:`multiclass.predict_ovo`, + :func:`multiclass.fit_ecoc` and :func:`multiclass.predict_ecoc` + are deprecated. Use the underlying estimators instead.
- - Nearest neighbors estimators used to take arbitrary keyword arguments - and pass these to their distance metric. This will no longer be supported - in scikit-learn 0.18; use the ``metric_params`` argument instead. +- Nearest neighbors estimators used to take arbitrary keyword arguments + and pass these to their distance metric. This will no longer be supported + in scikit-learn 0.18; use the ``metric_params`` argument instead. - - `n_jobs` parameter of the fit method shifted to the constructor of the +- `n_jobs` parameter of the fit method shifted to the constructor of the LinearRegression class. - - The ``predict_proba`` method of :class:`multiclass.OneVsRestClassifier` - now returns two probabilities per sample in the multiclass case; this - is consistent with other estimators and with the method's documentation, - but previous versions accidentally returned only the positive - probability. Fixed by Will Lamond and `Lars Buitinck`_. - - - Change default value of precompute in :class:`ElasticNet` and :class:`Lasso` - to False. Setting precompute to "auto" was found to be slower when - n_samples > n_features since the computation of the Gram matrix is - computationally expensive and outweighs the benefit of fitting the Gram - for just one alpha. - ``precompute="auto"`` is now deprecated and will be removed in 0.18 - By `Manoj Kumar`_. - - - Expose ``positive`` option in :func:`linear_model.enet_path` and - :func:`linear_model.enet_path` which constrains coefficients to be - positive. By `Manoj Kumar`_. - - - Users should now supply an explicit ``average`` parameter to - :func:`sklearn.metrics.f1_score`, :func:`sklearn.metrics.fbeta_score`, - :func:`sklearn.metrics.recall_score` and - :func:`sklearn.metrics.precision_score` when performing multiclass - or multilabel (i.e. not binary) classification. By `Joel Nothman`_. - - - `scoring` parameter for cross validation now accepts `'f1_micro'`, - `'f1_macro'` or `'f1_weighted'`. 
`'f1'` is now for binary classification - only. Similar changes apply to `'precision'` and `'recall'`. - By `Joel Nothman`_. - - - The ``fit_intercept``, ``normalize`` and ``return_models`` parameters in - :func:`linear_model.enet_path` and :func:`linear_model.lasso_path` have - been removed. They were deprecated since 0.14 - - - From now onwards, all estimators will uniformly raise ``NotFittedError`` - (:class:`utils.validation.NotFittedError`), when any of the ``predict`` - like methods are called before the model is fit. By `Raghav RV`_. - - - Input data validation was refactored for more consistent input - validation. The ``check_arrays`` function was replaced by ``check_array`` - and ``check_X_y``. By `Andreas Müller`_. - - - Allow ``X=None`` in the methods ``radius_neighbors``, ``kneighbors``, - ``kneighbors_graph`` and ``radius_neighbors_graph`` in - :class:`sklearn.neighbors.NearestNeighbors` and family. If set to None, - then for every sample this avoids setting the sample itself as the - first nearest neighbor. By `Manoj Kumar`_. - - - Add parameter ``include_self`` in :func:`neighbors.kneighbors_graph` - and :func:`neighbors.radius_neighbors_graph` which has to be explicitly - set by the user. If set to True, then the sample itself is considered - as the first nearest neighbor. - - - `thresh` parameter is deprecated in favor of new `tol` parameter in - :class:`GMM`, :class:`DPGMM` and :class:`VBGMM`. See `Enhancements` - section for details. By `Hervé Bredin`_. - - - Estimators will treat input with dtype object as numeric when possible. - By `Andreas Müller`_ - - - Estimators now raise `ValueError` consistently when fitted on empty - data (less than 1 sample or less than 1 feature for 2D input). - By `Olivier Grisel`_. 
- - - - The ``shuffle`` option of :class:`.linear_model.SGDClassifier`, - :class:`linear_model.SGDRegressor`, :class:`linear_model.Perceptron`, - :class:`linear_model.PassiveAgressiveClassifier` and - :class:`linear_model.PassiveAgressiveRegressor` now defaults to ``True``. - - - :class:`cluster.DBSCAN` now uses a deterministic initialization. The - `random_state` parameter is deprecated. By :user:`Erich Schubert `. +- The ``predict_proba`` method of :class:`multiclass.OneVsRestClassifier` + now returns two probabilities per sample in the multiclass case; this + is consistent with other estimators and with the method's documentation, + but previous versions accidentally returned only the positive + probability. Fixed by Will Lamond and `Lars Buitinck`_. + +- Change default value of precompute in :class:`ElasticNet` and :class:`Lasso` + to False. Setting precompute to "auto" was found to be slower when + n_samples > n_features since the computation of the Gram matrix is + computationally expensive and outweighs the benefit of fitting the Gram + for just one alpha. + ``precompute="auto"`` is now deprecated and will be removed in 0.18 + By `Manoj Kumar`_. + +- Expose ``positive`` option in :func:`linear_model.enet_path` and + :func:`linear_model.enet_path` which constrains coefficients to be + positive. By `Manoj Kumar`_. + +- Users should now supply an explicit ``average`` parameter to + :func:`sklearn.metrics.f1_score`, :func:`sklearn.metrics.fbeta_score`, + :func:`sklearn.metrics.recall_score` and + :func:`sklearn.metrics.precision_score` when performing multiclass + or multilabel (i.e. not binary) classification. By `Joel Nothman`_. + +- `scoring` parameter for cross validation now accepts `'f1_micro'`, + `'f1_macro'` or `'f1_weighted'`. `'f1'` is now for binary classification + only. Similar changes apply to `'precision'` and `'recall'`. + By `Joel Nothman`_. 
+ +- The ``fit_intercept``, ``normalize`` and ``return_models`` parameters in + :func:`linear_model.enet_path` and :func:`linear_model.lasso_path` have + been removed. They were deprecated since 0.14 + +- From now onwards, all estimators will uniformly raise ``NotFittedError`` + (:class:`utils.validation.NotFittedError`), when any of the ``predict`` + like methods are called before the model is fit. By `Raghav RV`_. + +- Input data validation was refactored for more consistent input + validation. The ``check_arrays`` function was replaced by ``check_array`` + and ``check_X_y``. By `Andreas Müller`_. + +- Allow ``X=None`` in the methods ``radius_neighbors``, ``kneighbors``, + ``kneighbors_graph`` and ``radius_neighbors_graph`` in + :class:`sklearn.neighbors.NearestNeighbors` and family. If set to None, + then for every sample this avoids setting the sample itself as the + first nearest neighbor. By `Manoj Kumar`_. + +- Add parameter ``include_self`` in :func:`neighbors.kneighbors_graph` + and :func:`neighbors.radius_neighbors_graph` which has to be explicitly + set by the user. If set to True, then the sample itself is considered + as the first nearest neighbor. + +- `thresh` parameter is deprecated in favor of new `tol` parameter in + :class:`GMM`, :class:`DPGMM` and :class:`VBGMM`. See `Enhancements` + section for details. By `Hervé Bredin`_. + +- Estimators will treat input with dtype object as numeric when possible. + By `Andreas Müller`_ + +- Estimators now raise `ValueError` consistently when fitted on empty + data (less than 1 sample or less than 1 feature for 2D input). + By `Olivier Grisel`_. + + +- The ``shuffle`` option of :class:`.linear_model.SGDClassifier`, + :class:`linear_model.SGDRegressor`, :class:`linear_model.Perceptron`, + :class:`linear_model.PassiveAgressiveClassifier` and + :class:`linear_model.PassiveAgressiveRegressor` now defaults to ``True``. + +- :class:`cluster.DBSCAN` now uses a deterministic initialization. 
The + `random_state` parameter is deprecated. By :user:`Erich Schubert `. Code Contributors ----------------- @@ -2702,41 +2822,41 @@ Version 0.15.2 Bug fixes --------- - - Fixed handling of the ``p`` parameter of the Minkowski distance that was - previously ignored in nearest neighbors models. By :user:`Nikolay - Mayorov `. +- Fixed handling of the ``p`` parameter of the Minkowski distance that was + previously ignored in nearest neighbors models. By :user:`Nikolay + Mayorov `. - - Fixed duplicated alphas in :class:`linear_model.LassoLars` with early - stopping on 32 bit Python. By `Olivier Grisel`_ and `Fabian Pedregosa`_. +- Fixed duplicated alphas in :class:`linear_model.LassoLars` with early + stopping on 32 bit Python. By `Olivier Grisel`_ and `Fabian Pedregosa`_. - - Fixed the build under Windows when scikit-learn is built with MSVC while - NumPy is built with MinGW. By `Olivier Grisel`_ and :user:`Federico - Vaggi `. +- Fixed the build under Windows when scikit-learn is built with MSVC while + NumPy is built with MinGW. By `Olivier Grisel`_ and :user:`Federico + Vaggi `. - - Fixed an array index overflow bug in the coordinate descent solver. By - `Gael Varoquaux`_. +- Fixed an array index overflow bug in the coordinate descent solver. By + `Gael Varoquaux`_. - - Better handling of numpy 1.9 deprecation warnings. By `Gael Varoquaux`_. +- Better handling of numpy 1.9 deprecation warnings. By `Gael Varoquaux`_. - - Removed unnecessary data copy in :class:`cluster.KMeans`. - By `Gael Varoquaux`_. +- Removed unnecessary data copy in :class:`cluster.KMeans`. + By `Gael Varoquaux`_. - - Explicitly close open files to avoid ``ResourceWarnings`` under Python 3. - By Calvin Giles. +- Explicitly close open files to avoid ``ResourceWarnings`` under Python 3. + By Calvin Giles. - - The ``transform`` of :class:`discriminant_analysis.LinearDiscriminantAnalysis` - now projects the input on the most discriminant directions. By Martin Billinger. 
+- The ``transform`` of :class:`discriminant_analysis.LinearDiscriminantAnalysis` + now projects the input on the most discriminant directions. By Martin Billinger. - - Fixed potential overflow in ``_tree.safe_realloc`` by `Lars Buitinck`_. +- Fixed potential overflow in ``_tree.safe_realloc`` by `Lars Buitinck`_. - - Performance optimization in :class:`isotonic.IsotonicRegression`. - By Robert Bradshaw. +- Performance optimization in :class:`isotonic.IsotonicRegression`. + By Robert Bradshaw. - - ``nose`` is non-longer a runtime dependency to import ``sklearn``, only for - running the tests. By `Joel Nothman`_. +- ``nose`` is no longer a runtime dependency to import ``sklearn``, only for + running the tests. By `Joel Nothman`_. - - Many documentation and website fixes by `Joel Nothman`_, `Lars Buitinck`_ - :user:`Matt Pico `, and others. +- Many documentation and website fixes by `Joel Nothman`_, `Lars Buitinck`_, + :user:`Matt Pico `, and others. .. _changes_0_15_1: @@ -2748,35 +2868,35 @@ Version 0.15.1 Bug fixes --------- - - Made :func:`cross_validation.cross_val_score` use - :class:`cross_validation.KFold` instead of - :class:`cross_validation.StratifiedKFold` on multi-output classification - problems. By :user:`Nikolay Mayorov `. +- Made :func:`cross_validation.cross_val_score` use + :class:`cross_validation.KFold` instead of + :class:`cross_validation.StratifiedKFold` on multi-output classification + problems. By :user:`Nikolay Mayorov `. - - Support unseen labels :class:`preprocessing.LabelBinarizer` to restore - the default behavior of 0.14.1 for backward compatibility. By - :user:`Hamzeh Alsalhi `. +- Support unseen labels in :class:`preprocessing.LabelBinarizer` to restore + the default behavior of 0.14.1 for backward compatibility. By + :user:`Hamzeh Alsalhi `. - - Fixed the :class:`cluster.KMeans` stopping criterion that prevented early - convergence detection. By Edward Raff and `Gael Varoquaux`_.
+- Fixed the :class:`cluster.KMeans` stopping criterion that prevented early + convergence detection. By Edward Raff and `Gael Varoquaux`_. - - Fixed the behavior of :class:`multiclass.OneVsOneClassifier`. - in case of ties at the per-class vote level by computing the correct - per-class sum of prediction scores. By `Andreas Müller`_. +- Fixed the behavior of :class:`multiclass.OneVsOneClassifier` + in case of ties at the per-class vote level by computing the correct + per-class sum of prediction scores. By `Andreas Müller`_. - - Made :func:`cross_validation.cross_val_score` and - :class:`grid_search.GridSearchCV` accept Python lists as input data. - This is especially useful for cross-validation and model selection of - text processing pipelines. By `Andreas Müller`_. +- Made :func:`cross_validation.cross_val_score` and + :class:`grid_search.GridSearchCV` accept Python lists as input data. + This is especially useful for cross-validation and model selection of + text processing pipelines. By `Andreas Müller`_. - - Fixed data input checks of most estimators to accept input data that - implements the NumPy ``__array__`` protocol. This is the case for - for ``pandas.Series`` and ``pandas.DataFrame`` in recent versions of - pandas. By `Gael Varoquaux`_. +- Fixed data input checks of most estimators to accept input data that + implements the NumPy ``__array__`` protocol. This is the case for + ``pandas.Series`` and ``pandas.DataFrame`` in recent versions of + pandas. By `Gael Varoquaux`_. - - Fixed a regression for :class:`linear_model.SGDClassifier` with - ``class_weight="auto"`` on data with non-contiguous labels. By - `Olivier Grisel`_. +- Fixed a regression for :class:`linear_model.SGDClassifier` with + ``class_weight="auto"`` on data with non-contiguous labels. By + `Olivier Grisel`_. ..
_changes_0_15: @@ -2789,22 +2909,22 @@ Version 0.15 Highlights ----------- - - Many speed and memory improvements all across the code +- Many speed and memory improvements all across the code - - Huge speed and memory improvements to random forests (and extra - trees) that also benefit better from parallel computing. +- Huge speed and memory improvements to random forests (and extra + trees) that also benefit better from parallel computing. - - Incremental fit to :class:`BernoulliRBM ` +- Incremental fit to :class:`BernoulliRBM ` - - Added :class:`cluster.AgglomerativeClustering` for hierarchical - agglomerative clustering with average linkage, complete linkage and - ward strategies. +- Added :class:`cluster.AgglomerativeClustering` for hierarchical + agglomerative clustering with average linkage, complete linkage and + ward strategies. - - Added :class:`linear_model.RANSACRegressor` for robust regression - models. +- Added :class:`linear_model.RANSACRegressor` for robust regression + models. - - Added dimensionality reduction with :class:`manifold.TSNE` which can be - used to visualize high-dimensional data. +- Added dimensionality reduction with :class:`manifold.TSNE` which can be + used to visualize high-dimensional data. Changelog @@ -2813,334 +2933,334 @@ Changelog New features ............ - - Added :class:`ensemble.BaggingClassifier` and - :class:`ensemble.BaggingRegressor` meta-estimators for ensembling - any kind of base estimator. See the :ref:`Bagging ` section of - the user guide for details and examples. By `Gilles Louppe`_. +- Added :class:`ensemble.BaggingClassifier` and + :class:`ensemble.BaggingRegressor` meta-estimators for ensembling + any kind of base estimator. See the :ref:`Bagging ` section of + the user guide for details and examples. By `Gilles Louppe`_. - - New unsupervised feature selection algorithm - :class:`feature_selection.VarianceThreshold`, by `Lars Buitinck`_. 
+- New unsupervised feature selection algorithm + :class:`feature_selection.VarianceThreshold`, by `Lars Buitinck`_. - - Added :class:`linear_model.RANSACRegressor` meta-estimator for the robust - fitting of regression models. By :user:`Johannes Schönberger `. +- Added :class:`linear_model.RANSACRegressor` meta-estimator for the robust + fitting of regression models. By :user:`Johannes Schönberger `. - - Added :class:`cluster.AgglomerativeClustering` for hierarchical - agglomerative clustering with average linkage, complete linkage and - ward strategies, by `Nelle Varoquaux`_ and `Gael Varoquaux`_. +- Added :class:`cluster.AgglomerativeClustering` for hierarchical + agglomerative clustering with average linkage, complete linkage and + ward strategies, by `Nelle Varoquaux`_ and `Gael Varoquaux`_. - - Shorthand constructors :func:`pipeline.make_pipeline` and - :func:`pipeline.make_union` were added by `Lars Buitinck`_. +- Shorthand constructors :func:`pipeline.make_pipeline` and + :func:`pipeline.make_union` were added by `Lars Buitinck`_. - - Shuffle option for :class:`cross_validation.StratifiedKFold`. - By :user:`Jeffrey Blackburne `. +- Shuffle option for :class:`cross_validation.StratifiedKFold`. + By :user:`Jeffrey Blackburne `. - - Incremental learning (``partial_fit``) for Gaussian Naive Bayes by - Imran Haque. +- Incremental learning (``partial_fit``) for Gaussian Naive Bayes by + Imran Haque. - - Added ``partial_fit`` to :class:`BernoulliRBM - ` - By :user:`Danny Sullivan `. +- Added ``partial_fit`` to :class:`BernoulliRBM + ` + By :user:`Danny Sullivan `. - - Added :func:`learning_curve ` utility to - chart performance with respect to training size. See - :ref:`sphx_glr_auto_examples_model_selection_plot_learning_curve.py`. By Alexander Fabisch. +- Added :func:`learning_curve ` utility to + chart performance with respect to training size. See + :ref:`sphx_glr_auto_examples_model_selection_plot_learning_curve.py`. By Alexander Fabisch. 
- - Add positive option in :class:`LassoCV ` and - :class:`ElasticNetCV `. - By Brian Wignall and `Alexandre Gramfort`_. +- Add positive option in :class:`LassoCV ` and + :class:`ElasticNetCV `. + By Brian Wignall and `Alexandre Gramfort`_. - - Added :class:`linear_model.MultiTaskElasticNetCV` and - :class:`linear_model.MultiTaskLassoCV`. By `Manoj Kumar`_. +- Added :class:`linear_model.MultiTaskElasticNetCV` and + :class:`linear_model.MultiTaskLassoCV`. By `Manoj Kumar`_. - - Added :class:`manifold.TSNE`. By Alexander Fabisch. +- Added :class:`manifold.TSNE`. By Alexander Fabisch. Enhancements ............ - - Add sparse input support to :class:`ensemble.AdaBoostClassifier` and - :class:`ensemble.AdaBoostRegressor` meta-estimators. - By :user:`Hamzeh Alsalhi `. +- Add sparse input support to :class:`ensemble.AdaBoostClassifier` and + :class:`ensemble.AdaBoostRegressor` meta-estimators. + By :user:`Hamzeh Alsalhi `. - - Memory improvements of decision trees, by `Arnaud Joly`_. +- Memory improvements of decision trees, by `Arnaud Joly`_. - - Decision trees can now be built in best-first manner by using ``max_leaf_nodes`` - as the stopping criteria. Refactored the tree code to use either a - stack or a priority queue for tree building. - By `Peter Prettenhofer`_ and `Gilles Louppe`_. +- Decision trees can now be built in best-first manner by using ``max_leaf_nodes`` + as the stopping criteria. Refactored the tree code to use either a + stack or a priority queue for tree building. + By `Peter Prettenhofer`_ and `Gilles Louppe`_. - - Decision trees can now be fitted on fortran- and c-style arrays, and - non-continuous arrays without the need to make a copy. - If the input array has a different dtype than ``np.float32``, a fortran- - style copy will be made since fortran-style memory layout has speed - advantages. By `Peter Prettenhofer`_ and `Gilles Louppe`_. 
+- Decision trees can now be fitted on fortran- and c-style arrays, and + non-contiguous arrays without the need to make a copy. + If the input array has a different dtype than ``np.float32``, a fortran- + style copy will be made since fortran-style memory layout has speed + advantages. By `Peter Prettenhofer`_ and `Gilles Louppe`_. - - Speed improvement of regression trees by optimizing the - the computation of the mean square error criterion. This lead - to speed improvement of the tree, forest and gradient boosting tree - modules. By `Arnaud Joly`_ +- Speed improvement of regression trees by optimizing the + computation of the mean square error criterion. This led + to speed improvement of the tree, forest and gradient boosting tree + modules. By `Arnaud Joly`_ - - The ``img_to_graph`` and ``grid_tograph`` functions in - :mod:`sklearn.feature_extraction.image` now return ``np.ndarray`` - instead of ``np.matrix`` when ``return_as=np.ndarray``. See the - Notes section for more information on compatibility. - - - Changed the internal storage of decision trees to use a struct array. - This fixed some small bugs, while improving code and providing a small - speed gain. By `Joel Nothman`_. - - - Reduce memory usage and overhead when fitting and predicting with forests - of randomized trees in parallel with ``n_jobs != 1`` by leveraging new - threading backend of joblib 0.8 and releasing the GIL in the tree fitting - Cython code. By `Olivier Grisel`_ and `Gilles Louppe`_. - - - Speed improvement of the :mod:`sklearn.ensemble.gradient_boosting` module. - By `Gilles Louppe`_ and `Peter Prettenhofer`_. - - - Various enhancements to the :mod:`sklearn.ensemble.gradient_boosting` - module: a ``warm_start`` argument to fit additional trees, - a ``max_leaf_nodes`` argument to fit GBM style trees, - a ``monitor`` fit argument to inspect the estimator during training, and - refactoring of the verbose code. By `Peter Prettenhofer`_.
- - - Faster :class:`sklearn.ensemble.ExtraTrees` by caching feature values. - By `Arnaud Joly`_. - - - Faster depth-based tree building algorithm such as decision tree, - random forest, extra trees or gradient tree boosting (with depth based - growing strategy) by avoiding trying to split on found constant features - in the sample subset. By `Arnaud Joly`_. - - - Add ``min_weight_fraction_leaf`` pre-pruning parameter to tree-based - methods: the minimum weighted fraction of the input samples required to be - at a leaf node. By `Noel Dawe`_. - - - Added :func:`metrics.pairwise_distances_argmin_min`, by Philippe Gervais. - - - Added predict method to :class:`cluster.AffinityPropagation` and - :class:`cluster.MeanShift`, by `Mathieu Blondel`_. - - - Vector and matrix multiplications have been optimised throughout the - library by `Denis Engemann`_, and `Alexandre Gramfort`_. - In particular, they should take less memory with older NumPy versions - (prior to 1.7.2). - - - Precision-recall and ROC examples now use train_test_split, and have more - explanation of why these metrics are useful. By `Kyle Kastner`_ - - - The training algorithm for :class:`decomposition.NMF` is faster for - sparse matrices and has much lower memory complexity, meaning it will - scale up gracefully to large datasets. By `Lars Buitinck`_. - - - Added svd_method option with default value to "randomized" to - :class:`decomposition.FactorAnalysis` to save memory and - significantly speedup computation by `Denis Engemann`_, and - `Alexandre Gramfort`_. - - - Changed :class:`cross_validation.StratifiedKFold` to try and - preserve as much of the original ordering of samples as possible so as - not to hide overfitting on datasets with a non-negligible level of - samples dependency. - By `Daniel Nouri`_ and `Olivier Grisel`_. - - - Add multi-output support to :class:`gaussian_process.GaussianProcess` - by John Novak. 
- - - Support for precomputed distance matrices in nearest neighbor estimators - by `Robert Layton`_ and `Joel Nothman`_. - - - Norm computations optimized for NumPy 1.6 and later versions by - `Lars Buitinck`_. In particular, the k-means algorithm no longer - needs a temporary data structure the size of its input. - - - :class:`dummy.DummyClassifier` can now be used to predict a constant - output value. By `Manoj Kumar`_. - - - :class:`dummy.DummyRegressor` has now a strategy parameter which allows - to predict the mean, the median of the training set or a constant - output value. By :user:`Maheshakya Wijewardena `. - - - Multi-label classification output in multilabel indicator format - is now supported by :func:`metrics.roc_auc_score` and - :func:`metrics.average_precision_score` by `Arnaud Joly`_. - - - Significant performance improvements (more than 100x speedup for - large problems) in :class:`isotonic.IsotonicRegression` by - `Andrew Tulloch`_. - - - Speed and memory usage improvements to the SGD algorithm for linear - models: it now uses threads, not separate processes, when ``n_jobs>1``. - By `Lars Buitinck`_. - - - Grid search and cross validation allow NaNs in the input arrays so that - preprocessors such as :class:`preprocessing.Imputer - ` can be trained within the cross validation loop, - avoiding potentially skewed results. - - - Ridge regression can now deal with sample weights in feature space - (only sample space until then). By :user:`Michael Eickenberg `. - Both solutions are provided by the Cholesky solver. 
- - - Several classification and regression metrics now support weighted - samples with the new ``sample_weight`` argument: - :func:`metrics.accuracy_score`, - :func:`metrics.zero_one_loss`, - :func:`metrics.precision_score`, - :func:`metrics.average_precision_score`, - :func:`metrics.f1_score`, - :func:`metrics.fbeta_score`, - :func:`metrics.recall_score`, - :func:`metrics.roc_auc_score`, - :func:`metrics.explained_variance_score`, - :func:`metrics.mean_squared_error`, - :func:`metrics.mean_absolute_error`, - :func:`metrics.r2_score`. - By `Noel Dawe`_. - - - Speed up of the sample generator - :func:`datasets.make_multilabel_classification`. By `Joel Nothman`_. +- The ``img_to_graph`` and ``grid_tograph`` functions in + :mod:`sklearn.feature_extraction.image` now return ``np.ndarray`` + instead of ``np.matrix`` when ``return_as=np.ndarray``. See the + Notes section for more information on compatibility. + +- Changed the internal storage of decision trees to use a struct array. + This fixed some small bugs, while improving code and providing a small + speed gain. By `Joel Nothman`_. + +- Reduce memory usage and overhead when fitting and predicting with forests + of randomized trees in parallel with ``n_jobs != 1`` by leveraging new + threading backend of joblib 0.8 and releasing the GIL in the tree fitting + Cython code. By `Olivier Grisel`_ and `Gilles Louppe`_. + +- Speed improvement of the :mod:`sklearn.ensemble.gradient_boosting` module. + By `Gilles Louppe`_ and `Peter Prettenhofer`_. + +- Various enhancements to the :mod:`sklearn.ensemble.gradient_boosting` + module: a ``warm_start`` argument to fit additional trees, + a ``max_leaf_nodes`` argument to fit GBM style trees, + a ``monitor`` fit argument to inspect the estimator during training, and + refactoring of the verbose code. By `Peter Prettenhofer`_. + +- Faster :class:`sklearn.ensemble.ExtraTrees` by caching feature values. + By `Arnaud Joly`_. 
+ +- Faster depth-based tree building algorithm such as decision tree, + random forest, extra trees or gradient tree boosting (with depth based + growing strategy) by avoiding trying to split on found constant features + in the sample subset. By `Arnaud Joly`_. + +- Add ``min_weight_fraction_leaf`` pre-pruning parameter to tree-based + methods: the minimum weighted fraction of the input samples required to be + at a leaf node. By `Noel Dawe`_. + +- Added :func:`metrics.pairwise_distances_argmin_min`, by Philippe Gervais. + +- Added predict method to :class:`cluster.AffinityPropagation` and + :class:`cluster.MeanShift`, by `Mathieu Blondel`_. + +- Vector and matrix multiplications have been optimised throughout the + library by `Denis Engemann`_, and `Alexandre Gramfort`_. + In particular, they should take less memory with older NumPy versions + (prior to 1.7.2). + +- Precision-recall and ROC examples now use train_test_split, and have more + explanation of why these metrics are useful. By `Kyle Kastner`_ + +- The training algorithm for :class:`decomposition.NMF` is faster for + sparse matrices and has much lower memory complexity, meaning it will + scale up gracefully to large datasets. By `Lars Buitinck`_. + +- Added svd_method option with default value to "randomized" to + :class:`decomposition.FactorAnalysis` to save memory and + significantly speedup computation by `Denis Engemann`_, and + `Alexandre Gramfort`_. + +- Changed :class:`cross_validation.StratifiedKFold` to try and + preserve as much of the original ordering of samples as possible so as + not to hide overfitting on datasets with a non-negligible level of + samples dependency. + By `Daniel Nouri`_ and `Olivier Grisel`_. + +- Add multi-output support to :class:`gaussian_process.GaussianProcess` + by John Novak. + +- Support for precomputed distance matrices in nearest neighbor estimators + by `Robert Layton`_ and `Joel Nothman`_. 
+ +- Norm computations optimized for NumPy 1.6 and later versions by + `Lars Buitinck`_. In particular, the k-means algorithm no longer + needs a temporary data structure the size of its input. + +- :class:`dummy.DummyClassifier` can now be used to predict a constant + output value. By `Manoj Kumar`_. + +- :class:`dummy.DummyRegressor` has now a strategy parameter which allows + to predict the mean, the median of the training set or a constant + output value. By :user:`Maheshakya Wijewardena `. + +- Multi-label classification output in multilabel indicator format + is now supported by :func:`metrics.roc_auc_score` and + :func:`metrics.average_precision_score` by `Arnaud Joly`_. + +- Significant performance improvements (more than 100x speedup for + large problems) in :class:`isotonic.IsotonicRegression` by + `Andrew Tulloch`_. + +- Speed and memory usage improvements to the SGD algorithm for linear + models: it now uses threads, not separate processes, when ``n_jobs>1``. + By `Lars Buitinck`_. + +- Grid search and cross validation allow NaNs in the input arrays so that + preprocessors such as :class:`preprocessing.Imputer + ` can be trained within the cross validation loop, + avoiding potentially skewed results. + +- Ridge regression can now deal with sample weights in feature space + (only sample space until then). By :user:`Michael Eickenberg `. + Both solutions are provided by the Cholesky solver. + +- Several classification and regression metrics now support weighted + samples with the new ``sample_weight`` argument: + :func:`metrics.accuracy_score`, + :func:`metrics.zero_one_loss`, + :func:`metrics.precision_score`, + :func:`metrics.average_precision_score`, + :func:`metrics.f1_score`, + :func:`metrics.fbeta_score`, + :func:`metrics.recall_score`, + :func:`metrics.roc_auc_score`, + :func:`metrics.explained_variance_score`, + :func:`metrics.mean_squared_error`, + :func:`metrics.mean_absolute_error`, + :func:`metrics.r2_score`. + By `Noel Dawe`_. 
+ +- Speed up of the sample generator + :func:`datasets.make_multilabel_classification`. By `Joel Nothman`_. Documentation improvements ........................... - - The :ref:`Working With Text Data ` tutorial - has now been worked in to the main documentation's tutorial section. - Includes exercises and skeletons for tutorial presentation. - Original tutorial created by several authors including - `Olivier Grisel`_, Lars Buitinck and many others. - Tutorial integration into the scikit-learn documentation - by `Jaques Grobler`_ - - - Added :ref:`Computational Performance ` - documentation. Discussion and examples of prediction latency / throughput - and different factors that have influence over speed. Additional tips for - building faster models and choosing a relevant compromise between speed - and predictive power. - By :user:`Eustache Diemert `. +- The :ref:`Working With Text Data ` tutorial + has now been worked in to the main documentation's tutorial section. + Includes exercises and skeletons for tutorial presentation. + Original tutorial created by several authors including + `Olivier Grisel`_, Lars Buitinck and many others. + Tutorial integration into the scikit-learn documentation + by `Jaques Grobler`_ + +- Added :ref:`Computational Performance ` + documentation. Discussion and examples of prediction latency / throughput + and different factors that have influence over speed. Additional tips for + building faster models and choosing a relevant compromise between speed + and predictive power. + By :user:`Eustache Diemert `. Bug fixes ......... - - Fixed bug in :class:`decomposition.MiniBatchDictionaryLearning` : - ``partial_fit`` was not working properly. +- Fixed bug in :class:`decomposition.MiniBatchDictionaryLearning` : + ``partial_fit`` was not working properly. - - Fixed bug in :class:`linear_model.stochastic_gradient` : - ``l1_ratio`` was used as ``(1.0 - l1_ratio)`` . 
+- Fixed bug in :class:`linear_model.stochastic_gradient` : + ``l1_ratio`` was used as ``(1.0 - l1_ratio)`` . - - Fixed bug in :class:`multiclass.OneVsOneClassifier` with string - labels +- Fixed bug in :class:`multiclass.OneVsOneClassifier` with string + labels - - Fixed a bug in :class:`LassoCV ` and - :class:`ElasticNetCV `: they would not - pre-compute the Gram matrix with ``precompute=True`` or - ``precompute="auto"`` and ``n_samples > n_features``. By `Manoj Kumar`_. +- Fixed a bug in :class:`LassoCV ` and + :class:`ElasticNetCV `: they would not + pre-compute the Gram matrix with ``precompute=True`` or + ``precompute="auto"`` and ``n_samples > n_features``. By `Manoj Kumar`_. - - Fixed incorrect estimation of the degrees of freedom in - :func:`feature_selection.f_regression` when variates are not centered. - By :user:`Virgile Fritsch `. +- Fixed incorrect estimation of the degrees of freedom in + :func:`feature_selection.f_regression` when variates are not centered. + By :user:`Virgile Fritsch `. - - Fixed a race condition in parallel processing with - ``pre_dispatch != "all"`` (for instance, in ``cross_val_score``). - By `Olivier Grisel`_. +- Fixed a race condition in parallel processing with + ``pre_dispatch != "all"`` (for instance, in ``cross_val_score``). + By `Olivier Grisel`_. - - Raise error in :class:`cluster.FeatureAgglomeration` and - :class:`cluster.WardAgglomeration` when no samples are given, - rather than returning meaningless clustering. +- Raise error in :class:`cluster.FeatureAgglomeration` and + :class:`cluster.WardAgglomeration` when no samples are given, + rather than returning meaningless clustering. - - Fixed bug in :class:`gradient_boosting.GradientBoostingRegressor` with - ``loss='huber'``: ``gamma`` might have not been initialized. +- Fixed bug in :class:`gradient_boosting.GradientBoostingRegressor` with + ``loss='huber'``: ``gamma`` might have not been initialized. 
- - Fixed feature importances as computed with a forest of randomized trees - when fit with ``sample_weight != None`` and/or with ``bootstrap=True``. - By `Gilles Louppe`_. +- Fixed feature importances as computed with a forest of randomized trees + when fit with ``sample_weight != None`` and/or with ``bootstrap=True``. + By `Gilles Louppe`_. API changes summary ------------------- - - :mod:`sklearn.hmm` is deprecated. Its removal is planned - for the 0.17 release. - - - Use of :class:`covariance.EllipticEnvelop` has now been removed after - deprecation. - Please use :class:`covariance.EllipticEnvelope` instead. - - - :class:`cluster.Ward` is deprecated. Use - :class:`cluster.AgglomerativeClustering` instead. - - - :class:`cluster.WardClustering` is deprecated. Use - - :class:`cluster.AgglomerativeClustering` instead. - - - :class:`cross_validation.Bootstrap` is deprecated. - :class:`cross_validation.KFold` or - :class:`cross_validation.ShuffleSplit` are recommended instead. - - - Direct support for the sequence of sequences (or list of lists) multilabel - format is deprecated. To convert to and from the supported binary - indicator matrix format, use - :class:`MultiLabelBinarizer `. - By `Joel Nothman`_. - - - Add score method to :class:`PCA ` following the model of - probabilistic PCA and deprecate - :class:`ProbabilisticPCA ` model whose - score implementation is not correct. The computation now also exploits the - matrix inversion lemma for faster computation. By `Alexandre Gramfort`_. - - - The score method of :class:`FactorAnalysis ` - now returns the average log-likelihood of the samples. Use score_samples - to get log-likelihood of each sample. By `Alexandre Gramfort`_. - - - Generating boolean masks (the setting ``indices=False``) - from cross-validation generators is deprecated. - Support for masks will be removed in 0.17. - The generators have produced arrays of indices by default since 0.10. - By `Joel Nothman`_. 
- - - 1-d arrays containing strings with ``dtype=object`` (as used in Pandas) - are now considered valid classification targets. This fixes a regression - from version 0.13 in some classifiers. By `Joel Nothman`_. - - - Fix wrong ``explained_variance_ratio_`` attribute in - :class:`RandomizedPCA `. - By `Alexandre Gramfort`_. - - - Fit alphas for each ``l1_ratio`` instead of ``mean_l1_ratio`` in - :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV`. - This changes the shape of ``alphas_`` from ``(n_alphas,)`` to - ``(n_l1_ratio, n_alphas)`` if the ``l1_ratio`` provided is a 1-D array like - object of length greater than one. - By `Manoj Kumar`_. - - - Fix :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV` - when fitting intercept and input data is sparse. The automatic grid - of alphas was not computed correctly and the scaling with normalize - was wrong. By `Manoj Kumar`_. - - - Fix wrong maximal number of features drawn (``max_features``) at each split - for decision trees, random forests and gradient tree boosting. - Previously, the count for the number of drawn features started only after - one non constant features in the split. This bug fix will affect - computational and generalization performance of those algorithms in the - presence of constant features. To get back previous generalization - performance, you should modify the value of ``max_features``. - By `Arnaud Joly`_. - - - Fix wrong maximal number of features drawn (``max_features``) at each split - for :class:`ensemble.ExtraTreesClassifier` and - :class:`ensemble.ExtraTreesRegressor`. Previously, only non constant - features in the split was counted as drawn. Now constant features are - counted as drawn. Furthermore at least one feature must be non constant - in order to make a valid split. This bug fix will affect - computational and generalization performance of extra trees in the - presence of constant features. 
To get back previous generalization - performance, you should modify the value of ``max_features``. - By `Arnaud Joly`_. - - - Fix :func:`utils.compute_class_weight` when ``class_weight=="auto"``. - Previously it was broken for input of non-integer ``dtype`` and the - weighted array that was returned was wrong. By `Manoj Kumar`_. - - - Fix :class:`cross_validation.Bootstrap` to return ``ValueError`` - when ``n_train + n_test > n``. By :user:`Ronald Phlypo `. +- :mod:`sklearn.hmm` is deprecated. Its removal is planned + for the 0.17 release. + +- Use of :class:`covariance.EllipticEnvelop` has now been removed after + deprecation. + Please use :class:`covariance.EllipticEnvelope` instead. + +- :class:`cluster.Ward` is deprecated. Use + :class:`cluster.AgglomerativeClustering` instead. + +- :class:`cluster.WardClustering` is deprecated. Use + :class:`cluster.AgglomerativeClustering` instead. + +- :class:`cross_validation.Bootstrap` is deprecated. + :class:`cross_validation.KFold` or + :class:`cross_validation.ShuffleSplit` are recommended instead. + +- Direct support for the sequence of sequences (or list of lists) multilabel + format is deprecated. To convert to and from the supported binary + indicator matrix format, use + :class:`MultiLabelBinarizer `. + By `Joel Nothman`_. + +- Add score method to :class:`PCA ` following the model of + probabilistic PCA and deprecate + :class:`ProbabilisticPCA ` model whose + score implementation is not correct. The computation now also exploits the + matrix inversion lemma for faster computation. By `Alexandre Gramfort`_. + +- The score method of :class:`FactorAnalysis ` + now returns the average log-likelihood of the samples. Use score_samples + to get log-likelihood of each sample. By `Alexandre Gramfort`_. + +- Generating boolean masks (the setting ``indices=False``) + from cross-validation generators is deprecated. + Support for masks will be removed in 0.17.
+ The generators have produced arrays of indices by default since 0.10. + By `Joel Nothman`_. + +- 1-d arrays containing strings with ``dtype=object`` (as used in Pandas) + are now considered valid classification targets. This fixes a regression + from version 0.13 in some classifiers. By `Joel Nothman`_. + +- Fix wrong ``explained_variance_ratio_`` attribute in + :class:`RandomizedPCA `. + By `Alexandre Gramfort`_. + +- Fit alphas for each ``l1_ratio`` instead of ``mean_l1_ratio`` in + :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV`. + This changes the shape of ``alphas_`` from ``(n_alphas,)`` to + ``(n_l1_ratio, n_alphas)`` if the ``l1_ratio`` provided is a 1-D array like + object of length greater than one. + By `Manoj Kumar`_. + +- Fix :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV` + when fitting intercept and input data is sparse. The automatic grid + of alphas was not computed correctly and the scaling with normalize + was wrong. By `Manoj Kumar`_. + +- Fix wrong maximal number of features drawn (``max_features``) at each split + for decision trees, random forests and gradient tree boosting. + Previously, the count for the number of drawn features started only after + one non constant features in the split. This bug fix will affect + computational and generalization performance of those algorithms in the + presence of constant features. To get back previous generalization + performance, you should modify the value of ``max_features``. + By `Arnaud Joly`_. + +- Fix wrong maximal number of features drawn (``max_features``) at each split + for :class:`ensemble.ExtraTreesClassifier` and + :class:`ensemble.ExtraTreesRegressor`. Previously, only non constant + features in the split was counted as drawn. Now constant features are + counted as drawn. Furthermore at least one feature must be non constant + in order to make a valid split. 
This bug fix will affect + computational and generalization performance of extra trees in the + presence of constant features. To get back previous generalization + performance, you should modify the value of ``max_features``. + By `Arnaud Joly`_. + +- Fix :func:`utils.compute_class_weight` when ``class_weight=="auto"``. + Previously it was broken for input of non-integer ``dtype`` and the + weighted array that was returned was wrong. By `Manoj Kumar`_. + +- Fix :class:`cross_validation.Bootstrap` to return ``ValueError`` + when ``n_train + n_test > n``. By :user:`Ronald Phlypo `. People @@ -3322,287 +3442,287 @@ Version 0.14 Changelog --------- - - Missing values with sparse and dense matrices can be imputed with the - transformer :class:`preprocessing.Imputer` by `Nicolas Trésegnie`_. - - - The core implementation of decisions trees has been rewritten from - scratch, allowing for faster tree induction and lower memory - consumption in all tree-based estimators. By `Gilles Louppe`_. - - - Added :class:`ensemble.AdaBoostClassifier` and - :class:`ensemble.AdaBoostRegressor`, by `Noel Dawe`_ and - `Gilles Louppe`_. See the :ref:`AdaBoost ` section of the user - guide for details and examples. - - - Added :class:`grid_search.RandomizedSearchCV` and - :class:`grid_search.ParameterSampler` for randomized hyperparameter - optimization. By `Andreas Müller`_. - - - Added :ref:`biclustering ` algorithms - (:class:`sklearn.cluster.bicluster.SpectralCoclustering` and - :class:`sklearn.cluster.bicluster.SpectralBiclustering`), data - generation methods (:func:`sklearn.datasets.make_biclusters` and - :func:`sklearn.datasets.make_checkerboard`), and scoring metrics - (:func:`sklearn.metrics.consensus_score`). By `Kemal Eren`_. - - - Added :ref:`Restricted Boltzmann Machines` - (:class:`neural_network.BernoulliRBM`). By `Yann Dauphin`_. - - - Python 3 support by :user:`Justin Vincent `, `Lars Buitinck`_, - :user:`Subhodeep Moitra ` and `Olivier Grisel`_. 
All tests now pass under - Python 3.3. - - - Ability to pass one penalty (alpha value) per target in - :class:`linear_model.Ridge`, by @eickenberg and `Mathieu Blondel`_. - - - Fixed :mod:`sklearn.linear_model.stochastic_gradient.py` L2 regularization - issue (minor practical significance). - By :user:`Norbert Crombach ` and `Mathieu Blondel`_ . - - - Added an interactive version of `Andreas Müller`_'s - `Machine Learning Cheat Sheet (for scikit-learn) - `_ - to the documentation. See :ref:`Choosing the right estimator `. - By `Jaques Grobler`_. - - - :class:`grid_search.GridSearchCV` and - :func:`cross_validation.cross_val_score` now support the use of advanced - scoring function such as area under the ROC curve and f-beta scores. - See :ref:`scoring_parameter` for details. By `Andreas Müller`_ - and `Lars Buitinck`_. - Passing a function from :mod:`sklearn.metrics` as ``score_func`` is - deprecated. - - - Multi-label classification output is now supported by - :func:`metrics.accuracy_score`, :func:`metrics.zero_one_loss`, - :func:`metrics.f1_score`, :func:`metrics.fbeta_score`, - :func:`metrics.classification_report`, - :func:`metrics.precision_score` and :func:`metrics.recall_score` - by `Arnaud Joly`_. - - - Two new metrics :func:`metrics.hamming_loss` and - :func:`metrics.jaccard_similarity_score` - are added with multi-label support by `Arnaud Joly`_. - - - Speed and memory usage improvements in - :class:`feature_extraction.text.CountVectorizer` and - :class:`feature_extraction.text.TfidfVectorizer`, - by Jochen Wersdörfer and Roman Sinayev. - - - The ``min_df`` parameter in - :class:`feature_extraction.text.CountVectorizer` and - :class:`feature_extraction.text.TfidfVectorizer`, which used to be 2, - has been reset to 1 to avoid unpleasant surprises (empty vocabularies) - for novice users who try it out on tiny document collections. - A value of at least 2 is still recommended for practical use. 
- - - :class:`svm.LinearSVC`, :class:`linear_model.SGDClassifier` and - :class:`linear_model.SGDRegressor` now have a ``sparsify`` method that - converts their ``coef_`` into a sparse matrix, meaning stored models - trained using these estimators can be made much more compact. - - - :class:`linear_model.SGDClassifier` now produces multiclass probability - estimates when trained under log loss or modified Huber loss. - - - Hyperlinks to documentation in example code on the website by - :user:`Martin Luessi `. - - - Fixed bug in :class:`preprocessing.MinMaxScaler` causing incorrect scaling - of the features for non-default ``feature_range`` settings. By `Andreas - Müller`_. - - - ``max_features`` in :class:`tree.DecisionTreeClassifier`, - :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators - now supports percentage values. By `Gilles Louppe`_. - - - Performance improvements in :class:`isotonic.IsotonicRegression` by - `Nelle Varoquaux`_. - - - :func:`metrics.accuracy_score` has an option normalize to return - the fraction or the number of correctly classified sample - by `Arnaud Joly`_. - - - Added :func:`metrics.log_loss` that computes log loss, aka cross-entropy - loss. By Jochen Wersdörfer and `Lars Buitinck`_. +- Missing values with sparse and dense matrices can be imputed with the + transformer :class:`preprocessing.Imputer` by `Nicolas Trésegnie`_. + +- The core implementation of decisions trees has been rewritten from + scratch, allowing for faster tree induction and lower memory + consumption in all tree-based estimators. By `Gilles Louppe`_. + +- Added :class:`ensemble.AdaBoostClassifier` and + :class:`ensemble.AdaBoostRegressor`, by `Noel Dawe`_ and + `Gilles Louppe`_. See the :ref:`AdaBoost ` section of the user + guide for details and examples. + +- Added :class:`grid_search.RandomizedSearchCV` and + :class:`grid_search.ParameterSampler` for randomized hyperparameter + optimization. By `Andreas Müller`_. 
+ +- Added :ref:`biclustering ` algorithms + (:class:`sklearn.cluster.bicluster.SpectralCoclustering` and + :class:`sklearn.cluster.bicluster.SpectralBiclustering`), data + generation methods (:func:`sklearn.datasets.make_biclusters` and + :func:`sklearn.datasets.make_checkerboard`), and scoring metrics + (:func:`sklearn.metrics.consensus_score`). By `Kemal Eren`_. + +- Added :ref:`Restricted Boltzmann Machines` + (:class:`neural_network.BernoulliRBM`). By `Yann Dauphin`_. + +- Python 3 support by :user:`Justin Vincent `, `Lars Buitinck`_, + :user:`Subhodeep Moitra ` and `Olivier Grisel`_. All tests now pass under + Python 3.3. + +- Ability to pass one penalty (alpha value) per target in + :class:`linear_model.Ridge`, by @eickenberg and `Mathieu Blondel`_. + +- Fixed :mod:`sklearn.linear_model.stochastic_gradient.py` L2 regularization + issue (minor practical significance). + By :user:`Norbert Crombach ` and `Mathieu Blondel`_ . + +- Added an interactive version of `Andreas Müller`_'s + `Machine Learning Cheat Sheet (for scikit-learn) + `_ + to the documentation. See :ref:`Choosing the right estimator `. + By `Jaques Grobler`_. + +- :class:`grid_search.GridSearchCV` and + :func:`cross_validation.cross_val_score` now support the use of advanced + scoring function such as area under the ROC curve and f-beta scores. + See :ref:`scoring_parameter` for details. By `Andreas Müller`_ + and `Lars Buitinck`_. + Passing a function from :mod:`sklearn.metrics` as ``score_func`` is + deprecated. + +- Multi-label classification output is now supported by + :func:`metrics.accuracy_score`, :func:`metrics.zero_one_loss`, + :func:`metrics.f1_score`, :func:`metrics.fbeta_score`, + :func:`metrics.classification_report`, + :func:`metrics.precision_score` and :func:`metrics.recall_score` + by `Arnaud Joly`_. + +- Two new metrics :func:`metrics.hamming_loss` and + :func:`metrics.jaccard_similarity_score` + are added with multi-label support by `Arnaud Joly`_. 
+ +- Speed and memory usage improvements in + :class:`feature_extraction.text.CountVectorizer` and + :class:`feature_extraction.text.TfidfVectorizer`, + by Jochen Wersdörfer and Roman Sinayev. + +- The ``min_df`` parameter in + :class:`feature_extraction.text.CountVectorizer` and + :class:`feature_extraction.text.TfidfVectorizer`, which used to be 2, + has been reset to 1 to avoid unpleasant surprises (empty vocabularies) + for novice users who try it out on tiny document collections. + A value of at least 2 is still recommended for practical use. + +- :class:`svm.LinearSVC`, :class:`linear_model.SGDClassifier` and + :class:`linear_model.SGDRegressor` now have a ``sparsify`` method that + converts their ``coef_`` into a sparse matrix, meaning stored models + trained using these estimators can be made much more compact. + +- :class:`linear_model.SGDClassifier` now produces multiclass probability + estimates when trained under log loss or modified Huber loss. + +- Hyperlinks to documentation in example code on the website by + :user:`Martin Luessi `. + +- Fixed bug in :class:`preprocessing.MinMaxScaler` causing incorrect scaling + of the features for non-default ``feature_range`` settings. By `Andreas + Müller`_. + +- ``max_features`` in :class:`tree.DecisionTreeClassifier`, + :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators + now supports percentage values. By `Gilles Louppe`_. + +- Performance improvements in :class:`isotonic.IsotonicRegression` by + `Nelle Varoquaux`_. + +- :func:`metrics.accuracy_score` has an option normalize to return + the fraction or the number of correctly classified sample + by `Arnaud Joly`_. + +- Added :func:`metrics.log_loss` that computes log loss, aka cross-entropy + loss. By Jochen Wersdörfer and `Lars Buitinck`_. - - A bug that caused :class:`ensemble.AdaBoostClassifier`'s to output - incorrect probabilities has been fixed. 
- - - Feature selectors now share a mixin providing consistent ``transform``, - ``inverse_transform`` and ``get_support`` methods. By `Joel Nothman`_. - - - A fitted :class:`grid_search.GridSearchCV` or - :class:`grid_search.RandomizedSearchCV` can now generally be pickled. - By `Joel Nothman`_. - - - Refactored and vectorized implementation of :func:`metrics.roc_curve` - and :func:`metrics.precision_recall_curve`. By `Joel Nothman`_. +- A bug that caused :class:`ensemble.AdaBoostClassifier`'s to output + incorrect probabilities has been fixed. + +- Feature selectors now share a mixin providing consistent ``transform``, + ``inverse_transform`` and ``get_support`` methods. By `Joel Nothman`_. + +- A fitted :class:`grid_search.GridSearchCV` or + :class:`grid_search.RandomizedSearchCV` can now generally be pickled. + By `Joel Nothman`_. + +- Refactored and vectorized implementation of :func:`metrics.roc_curve` + and :func:`metrics.precision_recall_curve`. By `Joel Nothman`_. - - The new estimator :class:`sklearn.decomposition.TruncatedSVD` - performs dimensionality reduction using SVD on sparse matrices, - and can be used for latent semantic analysis (LSA). - By `Lars Buitinck`_. +- The new estimator :class:`sklearn.decomposition.TruncatedSVD` + performs dimensionality reduction using SVD on sparse matrices, + and can be used for latent semantic analysis (LSA). + By `Lars Buitinck`_. - - Added self-contained example of out-of-core learning on text data - :ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`. - By :user:`Eustache Diemert `. +- Added self-contained example of out-of-core learning on text data + :ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`. + By :user:`Eustache Diemert `. - - The default number of components for - :class:`sklearn.decomposition.RandomizedPCA` is now correctly documented - to be ``n_features``. This was the default behavior, so programs using it - will continue to work as they did. 
+- The default number of components for + :class:`sklearn.decomposition.RandomizedPCA` is now correctly documented + to be ``n_features``. This was the default behavior, so programs using it + will continue to work as they did. - - :class:`sklearn.cluster.KMeans` now fits several orders of magnitude - faster on sparse data (the speedup depends on the sparsity). By - `Lars Buitinck`_. - - - Reduce memory footprint of FastICA by `Denis Engemann`_ and - `Alexandre Gramfort`_. +- :class:`sklearn.cluster.KMeans` now fits several orders of magnitude + faster on sparse data (the speedup depends on the sparsity). By + `Lars Buitinck`_. + +- Reduce memory footprint of FastICA by `Denis Engemann`_ and + `Alexandre Gramfort`_. - - Verbose output in :mod:`sklearn.ensemble.gradient_boosting` now uses - a column format and prints progress in decreasing frequency. - It also shows the remaining time. By `Peter Prettenhofer`_. +- Verbose output in :mod:`sklearn.ensemble.gradient_boosting` now uses + a column format and prints progress in decreasing frequency. + It also shows the remaining time. By `Peter Prettenhofer`_. - - :mod:`sklearn.ensemble.gradient_boosting` provides out-of-bag improvement - :attr:`~sklearn.ensemble.GradientBoostingRegressor.oob_improvement_` - rather than the OOB score for model selection. An example that shows - how to use OOB estimates to select the number of trees was added. - By `Peter Prettenhofer`_. +- :mod:`sklearn.ensemble.gradient_boosting` provides out-of-bag improvement + :attr:`~sklearn.ensemble.GradientBoostingRegressor.oob_improvement_` + rather than the OOB score for model selection. An example that shows + how to use OOB estimates to select the number of trees was added. + By `Peter Prettenhofer`_. - - Most metrics now support string labels for multiclass classification - by `Arnaud Joly`_ and `Lars Buitinck`_. +- Most metrics now support string labels for multiclass classification + by `Arnaud Joly`_ and `Lars Buitinck`_. 
- - New OrthogonalMatchingPursuitCV class by `Alexandre Gramfort`_ - and `Vlad Niculae`_. +- New OrthogonalMatchingPursuitCV class by `Alexandre Gramfort`_ + and `Vlad Niculae`_. - - Fixed a bug in :class:`sklearn.covariance.GraphLassoCV`: the - 'alphas' parameter now works as expected when given a list of - values. By Philippe Gervais. +- Fixed a bug in :class:`sklearn.covariance.GraphLassoCV`: the + 'alphas' parameter now works as expected when given a list of + values. By Philippe Gervais. - - Fixed an important bug in :class:`sklearn.covariance.GraphLassoCV` - that prevented all folds provided by a CV object to be used (only - the first 3 were used). When providing a CV object, execution - time may thus increase significantly compared to the previous - version (bug results are correct now). By Philippe Gervais. +- Fixed an important bug in :class:`sklearn.covariance.GraphLassoCV` + that prevented all folds provided by a CV object to be used (only + the first 3 were used). When providing a CV object, execution + time may thus increase significantly compared to the previous + version (but results are correct now). By Philippe Gervais. - - :class:`cross_validation.cross_val_score` and the :mod:`grid_search` - module is now tested with multi-output data by `Arnaud Joly`_. +- :class:`cross_validation.cross_val_score` and the :mod:`grid_search` + module is now tested with multi-output data by `Arnaud Joly`_. - - :func:`datasets.make_multilabel_classification` can now return - the output in label indicator multilabel format by `Arnaud Joly`_. +- :func:`datasets.make_multilabel_classification` can now return + the output in label indicator multilabel format by `Arnaud Joly`_. - - K-nearest neighbors, :class:`neighbors.KNeighborsRegressor` - and :class:`neighbors.RadiusNeighborsRegressor`, - and radius neighbors, :class:`neighbors.RadiusNeighborsRegressor` and - :class:`neighbors.RadiusNeighborsClassifier` support multioutput data - by `Arnaud Joly`_.
+- K-nearest neighbors, :class:`neighbors.KNeighborsRegressor` + and :class:`neighbors.RadiusNeighborsRegressor`, + and radius neighbors, :class:`neighbors.RadiusNeighborsRegressor` and + :class:`neighbors.RadiusNeighborsClassifier` support multioutput data + by `Arnaud Joly`_. - - Random state in LibSVM-based estimators (:class:`svm.SVC`, :class:`NuSVC`, - :class:`OneClassSVM`, :class:`svm.SVR`, :class:`svm.NuSVR`) can now be - controlled. This is useful to ensure consistency in the probability - estimates for the classifiers trained with ``probability=True``. By - `Vlad Niculae`_. +- Random state in LibSVM-based estimators (:class:`svm.SVC`, :class:`NuSVC`, + :class:`OneClassSVM`, :class:`svm.SVR`, :class:`svm.NuSVR`) can now be + controlled. This is useful to ensure consistency in the probability + estimates for the classifiers trained with ``probability=True``. By + `Vlad Niculae`_. - - Out-of-core learning support for discrete naive Bayes classifiers - :class:`sklearn.naive_bayes.MultinomialNB` and - :class:`sklearn.naive_bayes.BernoulliNB` by adding the ``partial_fit`` - method by `Olivier Grisel`_. +- Out-of-core learning support for discrete naive Bayes classifiers + :class:`sklearn.naive_bayes.MultinomialNB` and + :class:`sklearn.naive_bayes.BernoulliNB` by adding the ``partial_fit`` + method by `Olivier Grisel`_. - - New website design and navigation by `Gilles Louppe`_, `Nelle Varoquaux`_, - Vincent Michel and `Andreas Müller`_. +- New website design and navigation by `Gilles Louppe`_, `Nelle Varoquaux`_, + Vincent Michel and `Andreas Müller`_. - - Improved documentation on :ref:`multi-class, multi-label and multi-output - classification ` by `Yannick Schwartz`_ and `Arnaud Joly`_. +- Improved documentation on :ref:`multi-class, multi-label and multi-output + classification ` by `Yannick Schwartz`_ and `Arnaud Joly`_. - - Better input and error handling in the :mod:`metrics` module by - `Arnaud Joly`_ and `Joel Nothman`_. 
+- Better input and error handling in the :mod:`metrics` module by + `Arnaud Joly`_ and `Joel Nothman`_. - - Speed optimization of the :mod:`hmm` module by :user:`Mikhail Korobov ` +- Speed optimization of the :mod:`hmm` module by :user:`Mikhail Korobov ` - - Significant speed improvements for :class:`sklearn.cluster.DBSCAN` - by `cleverless `_ +- Significant speed improvements for :class:`sklearn.cluster.DBSCAN` + by `cleverless `_ API changes summary ------------------- - - The :func:`auc_score` was renamed :func:`roc_auc_score`. +- The :func:`auc_score` was renamed :func:`roc_auc_score`. - - Testing scikit-learn with ``sklearn.test()`` is deprecated. Use - ``nosetests sklearn`` from the command line. +- Testing scikit-learn with ``sklearn.test()`` is deprecated. Use + ``nosetests sklearn`` from the command line. - - Feature importances in :class:`tree.DecisionTreeClassifier`, - :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators - are now computed on the fly when accessing the ``feature_importances_`` - attribute. Setting ``compute_importances=True`` is no longer required. - By `Gilles Louppe`_. +- Feature importances in :class:`tree.DecisionTreeClassifier`, + :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators + are now computed on the fly when accessing the ``feature_importances_`` + attribute. Setting ``compute_importances=True`` is no longer required. + By `Gilles Louppe`_. - - :class:`linear_model.lasso_path` and - :class:`linear_model.enet_path` can return its results in the same - format as that of :class:`linear_model.lars_path`. This is done by - setting the ``return_models`` parameter to ``False``. By - `Jaques Grobler`_ and `Alexandre Gramfort`_ +- :class:`linear_model.lasso_path` and + :class:`linear_model.enet_path` can return its results in the same + format as that of :class:`linear_model.lars_path`. This is done by + setting the ``return_models`` parameter to ``False``. 
By + `Jaques Grobler`_ and `Alexandre Gramfort`_ - - :class:`grid_search.IterGrid` was renamed to - :class:`grid_search.ParameterGrid`. +- :class:`grid_search.IterGrid` was renamed to + :class:`grid_search.ParameterGrid`. - - Fixed bug in :class:`KFold` causing imperfect class balance in some - cases. By `Alexandre Gramfort`_ and Tadej Janež. +- Fixed bug in :class:`KFold` causing imperfect class balance in some + cases. By `Alexandre Gramfort`_ and Tadej Janež. - - :class:`sklearn.neighbors.BallTree` has been refactored, and a - :class:`sklearn.neighbors.KDTree` has been - added which shares the same interface. The Ball Tree now works with - a wide variety of distance metrics. Both classes have many new - methods, including single-tree and dual-tree queries, breadth-first - and depth-first searching, and more advanced queries such as - kernel density estimation and 2-point correlation functions. - By `Jake Vanderplas`_ +- :class:`sklearn.neighbors.BallTree` has been refactored, and a + :class:`sklearn.neighbors.KDTree` has been + added which shares the same interface. The Ball Tree now works with + a wide variety of distance metrics. Both classes have many new + methods, including single-tree and dual-tree queries, breadth-first + and depth-first searching, and more advanced queries such as + kernel density estimation and 2-point correlation functions. + By `Jake Vanderplas`_ - - Support for scipy.spatial.cKDTree within neighbors queries has been - removed, and the functionality replaced with the new :class:`KDTree` - class. +- Support for scipy.spatial.cKDTree within neighbors queries has been + removed, and the functionality replaced with the new :class:`KDTree` + class. - - :class:`sklearn.neighbors.KernelDensity` has been added, which performs - efficient kernel density estimation with a variety of kernels. +- :class:`sklearn.neighbors.KernelDensity` has been added, which performs + efficient kernel density estimation with a variety of kernels. 
- - :class:`sklearn.decomposition.KernelPCA` now always returns output with - ``n_components`` components, unless the new parameter ``remove_zero_eig`` - is set to ``True``. This new behavior is consistent with the way - kernel PCA was always documented; previously, the removal of components - with zero eigenvalues was tacitly performed on all data. +- :class:`sklearn.decomposition.KernelPCA` now always returns output with + ``n_components`` components, unless the new parameter ``remove_zero_eig`` + is set to ``True``. This new behavior is consistent with the way + kernel PCA was always documented; previously, the removal of components + with zero eigenvalues was tacitly performed on all data. - - ``gcv_mode="auto"`` no longer tries to perform SVD on a densified - sparse matrix in :class:`sklearn.linear_model.RidgeCV`. +- ``gcv_mode="auto"`` no longer tries to perform SVD on a densified + sparse matrix in :class:`sklearn.linear_model.RidgeCV`. - - Sparse matrix support in :class:`sklearn.decomposition.RandomizedPCA` - is now deprecated in favor of the new ``TruncatedSVD``. +- Sparse matrix support in :class:`sklearn.decomposition.RandomizedPCA` + is now deprecated in favor of the new ``TruncatedSVD``. - - :class:`cross_validation.KFold` and - :class:`cross_validation.StratifiedKFold` now enforce `n_folds >= 2` - otherwise a ``ValueError`` is raised. By `Olivier Grisel`_. +- :class:`cross_validation.KFold` and + :class:`cross_validation.StratifiedKFold` now enforce `n_folds >= 2` + otherwise a ``ValueError`` is raised. By `Olivier Grisel`_. - - :func:`datasets.load_files`'s ``charset`` and ``charset_errors`` - parameters were renamed ``encoding`` and ``decode_errors``. +- :func:`datasets.load_files`'s ``charset`` and ``charset_errors`` + parameters were renamed ``encoding`` and ``decode_errors``. 
- - Attribute ``oob_score_`` in :class:`sklearn.ensemble.GradientBoostingRegressor` - and :class:`sklearn.ensemble.GradientBoostingClassifier` - is deprecated and has been replaced by ``oob_improvement_`` . +- Attribute ``oob_score_`` in :class:`sklearn.ensemble.GradientBoostingRegressor` + and :class:`sklearn.ensemble.GradientBoostingClassifier` + is deprecated and has been replaced by ``oob_improvement_`` . - - Attributes in OrthogonalMatchingPursuit have been deprecated - (copy_X, Gram, ...) and precompute_gram renamed precompute - for consistency. See #2224. +- Attributes in OrthogonalMatchingPursuit have been deprecated + (copy_X, Gram, ...) and precompute_gram renamed precompute + for consistency. See #2224. - - :class:`sklearn.preprocessing.StandardScaler` now converts integer input - to float, and raises a warning. Previously it rounded for dense integer - input. +- :class:`sklearn.preprocessing.StandardScaler` now converts integer input + to float, and raises a warning. Previously it rounded for dense integer + input. - - :class:`sklearn.multiclass.OneVsRestClassifier` now has a - ``decision_function`` method. This will return the distance of each - sample from the decision boundary for each class, as long as the - underlying estimators implement the ``decision_function`` method. - By `Kyle Kastner`_. +- :class:`sklearn.multiclass.OneVsRestClassifier` now has a + ``decision_function`` method. This will return the distance of each + sample from the decision boundary for each class, as long as the + underlying estimators implement the ``decision_function`` method. + By `Kyle Kastner`_. - - Better input validation, warning on unexpected shapes for y. +- Better input validation, warning on unexpected shapes for y. People ------ @@ -3709,21 +3829,21 @@ The 0.13.1 release only fixes some bugs and does not add any new functionality. 
Changelog --------- - - Fixed a testing error caused by the function :func:`cross_validation.train_test_split` being - interpreted as a test by `Yaroslav Halchenko`_. +- Fixed a testing error caused by the function :func:`cross_validation.train_test_split` being + interpreted as a test by `Yaroslav Halchenko`_. - - Fixed a bug in the reassignment of small clusters in the :class:`cluster.MiniBatchKMeans` - by `Gael Varoquaux`_. +- Fixed a bug in the reassignment of small clusters in the :class:`cluster.MiniBatchKMeans` + by `Gael Varoquaux`_. - - Fixed default value of ``gamma`` in :class:`decomposition.KernelPCA` by `Lars Buitinck`_. +- Fixed default value of ``gamma`` in :class:`decomposition.KernelPCA` by `Lars Buitinck`_. - - Updated joblib to ``0.7.0d`` by `Gael Varoquaux`_. +- Updated joblib to ``0.7.0d`` by `Gael Varoquaux`_. - - Fixed scaling of the deviance in :class:`ensemble.GradientBoostingClassifier` by `Peter Prettenhofer`_. +- Fixed scaling of the deviance in :class:`ensemble.GradientBoostingClassifier` by `Peter Prettenhofer`_. - - Better tie-breaking in :class:`multiclass.OneVsOneClassifier` by `Andreas Müller`_. +- Better tie-breaking in :class:`multiclass.OneVsOneClassifier` by `Andreas Müller`_. - - Other small improvements to tests and documentation. +- Other small improvements to tests and documentation. People ------ @@ -3755,263 +3875,263 @@ Version 0.13 New Estimator Classes --------------------- - - :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor`, two - data-independent predictors by `Mathieu Blondel`_. Useful to sanity-check - your estimators. See :ref:`dummy_estimators` in the user guide. - Multioutput support added by `Arnaud Joly`_. +- :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor`, two + data-independent predictors by `Mathieu Blondel`_. Useful to sanity-check + your estimators. See :ref:`dummy_estimators` in the user guide. + Multioutput support added by `Arnaud Joly`_. 
- - :class:`decomposition.FactorAnalysis`, a transformer implementing the - classical factor analysis, by `Christian Osendorfer`_ and `Alexandre - Gramfort`_. See :ref:`FA` in the user guide. +- :class:`decomposition.FactorAnalysis`, a transformer implementing the + classical factor analysis, by `Christian Osendorfer`_ and `Alexandre + Gramfort`_. See :ref:`FA` in the user guide. - - :class:`feature_extraction.FeatureHasher`, a transformer implementing the - "hashing trick" for fast, low-memory feature extraction from string fields - by `Lars Buitinck`_ and :class:`feature_extraction.text.HashingVectorizer` - for text documents by `Olivier Grisel`_ See :ref:`feature_hashing` and - :ref:`hashing_vectorizer` for the documentation and sample usage. +- :class:`feature_extraction.FeatureHasher`, a transformer implementing the + "hashing trick" for fast, low-memory feature extraction from string fields + by `Lars Buitinck`_ and :class:`feature_extraction.text.HashingVectorizer` + for text documents by `Olivier Grisel`_ See :ref:`feature_hashing` and + :ref:`hashing_vectorizer` for the documentation and sample usage. - - :class:`pipeline.FeatureUnion`, a transformer that concatenates - results of several other transformers by `Andreas Müller`_. See - :ref:`feature_union` in the user guide. +- :class:`pipeline.FeatureUnion`, a transformer that concatenates + results of several other transformers by `Andreas Müller`_. See + :ref:`feature_union` in the user guide. - - :class:`random_projection.GaussianRandomProjection`, - :class:`random_projection.SparseRandomProjection` and the function - :func:`random_projection.johnson_lindenstrauss_min_dim`. The first two are - transformers implementing Gaussian and sparse random projection matrix - by `Olivier Grisel`_ and `Arnaud Joly`_. - See :ref:`random_projection` in the user guide. 
+- :class:`random_projection.GaussianRandomProjection`, + :class:`random_projection.SparseRandomProjection` and the function + :func:`random_projection.johnson_lindenstrauss_min_dim`. The first two are + transformers implementing Gaussian and sparse random projection matrix + by `Olivier Grisel`_ and `Arnaud Joly`_. + See :ref:`random_projection` in the user guide. - - :class:`kernel_approximation.Nystroem`, a transformer for approximating - arbitrary kernels by `Andreas Müller`_. See - :ref:`nystroem_kernel_approx` in the user guide. +- :class:`kernel_approximation.Nystroem`, a transformer for approximating + arbitrary kernels by `Andreas Müller`_. See + :ref:`nystroem_kernel_approx` in the user guide. - - :class:`preprocessing.OneHotEncoder`, a transformer that computes binary - encodings of categorical features by `Andreas Müller`_. See - :ref:`preprocessing_categorical_features` in the user guide. +- :class:`preprocessing.OneHotEncoder`, a transformer that computes binary + encodings of categorical features by `Andreas Müller`_. See + :ref:`preprocessing_categorical_features` in the user guide. - - :class:`linear_model.PassiveAggressiveClassifier` and - :class:`linear_model.PassiveAggressiveRegressor`, predictors implementing - an efficient stochastic optimization for linear models by `Rob Zinkov`_ and - `Mathieu Blondel`_. See :ref:`passive_aggressive` in the user - guide. +- :class:`linear_model.PassiveAggressiveClassifier` and + :class:`linear_model.PassiveAggressiveRegressor`, predictors implementing + an efficient stochastic optimization for linear models by `Rob Zinkov`_ and + `Mathieu Blondel`_. See :ref:`passive_aggressive` in the user + guide. - - :class:`ensemble.RandomTreesEmbedding`, a transformer for creating high-dimensional - sparse representations using ensembles of totally random trees by `Andreas Müller`_. - See :ref:`random_trees_embedding` in the user guide. 
+- :class:`ensemble.RandomTreesEmbedding`, a transformer for creating high-dimensional + sparse representations using ensembles of totally random trees by `Andreas Müller`_. + See :ref:`random_trees_embedding` in the user guide. - - :class:`manifold.SpectralEmbedding` and function - :func:`manifold.spectral_embedding`, implementing the "laplacian - eigenmaps" transformation for non-linear dimensionality reduction by Wei - Li. See :ref:`spectral_embedding` in the user guide. +- :class:`manifold.SpectralEmbedding` and function + :func:`manifold.spectral_embedding`, implementing the "laplacian + eigenmaps" transformation for non-linear dimensionality reduction by Wei + Li. See :ref:`spectral_embedding` in the user guide. - - :class:`isotonic.IsotonicRegression` by `Fabian Pedregosa`_, `Alexandre Gramfort`_ - and `Nelle Varoquaux`_, +- :class:`isotonic.IsotonicRegression` by `Fabian Pedregosa`_, `Alexandre Gramfort`_ + and `Nelle Varoquaux`_, Changelog --------- - - :func:`metrics.zero_one_loss` (formerly ``metrics.zero_one``) now has - option for normalized output that reports the fraction of - misclassifications, rather than the raw number of misclassifications. By - Kyle Beauchamp. +- :func:`metrics.zero_one_loss` (formerly ``metrics.zero_one``) now has + option for normalized output that reports the fraction of + misclassifications, rather than the raw number of misclassifications. By + Kyle Beauchamp. - - :class:`tree.DecisionTreeClassifier` and all derived ensemble models now - support sample weighting, by `Noel Dawe`_ and `Gilles Louppe`_. +- :class:`tree.DecisionTreeClassifier` and all derived ensemble models now + support sample weighting, by `Noel Dawe`_ and `Gilles Louppe`_. - - Speedup improvement when using bootstrap samples in forests of randomized - trees, by `Peter Prettenhofer`_ and `Gilles Louppe`_. +- Speedup improvement when using bootstrap samples in forests of randomized + trees, by `Peter Prettenhofer`_ and `Gilles Louppe`_. 
- - Partial dependence plots for :ref:`gradient_boosting` in - :func:`ensemble.partial_dependence.partial_dependence` by `Peter - Prettenhofer`_. See :ref:`sphx_glr_auto_examples_ensemble_plot_partial_dependence.py` for an - example. +- Partial dependence plots for :ref:`gradient_boosting` in + :func:`ensemble.partial_dependence.partial_dependence` by `Peter + Prettenhofer`_. See :ref:`sphx_glr_auto_examples_ensemble_plot_partial_dependence.py` for an + example. - - The table of contents on the website has now been made expandable by - `Jaques Grobler`_. +- The table of contents on the website has now been made expandable by + `Jaques Grobler`_. - - :class:`feature_selection.SelectPercentile` now breaks ties - deterministically instead of returning all equally ranked features. +- :class:`feature_selection.SelectPercentile` now breaks ties + deterministically instead of returning all equally ranked features. - - :class:`feature_selection.SelectKBest` and - :class:`feature_selection.SelectPercentile` are more numerically stable - since they use scores, rather than p-values, to rank results. This means - that they might sometimes select different features than they did - previously. +- :class:`feature_selection.SelectKBest` and + :class:`feature_selection.SelectPercentile` are more numerically stable + since they use scores, rather than p-values, to rank results. This means + that they might sometimes select different features than they did + previously. - - Ridge regression and ridge classification fitting with ``sparse_cg`` solver - no longer has quadratic memory complexity, by `Lars Buitinck`_ and - `Fabian Pedregosa`_. +- Ridge regression and ridge classification fitting with ``sparse_cg`` solver + no longer has quadratic memory complexity, by `Lars Buitinck`_ and + `Fabian Pedregosa`_. - - Ridge regression and ridge classification now support a new fast solver - called ``lsqr``, by `Mathieu Blondel`_. 
+- Ridge regression and ridge classification now support a new fast solver + called ``lsqr``, by `Mathieu Blondel`_. - - Speed up of :func:`metrics.precision_recall_curve` by Conrad Lee. +- Speed up of :func:`metrics.precision_recall_curve` by Conrad Lee. - - Added support for reading/writing svmlight files with pairwise - preference attribute (qid in svmlight file format) in - :func:`datasets.dump_svmlight_file` and - :func:`datasets.load_svmlight_file` by `Fabian Pedregosa`_. +- Added support for reading/writing svmlight files with pairwise + preference attribute (qid in svmlight file format) in + :func:`datasets.dump_svmlight_file` and + :func:`datasets.load_svmlight_file` by `Fabian Pedregosa`_. - - Faster and more robust :func:`metrics.confusion_matrix` and - :ref:`clustering_evaluation` by Wei Li. +- Faster and more robust :func:`metrics.confusion_matrix` and + :ref:`clustering_evaluation` by Wei Li. - - :func:`cross_validation.cross_val_score` now works with precomputed kernels - and affinity matrices, by `Andreas Müller`_. +- :func:`cross_validation.cross_val_score` now works with precomputed kernels + and affinity matrices, by `Andreas Müller`_. - - LARS algorithm made more numerically stable with heuristics to drop - regressors too correlated as well as to stop the path when - numerical noise becomes predominant, by `Gael Varoquaux`_. +- LARS algorithm made more numerically stable with heuristics to drop + regressors too correlated as well as to stop the path when + numerical noise becomes predominant, by `Gael Varoquaux`_. - - Faster implementation of :func:`metrics.precision_recall_curve` by - Conrad Lee. +- Faster implementation of :func:`metrics.precision_recall_curve` by + Conrad Lee. - - New kernel :class:`metrics.chi2_kernel` by `Andreas Müller`_, often used - in computer vision applications. +- New kernel :class:`metrics.chi2_kernel` by `Andreas Müller`_, often used + in computer vision applications. 
- - Fix of longstanding bug in :class:`naive_bayes.BernoulliNB` fixed by - Shaun Jackman. +- Fixed a longstanding bug in :class:`naive_bayes.BernoulliNB`, by + Shaun Jackman. - - Implemented ``predict_proba`` in :class:`multiclass.OneVsRestClassifier`, - by Andrew Winterman. +- Implemented ``predict_proba`` in :class:`multiclass.OneVsRestClassifier`, + by Andrew Winterman. - - Improve consistency in gradient boosting: estimators - :class:`ensemble.GradientBoostingRegressor` and - :class:`ensemble.GradientBoostingClassifier` use the estimator - :class:`tree.DecisionTreeRegressor` instead of the - :class:`tree._tree.Tree` data structure by `Arnaud Joly`_. +- Improve consistency in gradient boosting: estimators + :class:`ensemble.GradientBoostingRegressor` and + :class:`ensemble.GradientBoostingClassifier` use the estimator + :class:`tree.DecisionTreeRegressor` instead of the + :class:`tree._tree.Tree` data structure by `Arnaud Joly`_. - - Fixed a floating point exception in the :ref:`decision trees <tree>` - module, by Seberg. +- Fixed a floating point exception in the :ref:`decision trees <tree>` + module, by Seberg. - - Fix :func:`metrics.roc_curve` fails when y_true has only one class - by Wei Li. +- Fix :func:`metrics.roc_curve` failing when y_true has only one class + by Wei Li. - - Add the :func:`metrics.mean_absolute_error` function which computes the - mean absolute error. The :func:`metrics.mean_squared_error`, - :func:`metrics.mean_absolute_error` and - :func:`metrics.r2_score` metrics support multioutput by `Arnaud Joly`_. +- Add the :func:`metrics.mean_absolute_error` function which computes the + mean absolute error. The :func:`metrics.mean_squared_error`, + :func:`metrics.mean_absolute_error` and + :func:`metrics.r2_score` metrics support multioutput by `Arnaud Joly`_. - - Fixed ``class_weight`` support in :class:`svm.LinearSVC` and - :class:`linear_model.LogisticRegression` by `Andreas Müller`_.
The meaning - of ``class_weight`` was reversed as erroneously higher weight meant less - positives of a given class in earlier releases. +- Fixed ``class_weight`` support in :class:`svm.LinearSVC` and + :class:`linear_model.LogisticRegression` by `Andreas Müller`_. The meaning + of ``class_weight`` was reversed as erroneously higher weight meant less + positives of a given class in earlier releases. - - Improve narrative documentation and consistency in - :mod:`sklearn.metrics` for regression and classification metrics - by `Arnaud Joly`_. +- Improve narrative documentation and consistency in + :mod:`sklearn.metrics` for regression and classification metrics + by `Arnaud Joly`_. - - Fixed a bug in :class:`sklearn.svm.SVC` when using csr-matrices with - unsorted indices by Xinfan Meng and `Andreas Müller`_. +- Fixed a bug in :class:`sklearn.svm.SVC` when using csr-matrices with + unsorted indices by Xinfan Meng and `Andreas Müller`_. - - :class:`MiniBatchKMeans`: Add random reassignment of cluster centers - with little observations attached to them, by `Gael Varoquaux`_. +- :class:`MiniBatchKMeans`: Add random reassignment of cluster centers + with little observations attached to them, by `Gael Varoquaux`_. API changes summary ------------------- - - Renamed all occurrences of ``n_atoms`` to ``n_components`` for consistency. - This applies to :class:`decomposition.DictionaryLearning`, - :class:`decomposition.MiniBatchDictionaryLearning`, - :func:`decomposition.dict_learning`, :func:`decomposition.dict_learning_online`. +- Renamed all occurrences of ``n_atoms`` to ``n_components`` for consistency. + This applies to :class:`decomposition.DictionaryLearning`, + :class:`decomposition.MiniBatchDictionaryLearning`, + :func:`decomposition.dict_learning`, :func:`decomposition.dict_learning_online`. - - Renamed all occurrences of ``max_iters`` to ``max_iter`` for consistency. 
- This applies to :class:`semi_supervised.LabelPropagation` and - :class:`semi_supervised.label_propagation.LabelSpreading`. +- Renamed all occurrences of ``max_iters`` to ``max_iter`` for consistency. + This applies to :class:`semi_supervised.LabelPropagation` and + :class:`semi_supervised.label_propagation.LabelSpreading`. - - Renamed all occurrences of ``learn_rate`` to ``learning_rate`` for - consistency in :class:`ensemble.BaseGradientBoosting` and - :class:`ensemble.GradientBoostingRegressor`. +- Renamed all occurrences of ``learn_rate`` to ``learning_rate`` for + consistency in :class:`ensemble.BaseGradientBoosting` and + :class:`ensemble.GradientBoostingRegressor`. - - The module ``sklearn.linear_model.sparse`` is gone. Sparse matrix support - was already integrated into the "regular" linear models. +- The module ``sklearn.linear_model.sparse`` is gone. Sparse matrix support + was already integrated into the "regular" linear models. - - :func:`sklearn.metrics.mean_square_error`, which incorrectly returned the - accumulated error, was removed. Use ``mean_squared_error`` instead. +- :func:`sklearn.metrics.mean_square_error`, which incorrectly returned the + accumulated error, was removed. Use ``mean_squared_error`` instead. - - Passing ``class_weight`` parameters to ``fit`` methods is no longer - supported. Pass them to estimator constructors instead. +- Passing ``class_weight`` parameters to ``fit`` methods is no longer + supported. Pass them to estimator constructors instead. - - GMMs no longer have ``decode`` and ``rvs`` methods. Use the ``score``, - ``predict`` or ``sample`` methods instead. +- GMMs no longer have ``decode`` and ``rvs`` methods. Use the ``score``, + ``predict`` or ``sample`` methods instead. - - The ``solver`` fit option in Ridge regression and classification is now - deprecated and will be removed in v0.14. Use the constructor option - instead. 
+- The ``solver`` fit option in Ridge regression and classification is now + deprecated and will be removed in v0.14. Use the constructor option + instead. - - :class:`feature_extraction.text.DictVectorizer` now returns sparse - matrices in the CSR format, instead of COO. +- :class:`feature_extraction.text.DictVectorizer` now returns sparse + matrices in the CSR format, instead of COO. - - Renamed ``k`` in :class:`cross_validation.KFold` and - :class:`cross_validation.StratifiedKFold` to ``n_folds``, renamed - ``n_bootstraps`` to ``n_iter`` in ``cross_validation.Bootstrap``. +- Renamed ``k`` in :class:`cross_validation.KFold` and + :class:`cross_validation.StratifiedKFold` to ``n_folds``, renamed + ``n_bootstraps`` to ``n_iter`` in ``cross_validation.Bootstrap``. - - Renamed all occurrences of ``n_iterations`` to ``n_iter`` for consistency. - This applies to :class:`cross_validation.ShuffleSplit`, - :class:`cross_validation.StratifiedShuffleSplit`, - :func:`utils.randomized_range_finder` and :func:`utils.randomized_svd`. +- Renamed all occurrences of ``n_iterations`` to ``n_iter`` for consistency. + This applies to :class:`cross_validation.ShuffleSplit`, + :class:`cross_validation.StratifiedShuffleSplit`, + :func:`utils.randomized_range_finder` and :func:`utils.randomized_svd`. - - Replaced ``rho`` in :class:`linear_model.ElasticNet` and - :class:`linear_model.SGDClassifier` by ``l1_ratio``. The ``rho`` parameter - had different meanings; ``l1_ratio`` was introduced to avoid confusion. - It has the same meaning as previously ``rho`` in - :class:`linear_model.ElasticNet` and ``(1-rho)`` in - :class:`linear_model.SGDClassifier`. +- Replaced ``rho`` in :class:`linear_model.ElasticNet` and + :class:`linear_model.SGDClassifier` by ``l1_ratio``. The ``rho`` parameter + had different meanings; ``l1_ratio`` was introduced to avoid confusion. 
+ It has the same meaning as previously ``rho`` in + :class:`linear_model.ElasticNet` and ``(1-rho)`` in + :class:`linear_model.SGDClassifier`. - - :class:`linear_model.LassoLars` and :class:`linear_model.Lars` now - store a list of paths in the case of multiple targets, rather than - an array of paths. +- :class:`linear_model.LassoLars` and :class:`linear_model.Lars` now + store a list of paths in the case of multiple targets, rather than + an array of paths. - - The attribute ``gmm`` of :class:`hmm.GMMHMM` was renamed to ``gmm_`` - to adhere more strictly with the API. +- The attribute ``gmm`` of :class:`hmm.GMMHMM` was renamed to ``gmm_`` + to adhere more strictly with the API. - - :func:`cluster.spectral_embedding` was moved to - :func:`manifold.spectral_embedding`. +- :func:`cluster.spectral_embedding` was moved to + :func:`manifold.spectral_embedding`. - - Renamed ``eig_tol`` in :func:`manifold.spectral_embedding`, - :class:`cluster.SpectralClustering` to ``eigen_tol``, renamed ``mode`` - to ``eigen_solver``. +- Renamed ``eig_tol`` in :func:`manifold.spectral_embedding`, + :class:`cluster.SpectralClustering` to ``eigen_tol``, renamed ``mode`` + to ``eigen_solver``. - - Renamed ``mode`` in :func:`manifold.spectral_embedding` and - :class:`cluster.SpectralClustering` to ``eigen_solver``. +- Renamed ``mode`` in :func:`manifold.spectral_embedding` and + :class:`cluster.SpectralClustering` to ``eigen_solver``. - - ``classes_`` and ``n_classes_`` attributes of - :class:`tree.DecisionTreeClassifier` and all derived ensemble models are - now flat in case of single output problems and nested in case of - multi-output problems. +- ``classes_`` and ``n_classes_`` attributes of + :class:`tree.DecisionTreeClassifier` and all derived ensemble models are + now flat in case of single output problems and nested in case of + multi-output problems. 
- - The ``estimators_`` attribute of - :class:`ensemble.gradient_boosting.GradientBoostingRegressor` and - :class:`ensemble.gradient_boosting.GradientBoostingClassifier` is now an - array of :class:'tree.DecisionTreeRegressor'. +- The ``estimators_`` attribute of + :class:`ensemble.gradient_boosting.GradientBoostingRegressor` and + :class:`ensemble.gradient_boosting.GradientBoostingClassifier` is now an + array of :class:`tree.DecisionTreeRegressor`. - - Renamed ``chunk_size`` to ``batch_size`` in - :class:`decomposition.MiniBatchDictionaryLearning` and - :class:`decomposition.MiniBatchSparsePCA` for consistency. +- Renamed ``chunk_size`` to ``batch_size`` in + :class:`decomposition.MiniBatchDictionaryLearning` and + :class:`decomposition.MiniBatchSparsePCA` for consistency. - - :class:`svm.SVC` and :class:`svm.NuSVC` now provide a ``classes_`` - attribute and support arbitrary dtypes for labels ``y``. - Also, the dtype returned by ``predict`` now reflects the dtype of - ``y`` during ``fit`` (used to be ``np.float``). +- :class:`svm.SVC` and :class:`svm.NuSVC` now provide a ``classes_`` + attribute and support arbitrary dtypes for labels ``y``. + Also, the dtype returned by ``predict`` now reflects the dtype of + ``y`` during ``fit`` (used to be ``np.float``). - - Changed default test_size in :func:`cross_validation.train_test_split` - to None, added possibility to infer ``test_size`` from ``train_size`` in - :class:`cross_validation.ShuffleSplit` and - :class:`cross_validation.StratifiedShuffleSplit`. +- Changed default test_size in :func:`cross_validation.train_test_split` + to None, added possibility to infer ``test_size`` from ``train_size`` in + :class:`cross_validation.ShuffleSplit` and + :class:`cross_validation.StratifiedShuffleSplit`. - - Renamed function :func:`sklearn.metrics.zero_one` to - :func:`sklearn.metrics.zero_one_loss`.
Be aware that the default behavior - in :func:`sklearn.metrics.zero_one_loss` is different from - :func:`sklearn.metrics.zero_one`: ``normalize=False`` is changed to - ``normalize=True``. +- Renamed function :func:`sklearn.metrics.zero_one` to + :func:`sklearn.metrics.zero_one_loss`. Be aware that the default behavior + in :func:`sklearn.metrics.zero_one_loss` is different from + :func:`sklearn.metrics.zero_one`: ``normalize=False`` is changed to + ``normalize=True``. - - Renamed function :func:`metrics.zero_one_score` to - :func:`metrics.accuracy_score`. +- Renamed function :func:`metrics.zero_one_score` to + :func:`metrics.accuracy_score`. - - :func:`datasets.make_circles` now has the same number of inner and outer points. +- :func:`datasets.make_circles` now has the same number of inner and outer points. - - In the Naive Bayes classifiers, the ``class_prior`` parameter was moved - from ``fit`` to ``__init__``. +- In the Naive Bayes classifiers, the ``class_prior`` parameter was moved + from ``fit`` to ``__init__``. 
People ------ @@ -4098,27 +4218,27 @@ instead a set of bug fixes Changelog ---------- - - Improved numerical stability in spectral embedding by `Gael - Varoquaux`_ +- Improved numerical stability in spectral embedding by `Gael + Varoquaux`_ - - Doctest under windows 64bit by `Gael Varoquaux`_ +- Doctest under windows 64bit by `Gael Varoquaux`_ - - Documentation fixes for elastic net by `Andreas Müller`_ and - `Alexandre Gramfort`_ +- Documentation fixes for elastic net by `Andreas Müller`_ and + `Alexandre Gramfort`_ - - Proper behavior with fortran-ordered NumPy arrays by `Gael Varoquaux`_ +- Proper behavior with fortran-ordered NumPy arrays by `Gael Varoquaux`_ - - Make GridSearchCV work with non-CSR sparse matrix by `Lars Buitinck`_ +- Make GridSearchCV work with non-CSR sparse matrix by `Lars Buitinck`_ - - Fix parallel computing in MDS by `Gael Varoquaux`_ +- Fix parallel computing in MDS by `Gael Varoquaux`_ - - Fix Unicode support in count vectorizer by `Andreas Müller`_ +- Fix Unicode support in count vectorizer by `Andreas Müller`_ - - Fix MinCovDet breaking with X.shape = (3, 1) by :user:`Virgile Fritsch ` +- Fix MinCovDet breaking with X.shape = (3, 1) by :user:`Virgile Fritsch ` - - Fix clone of SGD objects by `Peter Prettenhofer`_ +- Fix clone of SGD objects by `Peter Prettenhofer`_ - - Stabilize GMM by :user:`Virgile Fritsch ` +- Stabilize GMM by :user:`Virgile Fritsch ` People ------ @@ -4142,137 +4262,137 @@ Version 0.12 Changelog --------- - - Various speed improvements of the :ref:`decision trees ` module, by - `Gilles Louppe`_. +- Various speed improvements of the :ref:`decision trees ` module, by + `Gilles Louppe`_. - - :class:`ensemble.GradientBoostingRegressor` and - :class:`ensemble.GradientBoostingClassifier` now support feature subsampling - via the ``max_features`` argument, by `Peter Prettenhofer`_. 
+- :class:`ensemble.GradientBoostingRegressor` and + :class:`ensemble.GradientBoostingClassifier` now support feature subsampling + via the ``max_features`` argument, by `Peter Prettenhofer`_. - - Added Huber and Quantile loss functions to - :class:`ensemble.GradientBoostingRegressor`, by `Peter Prettenhofer`_. +- Added Huber and Quantile loss functions to + :class:`ensemble.GradientBoostingRegressor`, by `Peter Prettenhofer`_. - - :ref:`Decision trees ` and :ref:`forests of randomized trees ` - now support multi-output classification and regression problems, by - `Gilles Louppe`_. +- :ref:`Decision trees ` and :ref:`forests of randomized trees ` + now support multi-output classification and regression problems, by + `Gilles Louppe`_. - - Added :class:`preprocessing.LabelEncoder`, a simple utility class to - normalize labels or transform non-numerical labels, by `Mathieu Blondel`_. +- Added :class:`preprocessing.LabelEncoder`, a simple utility class to + normalize labels or transform non-numerical labels, by `Mathieu Blondel`_. - - Added the epsilon-insensitive loss and the ability to make probabilistic - predictions with the modified huber loss in :ref:`sgd`, by - `Mathieu Blondel`_. +- Added the epsilon-insensitive loss and the ability to make probabilistic + predictions with the modified huber loss in :ref:`sgd`, by + `Mathieu Blondel`_. - - Added :ref:`multidimensional_scaling`, by Nelle Varoquaux. +- Added :ref:`multidimensional_scaling`, by Nelle Varoquaux. - - SVMlight file format loader now detects compressed (gzip/bzip2) files and - decompresses them on the fly, by `Lars Buitinck`_. +- SVMlight file format loader now detects compressed (gzip/bzip2) files and + decompresses them on the fly, by `Lars Buitinck`_. - - SVMlight file format serializer now preserves double precision floating - point values, by `Olivier Grisel`_. +- SVMlight file format serializer now preserves double precision floating + point values, by `Olivier Grisel`_. 
- - A common testing framework for all estimators was added, by `Andreas Müller`_. +- A common testing framework for all estimators was added, by `Andreas Müller`_. - - Understandable error messages for estimators that do not accept - sparse input by `Gael Varoquaux`_ +- Understandable error messages for estimators that do not accept + sparse input by `Gael Varoquaux`_ - - Speedups in hierarchical clustering by `Gael Varoquaux`_. In - particular building the tree now supports early stopping. This is - useful when the number of clusters is not small compared to the - number of samples. +- Speedups in hierarchical clustering by `Gael Varoquaux`_. In + particular building the tree now supports early stopping. This is + useful when the number of clusters is not small compared to the + number of samples. - - Add MultiTaskLasso and MultiTaskElasticNet for joint feature selection, - by `Alexandre Gramfort`_. +- Add MultiTaskLasso and MultiTaskElasticNet for joint feature selection, + by `Alexandre Gramfort`_. - - Added :func:`metrics.auc_score` and - :func:`metrics.average_precision_score` convenience functions by `Andreas - Müller`_. +- Added :func:`metrics.auc_score` and + :func:`metrics.average_precision_score` convenience functions by `Andreas + Müller`_. - - Improved sparse matrix support in the :ref:`feature_selection` - module by `Andreas Müller`_. +- Improved sparse matrix support in the :ref:`feature_selection` + module by `Andreas Müller`_. - - New word boundaries-aware character n-gram analyzer for the - :ref:`text_feature_extraction` module by :user:`@kernc `. +- New word boundaries-aware character n-gram analyzer for the + :ref:`text_feature_extraction` module by :user:`@kernc `. - - Fixed bug in spectral clustering that led to single point clusters - by `Andreas Müller`_. +- Fixed bug in spectral clustering that led to single point clusters + by `Andreas Müller`_. 
- - In :class:`feature_extraction.text.CountVectorizer`, added an option to - ignore infrequent words, ``min_df`` by `Andreas Müller`_. +- In :class:`feature_extraction.text.CountVectorizer`, added an option to + ignore infrequent words, ``min_df`` by `Andreas Müller`_. - - Add support for multiple targets in some linear models (ElasticNet, Lasso - and OrthogonalMatchingPursuit) by `Vlad Niculae`_ and - `Alexandre Gramfort`_. +- Add support for multiple targets in some linear models (ElasticNet, Lasso + and OrthogonalMatchingPursuit) by `Vlad Niculae`_ and + `Alexandre Gramfort`_. - - Fixes in :class:`decomposition.ProbabilisticPCA` score function by Wei Li. +- Fixes in :class:`decomposition.ProbabilisticPCA` score function by Wei Li. - - Fixed feature importance computation in - :ref:`gradient_boosting`. +- Fixed feature importance computation in + :ref:`gradient_boosting`. API changes summary ------------------- - - The old ``scikits.learn`` package has disappeared; all code should import - from ``sklearn`` instead, which was introduced in 0.9. +- The old ``scikits.learn`` package has disappeared; all code should import + from ``sklearn`` instead, which was introduced in 0.9. - - In :func:`metrics.roc_curve`, the ``thresholds`` array is now returned - with it's order reversed, in order to keep it consistent with the order - of the returned ``fpr`` and ``tpr``. +- In :func:`metrics.roc_curve`, the ``thresholds`` array is now returned + with its order reversed, in order to keep it consistent with the order + of the returned ``fpr`` and ``tpr``. - - In :class:`hmm` objects, like :class:`hmm.GaussianHMM`, - :class:`hmm.MultinomialHMM`, etc., all parameters must be passed to the - object when initialising it and not through ``fit``. Now ``fit`` will - only accept the data as an input parameter.
+- In :class:`hmm` objects, like :class:`hmm.GaussianHMM`, + :class:`hmm.MultinomialHMM`, etc., all parameters must be passed to the + object when initialising it and not through ``fit``. Now ``fit`` will + only accept the data as an input parameter. - - For all SVM classes, a faulty behavior of ``gamma`` was fixed. Previously, - the default gamma value was only computed the first time ``fit`` was called - and then stored. It is now recalculated on every call to ``fit``. +- For all SVM classes, a faulty behavior of ``gamma`` was fixed. Previously, + the default gamma value was only computed the first time ``fit`` was called + and then stored. It is now recalculated on every call to ``fit``. - - All ``Base`` classes are now abstract meta classes so that they can not be - instantiated. +- All ``Base`` classes are now abstract meta classes so that they can not be + instantiated. - - :func:`cluster.ward_tree` now also returns the parent array. This is - necessary for early-stopping in which case the tree is not - completely built. +- :func:`cluster.ward_tree` now also returns the parent array. This is + necessary for early-stopping in which case the tree is not + completely built. - - In :class:`feature_extraction.text.CountVectorizer` the parameters - ``min_n`` and ``max_n`` were joined to the parameter ``n_gram_range`` to - enable grid-searching both at once. +- In :class:`feature_extraction.text.CountVectorizer` the parameters + ``min_n`` and ``max_n`` were joined to the parameter ``n_gram_range`` to + enable grid-searching both at once. - - In :class:`feature_extraction.text.CountVectorizer`, words that appear - only in one document are now ignored by default. To reproduce - the previous behavior, set ``min_df=1``. +- In :class:`feature_extraction.text.CountVectorizer`, words that appear + only in one document are now ignored by default. To reproduce + the previous behavior, set ``min_df=1``. 
- - Fixed API inconsistency: :meth:`linear_model.SGDClassifier.predict_proba` now - returns 2d array when fit on two classes. +- Fixed API inconsistency: :meth:`linear_model.SGDClassifier.predict_proba` now + returns 2d array when fit on two classes. - - Fixed API inconsistency: :meth:`discriminant_analysis.QuadraticDiscriminantAnalysis.decision_function` - and :meth:`discriminant_analysis.LinearDiscriminantAnalysis.decision_function` now return 1d arrays - when fit on two classes. +- Fixed API inconsistency: :meth:`discriminant_analysis.QuadraticDiscriminantAnalysis.decision_function` + and :meth:`discriminant_analysis.LinearDiscriminantAnalysis.decision_function` now return 1d arrays + when fit on two classes. - - Grid of alphas used for fitting :class:`linear_model.LassoCV` and - :class:`linear_model.ElasticNetCV` is now stored - in the attribute ``alphas_`` rather than overriding the init parameter - ``alphas``. +- Grid of alphas used for fitting :class:`linear_model.LassoCV` and + :class:`linear_model.ElasticNetCV` is now stored + in the attribute ``alphas_`` rather than overriding the init parameter + ``alphas``. - - Linear models when alpha is estimated by cross-validation store - the estimated value in the ``alpha_`` attribute rather than just - ``alpha`` or ``best_alpha``. +- Linear models when alpha is estimated by cross-validation store + the estimated value in the ``alpha_`` attribute rather than just + ``alpha`` or ``best_alpha``. - - :class:`ensemble.GradientBoostingClassifier` now supports - :meth:`ensemble.GradientBoostingClassifier.staged_predict_proba`, and - :meth:`ensemble.GradientBoostingClassifier.staged_predict`. +- :class:`ensemble.GradientBoostingClassifier` now supports + :meth:`ensemble.GradientBoostingClassifier.staged_predict_proba`, and + :meth:`ensemble.GradientBoostingClassifier.staged_predict`. - - :class:`svm.sparse.SVC` and other sparse SVM classes are now deprecated. 
- The all classes in the :ref:`svm` module now automatically select the - sparse or dense representation base on the input. +- :class:`svm.sparse.SVC` and other sparse SVM classes are now deprecated. + All classes in the :ref:`svm` module now automatically select the + sparse or dense representation based on the input. - - All clustering algorithms now interpret the array ``X`` given to ``fit`` as - input data, in particular :class:`cluster.SpectralClustering` and - :class:`cluster.AffinityPropagation` which previously expected affinity matrices. +- All clustering algorithms now interpret the array ``X`` given to ``fit`` as + input data, in particular :class:`cluster.SpectralClustering` and + :class:`cluster.AffinityPropagation` which previously expected affinity matrices. - - For clustering algorithms that take the desired number of clusters as a parameter, - this parameter is now called ``n_clusters``. +- For clustering algorithms that take the desired number of clusters as a parameter, + this parameter is now called ``n_clusters``. People @@ -4340,176 +4460,176 @@ Changelog Highlights ............. - - Gradient boosted regression trees (:ref:`gradient_boosting`) - for classification and regression by `Peter Prettenhofer`_ - and `Scott White`_ . +- Gradient boosted regression trees (:ref:`gradient_boosting`) + for classification and regression by `Peter Prettenhofer`_ + and `Scott White`_ . - - Simple dict-based feature loader with support for categorical variables - (:class:`feature_extraction.DictVectorizer`) by `Lars Buitinck`_. +- Simple dict-based feature loader with support for categorical variables + (:class:`feature_extraction.DictVectorizer`) by `Lars Buitinck`_. - - Added Matthews correlation coefficient (:func:`metrics.matthews_corrcoef`) - and added macro and micro average options to - :func:`metrics.precision_score`, :func:`metrics.recall_score` and - :func:`metrics.f1_score` by `Satrajit Ghosh`_.
+- Added Matthews correlation coefficient (:func:`metrics.matthews_corrcoef`) + and added macro and micro average options to + :func:`metrics.precision_score`, :func:`metrics.recall_score` and + :func:`metrics.f1_score` by `Satrajit Ghosh`_. - - :ref:`out_of_bag` of generalization error for :ref:`ensemble` - by `Andreas Müller`_. +- :ref:`out_of_bag` of generalization error for :ref:`ensemble` + by `Andreas Müller`_. - - :ref:`randomized_l1`: Randomized sparse linear models for feature - selection, by `Alexandre Gramfort`_ and `Gael Varoquaux`_ +- Randomized sparse linear models for feature + selection, by `Alexandre Gramfort`_ and `Gael Varoquaux`_ - - :ref:`label_propagation` for semi-supervised learning, by Clay - Woolam. **Note** the semi-supervised API is still work in progress, - and may change. +- :ref:`label_propagation` for semi-supervised learning, by Clay + Woolam. **Note** the semi-supervised API is still work in progress, + and may change. - - Added BIC/AIC model selection to classical :ref:`gmm` and unified - the API with the remainder of scikit-learn, by `Bertrand Thirion`_ +- Added BIC/AIC model selection to classical :ref:`gmm` and unified + the API with the remainder of scikit-learn, by `Bertrand Thirion`_ - - Added :class:`sklearn.cross_validation.StratifiedShuffleSplit`, which is - a :class:`sklearn.cross_validation.ShuffleSplit` with balanced splits, - by Yannick Schwartz. +- Added :class:`sklearn.cross_validation.StratifiedShuffleSplit`, which is + a :class:`sklearn.cross_validation.ShuffleSplit` with balanced splits, + by Yannick Schwartz. - - :class:`sklearn.neighbors.NearestCentroid` classifier added, along with a - ``shrink_threshold`` parameter, which implements **shrunken centroid - classification**, by `Robert Layton`_. +- :class:`sklearn.neighbors.NearestCentroid` classifier added, along with a + ``shrink_threshold`` parameter, which implements **shrunken centroid + classification**, by `Robert Layton`_. Other changes .............. 
- - Merged dense and sparse implementations of :ref:`sgd` module and - exposed utility extension types for sequential - datasets ``seq_dataset`` and weight vectors ``weight_vector`` - by `Peter Prettenhofer`_. +- Merged dense and sparse implementations of :ref:`sgd` module and + exposed utility extension types for sequential + datasets ``seq_dataset`` and weight vectors ``weight_vector`` + by `Peter Prettenhofer`_. - - Added ``partial_fit`` (support for online/minibatch learning) and - warm_start to the :ref:`sgd` module by `Mathieu Blondel`_. +- Added ``partial_fit`` (support for online/minibatch learning) and + warm_start to the :ref:`sgd` module by `Mathieu Blondel`_. - - Dense and sparse implementations of :ref:`svm` classes and - :class:`linear_model.LogisticRegression` merged by `Lars Buitinck`_. +- Dense and sparse implementations of :ref:`svm` classes and + :class:`linear_model.LogisticRegression` merged by `Lars Buitinck`_. - - Regressors can now be used as base estimator in the :ref:`multiclass` - module by `Mathieu Blondel`_. +- Regressors can now be used as base estimator in the :ref:`multiclass` + module by `Mathieu Blondel`_. - - Added n_jobs option to :func:`metrics.pairwise.pairwise_distances` - and :func:`metrics.pairwise.pairwise_kernels` for parallel computation, - by `Mathieu Blondel`_. +- Added n_jobs option to :func:`metrics.pairwise.pairwise_distances` + and :func:`metrics.pairwise.pairwise_kernels` for parallel computation, + by `Mathieu Blondel`_. - - :ref:`k_means` can now be run in parallel, using the ``n_jobs`` argument - to either :ref:`k_means` or :class:`KMeans`, by `Robert Layton`_. +- :ref:`k_means` can now be run in parallel, using the ``n_jobs`` argument + to either :ref:`k_means` or :class:`KMeans`, by `Robert Layton`_. 
- - Improved :ref:`cross_validation` and :ref:`grid_search` documentation - and introduced the new :func:`cross_validation.train_test_split` - helper function by `Olivier Grisel`_ +- Improved :ref:`cross_validation` and :ref:`grid_search` documentation + and introduced the new :func:`cross_validation.train_test_split` + helper function by `Olivier Grisel`_ - - :class:`svm.SVC` members ``coef_`` and ``intercept_`` changed sign for - consistency with ``decision_function``; for ``kernel==linear``, - ``coef_`` was fixed in the one-vs-one case, by `Andreas Müller`_. +- :class:`svm.SVC` members ``coef_`` and ``intercept_`` changed sign for + consistency with ``decision_function``; for ``kernel==linear``, + ``coef_`` was fixed in the one-vs-one case, by `Andreas Müller`_. - - Performance improvements to efficient leave-one-out cross-validated - Ridge regression, esp. for the ``n_samples > n_features`` case, in - :class:`linear_model.RidgeCV`, by Reuben Fletcher-Costin. +- Performance improvements to efficient leave-one-out cross-validated + Ridge regression, esp. for the ``n_samples > n_features`` case, in + :class:`linear_model.RidgeCV`, by Reuben Fletcher-Costin. - - Refactoring and simplification of the :ref:`text_feature_extraction` - API and fixed a bug that caused possible negative IDF, - by `Olivier Grisel`_. +- Refactoring and simplification of the :ref:`text_feature_extraction` + API and fixed a bug that caused possible negative IDF, + by `Olivier Grisel`_. - - Beam pruning option in :class:`_BaseHMM` module has been removed since it - is difficult to Cythonize. If you are interested in contributing a Cython - version, you can use the python version in the git history as a reference. +- Beam pruning option in :class:`_BaseHMM` module has been removed since it + is difficult to Cythonize. If you are interested in contributing a Cython + version, you can use the python version in the git history as a reference. 
- - Classes in :ref:`neighbors` now support arbitrary Minkowski metric for - nearest neighbors searches. The metric can be specified by argument ``p``. +- Classes in :ref:`neighbors` now support arbitrary Minkowski metric for + nearest neighbors searches. The metric can be specified by argument ``p``. API changes summary ------------------- - - :class:`covariance.EllipticEnvelop` is now deprecated - Please use :class:`covariance.EllipticEnvelope` - instead. +- :class:`covariance.EllipticEnvelop` is now deprecated - Please use :class:`covariance.EllipticEnvelope` + instead. - - ``NeighborsClassifier`` and ``NeighborsRegressor`` are gone in the module - :ref:`neighbors`. Use the classes :class:`KNeighborsClassifier`, - :class:`RadiusNeighborsClassifier`, :class:`KNeighborsRegressor` - and/or :class:`RadiusNeighborsRegressor` instead. +- ``NeighborsClassifier`` and ``NeighborsRegressor`` are gone in the module + :ref:`neighbors`. Use the classes :class:`KNeighborsClassifier`, + :class:`RadiusNeighborsClassifier`, :class:`KNeighborsRegressor` + and/or :class:`RadiusNeighborsRegressor` instead. - - Sparse classes in the :ref:`sgd` module are now deprecated. +- Sparse classes in the :ref:`sgd` module are now deprecated. - - In :class:`mixture.GMM`, :class:`mixture.DPGMM` and :class:`mixture.VBGMM`, - parameters must be passed to an object when initialising it and not through - ``fit``. Now ``fit`` will only accept the data as an input parameter. +- In :class:`mixture.GMM`, :class:`mixture.DPGMM` and :class:`mixture.VBGMM`, + parameters must be passed to an object when initialising it and not through + ``fit``. Now ``fit`` will only accept the data as an input parameter. - - methods ``rvs`` and ``decode`` in :class:`GMM` module are now deprecated. - ``sample`` and ``score`` or ``predict`` should be used instead. +- methods ``rvs`` and ``decode`` in :class:`GMM` module are now deprecated. + ``sample`` and ``score`` or ``predict`` should be used instead. 
- - attribute ``_scores`` and ``_pvalues`` in univariate feature selection - objects are now deprecated. - ``scores_`` or ``pvalues_`` should be used instead. +- attribute ``_scores`` and ``_pvalues`` in univariate feature selection + objects are now deprecated. + ``scores_`` or ``pvalues_`` should be used instead. - - In :class:`LogisticRegression`, :class:`LinearSVC`, :class:`SVC` and - :class:`NuSVC`, the ``class_weight`` parameter is now an initialization - parameter, not a parameter to fit. This makes grid searches - over this parameter possible. +- In :class:`LogisticRegression`, :class:`LinearSVC`, :class:`SVC` and + :class:`NuSVC`, the ``class_weight`` parameter is now an initialization + parameter, not a parameter to fit. This makes grid searches + over this parameter possible. - - LFW ``data`` is now always shape ``(n_samples, n_features)`` to be - consistent with the Olivetti faces dataset. Use ``images`` and - ``pairs`` attribute to access the natural images shapes instead. +- LFW ``data`` is now always shape ``(n_samples, n_features)`` to be + consistent with the Olivetti faces dataset. Use ``images`` and + ``pairs`` attribute to access the natural images shapes instead. - - In :class:`svm.LinearSVC`, the meaning of the ``multi_class`` parameter - changed. Options now are ``'ovr'`` and ``'crammer_singer'``, with - ``'ovr'`` being the default. This does not change the default behavior - but hopefully is less confusing. +- In :class:`svm.LinearSVC`, the meaning of the ``multi_class`` parameter + changed. Options now are ``'ovr'`` and ``'crammer_singer'``, with + ``'ovr'`` being the default. This does not change the default behavior + but hopefully is less confusing. - - Class :class:`feature_selection.text.Vectorizer` is deprecated and - replaced by :class:`feature_selection.text.TfidfVectorizer`. +- Class :class:`feature_selection.text.Vectorizer` is deprecated and + replaced by :class:`feature_selection.text.TfidfVectorizer`. 
- - The preprocessor / analyzer nested structure for text feature - extraction has been removed. All those features are - now directly passed as flat constructor arguments - to :class:`feature_selection.text.TfidfVectorizer` and - :class:`feature_selection.text.CountVectorizer`, in particular the - following parameters are now used: +- The preprocessor / analyzer nested structure for text feature + extraction has been removed. All those features are + now directly passed as flat constructor arguments + to :class:`feature_selection.text.TfidfVectorizer` and + :class:`feature_selection.text.CountVectorizer`, in particular the + following parameters are now used: - - ``analyzer`` can be ``'word'`` or ``'char'`` to switch the default - analysis scheme, or use a specific python callable (as previously). +- ``analyzer`` can be ``'word'`` or ``'char'`` to switch the default + analysis scheme, or use a specific python callable (as previously). - - ``tokenizer`` and ``preprocessor`` have been introduced to make it - still possible to customize those steps with the new API. +- ``tokenizer`` and ``preprocessor`` have been introduced to make it + still possible to customize those steps with the new API. - - ``input`` explicitly control how to interpret the sequence passed to - ``fit`` and ``predict``: filenames, file objects or direct (byte or - Unicode) strings. +- ``input`` explicitly control how to interpret the sequence passed to + ``fit`` and ``predict``: filenames, file objects or direct (byte or + Unicode) strings. - - charset decoding is explicit and strict by default. +- charset decoding is explicit and strict by default. - - the ``vocabulary``, fitted or not is now stored in the - ``vocabulary_`` attribute to be consistent with the project - conventions. +- the ``vocabulary``, fitted or not is now stored in the + ``vocabulary_`` attribute to be consistent with the project + conventions. 
- - Class :class:`feature_selection.text.TfidfVectorizer` now derives directly - from :class:`feature_selection.text.CountVectorizer` to make grid - search trivial. +- Class :class:`feature_selection.text.TfidfVectorizer` now derives directly + from :class:`feature_selection.text.CountVectorizer` to make grid + search trivial. - - methods ``rvs`` in :class:`_BaseHMM` module are now deprecated. - ``sample`` should be used instead. +- methods ``rvs`` in :class:`_BaseHMM` module are now deprecated. + ``sample`` should be used instead. - - Beam pruning option in :class:`_BaseHMM` module is removed since it is - difficult to be Cythonized. If you are interested, you can look in the - history codes by git. +- Beam pruning option in :class:`_BaseHMM` module is removed since it is + difficult to be Cythonized. If you are interested, you can look in the + history codes by git. - - The SVMlight format loader now supports files with both zero-based and - one-based column indices, since both occur "in the wild". +- The SVMlight format loader now supports files with both zero-based and + one-based column indices, since both occur "in the wild". - - Arguments in class :class:`ShuffleSplit` are now consistent with - :class:`StratifiedShuffleSplit`. Arguments ``test_fraction`` and - ``train_fraction`` are deprecated and renamed to ``test_size`` and - ``train_size`` and can accept both ``float`` and ``int``. +- Arguments in class :class:`ShuffleSplit` are now consistent with + :class:`StratifiedShuffleSplit`. Arguments ``test_fraction`` and + ``train_fraction`` are deprecated and renamed to ``test_size`` and + ``train_size`` and can accept both ``float`` and ``int``. - - Arguments in class :class:`Bootstrap` are now consistent with - :class:`StratifiedShuffleSplit`. Arguments ``n_test`` and - ``n_train`` are deprecated and renamed to ``test_size`` and - ``train_size`` and can accept both ``float`` and ``int``. 
+- Arguments in class :class:`Bootstrap` are now consistent with + :class:`StratifiedShuffleSplit`. Arguments ``n_test`` and + ``n_train`` are deprecated and renamed to ``test_size`` and + ``train_size`` and can accept both ``float`` and ``int``. - - Argument ``p`` added to classes in :ref:`neighbors` to specify an - arbitrary Minkowski metric for nearest neighbors searches. +- Argument ``p`` added to classes in :ref:`neighbors` to specify an + arbitrary Minkowski metric for nearest neighbors searches. People @@ -4574,85 +4694,85 @@ Version 0.10 Changelog --------- - - Python 2.5 compatibility was dropped; the minimum Python version needed - to use scikit-learn is now 2.6. +- Python 2.5 compatibility was dropped; the minimum Python version needed + to use scikit-learn is now 2.6. - - :ref:`sparse_inverse_covariance` estimation using the graph Lasso, with - associated cross-validated estimator, by `Gael Varoquaux`_ +- :ref:`sparse_inverse_covariance` estimation using the graph Lasso, with + associated cross-validated estimator, by `Gael Varoquaux`_ - - New :ref:`Tree ` module by `Brian Holt`_, `Peter Prettenhofer`_, - `Satrajit Ghosh`_ and `Gilles Louppe`_. The module comes with complete - documentation and examples. +- New :ref:`Tree ` module by `Brian Holt`_, `Peter Prettenhofer`_, + `Satrajit Ghosh`_ and `Gilles Louppe`_. The module comes with complete + documentation and examples. - - Fixed a bug in the RFE module by `Gilles Louppe`_ (issue #378). +- Fixed a bug in the RFE module by `Gilles Louppe`_ (issue #378). - - Fixed a memory leak in :ref:`svm` module by `Brian Holt`_ (issue #367). +- Fixed a memory leak in :ref:`svm` module by `Brian Holt`_ (issue #367). - - Faster tests by `Fabian Pedregosa`_ and others. +- Faster tests by `Fabian Pedregosa`_ and others. - - Silhouette Coefficient cluster analysis evaluation metric added as - :func:`sklearn.metrics.silhouette_score` by Robert Layton. 
+- Silhouette Coefficient cluster analysis evaluation metric added as + :func:`sklearn.metrics.silhouette_score` by Robert Layton. - - Fixed a bug in :ref:`k_means` in the handling of the ``n_init`` parameter: - the clustering algorithm used to be run ``n_init`` times but the last - solution was retained instead of the best solution by `Olivier Grisel`_. +- Fixed a bug in :ref:`k_means` in the handling of the ``n_init`` parameter: + the clustering algorithm used to be run ``n_init`` times but the last + solution was retained instead of the best solution by `Olivier Grisel`_. - - Minor refactoring in :ref:`sgd` module; consolidated dense and sparse - predict methods; Enhanced test time performance by converting model - parameters to fortran-style arrays after fitting (only multi-class). +- Minor refactoring in :ref:`sgd` module; consolidated dense and sparse + predict methods; Enhanced test time performance by converting model + parameters to fortran-style arrays after fitting (only multi-class). - - Adjusted Mutual Information metric added as - :func:`sklearn.metrics.adjusted_mutual_info_score` by Robert Layton. +- Adjusted Mutual Information metric added as + :func:`sklearn.metrics.adjusted_mutual_info_score` by Robert Layton. - - Models like SVC/SVR/LinearSVC/LogisticRegression from libsvm/liblinear - now support scaling of C regularization parameter by the number of - samples by `Alexandre Gramfort`_. +- Models like SVC/SVR/LinearSVC/LogisticRegression from libsvm/liblinear + now support scaling of C regularization parameter by the number of + samples by `Alexandre Gramfort`_. - - New :ref:`Ensemble Methods ` module by `Gilles Louppe`_ and - `Brian Holt`_. The module comes with the random forest algorithm and the - extra-trees method, along with documentation and examples. +- New :ref:`Ensemble Methods ` module by `Gilles Louppe`_ and + `Brian Holt`_. 
The module comes with the random forest algorithm and the + extra-trees method, along with documentation and examples. - - :ref:`outlier_detection`: outlier and novelty detection, by - :user:`Virgile Fritsch `. +- :ref:`outlier_detection`: outlier and novelty detection, by + :user:`Virgile Fritsch `. - - :ref:`kernel_approximation`: a transform implementing kernel - approximation for fast SGD on non-linear kernels by - `Andreas Müller`_. +- :ref:`kernel_approximation`: a transform implementing kernel + approximation for fast SGD on non-linear kernels by + `Andreas Müller`_. - - Fixed a bug due to atom swapping in :ref:`OMP` by `Vlad Niculae`_. +- Fixed a bug due to atom swapping in :ref:`OMP` by `Vlad Niculae`_. - - :ref:`SparseCoder` by `Vlad Niculae`_. +- :ref:`SparseCoder` by `Vlad Niculae`_. - - :ref:`mini_batch_kmeans` performance improvements by `Olivier Grisel`_. +- :ref:`mini_batch_kmeans` performance improvements by `Olivier Grisel`_. - - :ref:`k_means` support for sparse matrices by `Mathieu Blondel`_. +- :ref:`k_means` support for sparse matrices by `Mathieu Blondel`_. - - Improved documentation for developers and for the :mod:`sklearn.utils` - module, by `Jake Vanderplas`_. +- Improved documentation for developers and for the :mod:`sklearn.utils` + module, by `Jake Vanderplas`_. - - Vectorized 20newsgroups dataset loader - (:func:`sklearn.datasets.fetch_20newsgroups_vectorized`) by - `Mathieu Blondel`_. +- Vectorized 20newsgroups dataset loader + (:func:`sklearn.datasets.fetch_20newsgroups_vectorized`) by + `Mathieu Blondel`_. - - :ref:`multiclass` by `Lars Buitinck`_. +- :ref:`multiclass` by `Lars Buitinck`_. - - Utilities for fast computation of mean and variance for sparse matrices - by `Mathieu Blondel`_. +- Utilities for fast computation of mean and variance for sparse matrices + by `Mathieu Blondel`_. 
- - Make :func:`sklearn.preprocessing.scale` and - :class:`sklearn.preprocessing.Scaler` work on sparse matrices by - `Olivier Grisel`_ +- Make :func:`sklearn.preprocessing.scale` and + :class:`sklearn.preprocessing.Scaler` work on sparse matrices by + `Olivier Grisel`_ - - Feature importances using decision trees and/or forest of trees, - by `Gilles Louppe`_. +- Feature importances using decision trees and/or forest of trees, + by `Gilles Louppe`_. - - Parallel implementation of forests of randomized trees by - `Gilles Louppe`_. +- Parallel implementation of forests of randomized trees by + `Gilles Louppe`_. - - :class:`sklearn.cross_validation.ShuffleSplit` can subsample the train - sets as well as the test sets by `Olivier Grisel`_. +- :class:`sklearn.cross_validation.ShuffleSplit` can subsample the train + sets as well as the test sets by `Olivier Grisel`_. - - Errors in the build of the documentation fixed by `Andreas Müller`_. +- Errors in the build of the documentation fixed by `Andreas Müller`_. API changes summary @@ -4661,55 +4781,55 @@ API changes summary Here are the code migration instructions when upgrading from scikit-learn version 0.9: - - Some estimators that may overwrite their inputs to save memory previously - had ``overwrite_`` parameters; these have been replaced with ``copy_`` - parameters with exactly the opposite meaning. +- Some estimators that may overwrite their inputs to save memory previously + had ``overwrite_`` parameters; these have been replaced with ``copy_`` + parameters with exactly the opposite meaning. - This particularly affects some of the estimators in :mod:`linear_model`. - The default behavior is still to copy everything passed in. + This particularly affects some of the estimators in :mod:`linear_model`. + The default behavior is still to copy everything passed in. 
- - The SVMlight dataset loader :func:`sklearn.datasets.load_svmlight_file` no - longer supports loading two files at once; use ``load_svmlight_files`` - instead. Also, the (unused) ``buffer_mb`` parameter is gone. +- The SVMlight dataset loader :func:`sklearn.datasets.load_svmlight_file` no + longer supports loading two files at once; use ``load_svmlight_files`` + instead. Also, the (unused) ``buffer_mb`` parameter is gone. - - Sparse estimators in the :ref:`sgd` module use dense parameter vector - ``coef_`` instead of ``sparse_coef_``. This significantly improves - test time performance. +- Sparse estimators in the :ref:`sgd` module use dense parameter vector + ``coef_`` instead of ``sparse_coef_``. This significantly improves + test time performance. - - The :ref:`covariance` module now has a robust estimator of - covariance, the Minimum Covariance Determinant estimator. +- The :ref:`covariance` module now has a robust estimator of + covariance, the Minimum Covariance Determinant estimator. - - Cluster evaluation metrics in :mod:`metrics.cluster` have been refactored - but the changes are backwards compatible. They have been moved to the - :mod:`metrics.cluster.supervised`, along with - :mod:`metrics.cluster.unsupervised` which contains the Silhouette - Coefficient. +- Cluster evaluation metrics in :mod:`metrics.cluster` have been refactored + but the changes are backwards compatible. They have been moved to the + :mod:`metrics.cluster.supervised`, along with + :mod:`metrics.cluster.unsupervised` which contains the Silhouette + Coefficient. - - The ``permutation_test_score`` function now behaves the same way as - ``cross_val_score`` (i.e. uses the mean score across the folds.) +- The ``permutation_test_score`` function now behaves the same way as + ``cross_val_score`` (i.e. uses the mean score across the folds.) - - Cross Validation generators now use integer indices (``indices=True``) - by default instead of boolean masks. 
This make it more intuitive to - use with sparse matrix data. +- Cross Validation generators now use integer indices (``indices=True``) + by default instead of boolean masks. This makes it more intuitive to + use with sparse matrix data. - - The functions used for sparse coding, ``sparse_encode`` and - ``sparse_encode_parallel`` have been combined into - :func:`sklearn.decomposition.sparse_encode`, and the shapes of the arrays - have been transposed for consistency with the matrix factorization setting, - as opposed to the regression setting. +- The functions used for sparse coding, ``sparse_encode`` and + ``sparse_encode_parallel`` have been combined into + :func:`sklearn.decomposition.sparse_encode`, and the shapes of the arrays + have been transposed for consistency with the matrix factorization setting, + as opposed to the regression setting. - - Fixed an off-by-one error in the SVMlight/LibSVM file format handling; - files generated using :func:`sklearn.datasets.dump_svmlight_file` should be - re-generated. (They should continue to work, but accidentally had one - extra column of zeros prepended.) +- Fixed an off-by-one error in the SVMlight/LibSVM file format handling; + files generated using :func:`sklearn.datasets.dump_svmlight_file` should be + re-generated. (They should continue to work, but accidentally had one + extra column of zeros prepended.) - - ``BaseDictionaryLearning`` class replaced by ``SparseCodingMixin``. +- ``BaseDictionaryLearning`` class replaced by ``SparseCodingMixin``. - - :func:`sklearn.utils.extmath.fast_svd` has been renamed - :func:`sklearn.utils.extmath.randomized_svd` and the default - oversampling is now fixed to 10 additional random vectors instead - of doubling the number of components to extract. The new behavior - follows the reference paper.
+- :func:`sklearn.utils.extmath.fast_svd` has been renamed + :func:`sklearn.utils.extmath.randomized_svd` and the default + oversampling is now fixed to 10 additional random vectors instead + of doubling the number of components to extract. The new behavior + follows the reference paper. People @@ -4791,84 +4911,84 @@ This release also includes the dictionary-learning work developed by Changelog --------- - - New :ref:`manifold` module by `Jake Vanderplas`_ and - `Fabian Pedregosa`_. +- New :ref:`manifold` module by `Jake Vanderplas`_ and + `Fabian Pedregosa`_. - - New :ref:`Dirichlet Process ` Gaussian Mixture - Model by `Alexandre Passos`_ +- New :ref:`Dirichlet Process ` Gaussian Mixture + Model by `Alexandre Passos`_ - - :ref:`neighbors` module refactoring by `Jake Vanderplas`_ : - general refactoring, support for sparse matrices in input, speed and - documentation improvements. See the next section for a full list of API - changes. +- :ref:`neighbors` module refactoring by `Jake Vanderplas`_ : + general refactoring, support for sparse matrices in input, speed and + documentation improvements. See the next section for a full list of API + changes. - - Improvements on the :ref:`feature_selection` module by - `Gilles Louppe`_ : refactoring of the RFE classes, documentation - rewrite, increased efficiency and minor API changes. +- Improvements on the :ref:`feature_selection` module by + `Gilles Louppe`_ : refactoring of the RFE classes, documentation + rewrite, increased efficiency and minor API changes. - - :ref:`SparsePCA` by `Vlad Niculae`_, `Gael Varoquaux`_ and - `Alexandre Gramfort`_ +- :ref:`SparsePCA` by `Vlad Niculae`_, `Gael Varoquaux`_ and + `Alexandre Gramfort`_ - - Printing an estimator now behaves independently of architectures - and Python version thanks to :user:`Jean Kossaifi `. +- Printing an estimator now behaves independently of architectures + and Python version thanks to :user:`Jean Kossaifi `. 
- - :ref:`Loader for libsvm/svmlight format ` by - `Mathieu Blondel`_ and `Lars Buitinck`_ +- :ref:`Loader for libsvm/svmlight format ` by + `Mathieu Blondel`_ and `Lars Buitinck`_ - - Documentation improvements: thumbnails in - :ref:`example gallery ` by `Fabian Pedregosa`_. +- Documentation improvements: thumbnails in + example gallery by `Fabian Pedregosa`_. - - Important bugfixes in :ref:`svm` module (segfaults, bad - performance) by `Fabian Pedregosa`_. +- Important bugfixes in :ref:`svm` module (segfaults, bad + performance) by `Fabian Pedregosa`_. - - Added :ref:`multinomial_naive_bayes` and :ref:`bernoulli_naive_bayes` - by `Lars Buitinck`_ +- Added :ref:`multinomial_naive_bayes` and :ref:`bernoulli_naive_bayes` + by `Lars Buitinck`_ - - Text feature extraction optimizations by Lars Buitinck +- Text feature extraction optimizations by Lars Buitinck - - Chi-Square feature selection - (:func:`feature_selection.univariate_selection.chi2`) by `Lars Buitinck`_. +- Chi-Square feature selection + (:func:`feature_selection.univariate_selection.chi2`) by `Lars Buitinck`_. 
- - :ref:`sample_generators` module refactoring by `Gilles Louppe`_ +- :ref:`sample_generators` module refactoring by `Gilles Louppe`_ - - :ref:`multiclass` by `Mathieu Blondel`_ +- :ref:`multiclass` by `Mathieu Blondel`_ - - Ball tree rewrite by `Jake Vanderplas`_ +- Ball tree rewrite by `Jake Vanderplas`_ - - Implementation of :ref:`dbscan` algorithm by Robert Layton +- Implementation of :ref:`dbscan` algorithm by Robert Layton - - Kmeans predict and transform by Robert Layton +- Kmeans predict and transform by Robert Layton - - Preprocessing module refactoring by `Olivier Grisel`_ +- Preprocessing module refactoring by `Olivier Grisel`_ - - Faster mean shift by Conrad Lee +- Faster mean shift by Conrad Lee - - New ``Bootstrap``, :ref:`ShuffleSplit` and various other - improvements in cross validation schemes by `Olivier Grisel`_ and - `Gael Varoquaux`_ +- New ``Bootstrap``, :ref:`ShuffleSplit` and various other + improvements in cross validation schemes by `Olivier Grisel`_ and + `Gael Varoquaux`_ - - Adjusted Rand index and V-Measure clustering evaluation metrics by `Olivier Grisel`_ +- Adjusted Rand index and V-Measure clustering evaluation metrics by `Olivier Grisel`_ - - Added :class:`Orthogonal Matching Pursuit ` by `Vlad Niculae`_ +- Added :class:`Orthogonal Matching Pursuit ` by `Vlad Niculae`_ - - Added 2D-patch extractor utilities in the :ref:`feature_extraction` module by `Vlad Niculae`_ +- Added 2D-patch extractor utilities in the :ref:`feature_extraction` module by `Vlad Niculae`_ - - Implementation of :class:`linear_model.LassoLarsCV` - (cross-validated Lasso solver using the Lars algorithm) and - :class:`linear_model.LassoLarsIC` (BIC/AIC model - selection in Lars) by `Gael Varoquaux`_ - and `Alexandre Gramfort`_ +- Implementation of :class:`linear_model.LassoLarsCV` + (cross-validated Lasso solver using the Lars algorithm) and + :class:`linear_model.LassoLarsIC` (BIC/AIC model + selection in Lars) by `Gael Varoquaux`_ + and `Alexandre Gramfort`_ - 
- Scalability improvements to :func:`metrics.roc_curve` by Olivier Hervieu +- Scalability improvements to :func:`metrics.roc_curve` by Olivier Hervieu - - Distance helper functions :func:`metrics.pairwise.pairwise_distances` - and :func:`metrics.pairwise.pairwise_kernels` by Robert Layton +- Distance helper functions :func:`metrics.pairwise.pairwise_distances` + and :func:`metrics.pairwise.pairwise_kernels` by Robert Layton - - :class:`Mini-Batch K-Means ` by Nelle Varoquaux and Peter Prettenhofer. +- :class:`Mini-Batch K-Means ` by Nelle Varoquaux and Peter Prettenhofer. - - :ref:`mldata` utilities by Pietro Berkes. +- :ref:`mldata` utilities by Pietro Berkes. - - :ref:`olivetti_faces` by `David Warde-Farley`_. +- :ref:`olivetti_faces` by `David Warde-Farley`_. API changes summary @@ -4877,71 +4997,71 @@ API changes summary Here are the code migration instructions when upgrading from scikit-learn version 0.8: - - The ``scikits.learn`` package was renamed ``sklearn``. There is - still a ``scikits.learn`` package alias for backward compatibility. +- The ``scikits.learn`` package was renamed ``sklearn``. There is + still a ``scikits.learn`` package alias for backward compatibility. - Third-party projects with a dependency on scikit-learn 0.9+ should - upgrade their codebase. For instance, under Linux / MacOSX just run - (make a backup first!):: + Third-party projects with a dependency on scikit-learn 0.9+ should + upgrade their codebase. For instance, under Linux / MacOSX just run + (make a backup first!):: find -name "*.py" | xargs sed -i 's/\bscikits.learn\b/sklearn/g' - - Estimators no longer accept model parameters as ``fit`` arguments: - instead all parameters must be only be passed as constructor - arguments or using the now public ``set_params`` method inherited - from :class:`base.BaseEstimator`. 
+- Estimators no longer accept model parameters as ``fit`` arguments: + instead all parameters must only be passed as constructor + arguments or using the now public ``set_params`` method inherited + from :class:`base.BaseEstimator`. - Some estimators can still accept keyword arguments on the ``fit`` - but this is restricted to data-dependent values (e.g. a Gram matrix - or an affinity matrix that are precomputed from the ``X`` data matrix. + Some estimators can still accept keyword arguments on the ``fit`` + but this is restricted to data-dependent values (e.g. a Gram matrix + or an affinity matrix that are precomputed from the ``X`` data matrix). - - The ``cross_val`` package has been renamed to ``cross_validation`` - although there is also a ``cross_val`` package alias in place for - backward compatibility. +- The ``cross_val`` package has been renamed to ``cross_validation`` + although there is also a ``cross_val`` package alias in place for + backward compatibility. - Third-party projects with a dependency on scikit-learn 0.9+ should - upgrade their codebase. For instance, under Linux / MacOSX just run - (make a backup first!):: + Third-party projects with a dependency on scikit-learn 0.9+ should + upgrade their codebase. For instance, under Linux / MacOSX just run + (make a backup first!):: find -name "*.py" | xargs sed -i 's/\bcross_val\b/cross_validation/g' - - The ``score_func`` argument of the - ``sklearn.cross_validation.cross_val_score`` function is now expected - to accept ``y_test`` and ``y_predicted`` as only arguments for - classification and regression tasks or ``X_test`` for unsupervised - estimators. +- The ``score_func`` argument of the + ``sklearn.cross_validation.cross_val_score`` function is now expected + to accept ``y_test`` and ``y_predicted`` as only arguments for + classification and regression tasks or ``X_test`` for unsupervised + estimators.
- - ``gamma`` parameter for support vector machine algorithms is set - to ``1 / n_features`` by default, instead of ``1 / n_samples``. +- ``gamma`` parameter for support vector machine algorithms is set + to ``1 / n_features`` by default, instead of ``1 / n_samples``. - - The ``sklearn.hmm`` has been marked as orphaned: it will be removed - from scikit-learn in version 0.11 unless someone steps up to - contribute documentation, examples and fix lurking numerical - stability issues. +- The ``sklearn.hmm`` has been marked as orphaned: it will be removed + from scikit-learn in version 0.11 unless someone steps up to + contribute documentation, examples and fix lurking numerical + stability issues. - - ``sklearn.neighbors`` has been made into a submodule. The two previously - available estimators, ``NeighborsClassifier`` and ``NeighborsRegressor`` - have been marked as deprecated. Their functionality has been divided - among five new classes: ``NearestNeighbors`` for unsupervised neighbors - searches, ``KNeighborsClassifier`` & ``RadiusNeighborsClassifier`` - for supervised classification problems, and ``KNeighborsRegressor`` - & ``RadiusNeighborsRegressor`` for supervised regression problems. +- ``sklearn.neighbors`` has been made into a submodule. The two previously + available estimators, ``NeighborsClassifier`` and ``NeighborsRegressor`` + have been marked as deprecated. Their functionality has been divided + among five new classes: ``NearestNeighbors`` for unsupervised neighbors + searches, ``KNeighborsClassifier`` & ``RadiusNeighborsClassifier`` + for supervised classification problems, and ``KNeighborsRegressor`` + & ``RadiusNeighborsRegressor`` for supervised regression problems. - - ``sklearn.ball_tree.BallTree`` has been moved to - ``sklearn.neighbors.BallTree``. Using the former will generate a warning. +- ``sklearn.ball_tree.BallTree`` has been moved to + ``sklearn.neighbors.BallTree``. Using the former will generate a warning. 
- - ``sklearn.linear_model.LARS()`` and related classes (LassoLARS, - LassoLARSCV, etc.) have been renamed to - ``sklearn.linear_model.Lars()``. +- ``sklearn.linear_model.LARS()`` and related classes (LassoLARS, + LassoLARSCV, etc.) have been renamed to + ``sklearn.linear_model.Lars()``. - - All distance metrics and kernels in ``sklearn.metrics.pairwise`` now have a Y - parameter, which by default is None. If not given, the result is the distance - (or kernel similarity) between each sample in Y. If given, the result is the - pairwise distance (or kernel similarity) between samples in X to Y. +- All distance metrics and kernels in ``sklearn.metrics.pairwise`` now have a Y + parameter, which by default is None. If not given, the result is the distance + (or kernel similarity) between each sample in X. If given, the result is the + pairwise distance (or kernel similarity) between samples in X and Y. - - ``sklearn.metrics.pairwise.l1_distance`` is now called ``manhattan_distance``, - and by default returns the pairwise distance. For the component wise distance, - set the parameter ``sum_over_features`` to ``False``. +- ``sklearn.metrics.pairwise.l1_distance`` is now called ``manhattan_distance``, + and by default returns the pairwise distance. For the component wise distance, + set the parameter ``sum_over_features`` to ``False``. Backward compatibility package aliases and other deprecated classes and functions will be removed in version 0.11. @@ -4952,42 +5072,42 @@ People 38 people contributed to this release.
- - 387 `Vlad Niculae`_ - - 320 `Olivier Grisel`_ - - 192 `Lars Buitinck`_ - - 179 `Gael Varoquaux`_ - - 168 `Fabian Pedregosa`_ (`INRIA`_, `Parietal Team`_) - - 127 `Jake Vanderplas`_ - - 120 `Mathieu Blondel`_ - - 85 `Alexandre Passos`_ - - 67 `Alexandre Gramfort`_ - - 57 `Peter Prettenhofer`_ - - 56 `Gilles Louppe`_ - - 42 Robert Layton - - 38 Nelle Varoquaux - - 32 :user:`Jean Kossaifi ` - - 30 Conrad Lee - - 22 Pietro Berkes - - 18 andy - - 17 David Warde-Farley - - 12 Brian Holt - - 11 Robert - - 8 Amit Aides - - 8 :user:`Virgile Fritsch ` - - 7 `Yaroslav Halchenko`_ - - 6 Salvatore Masecchia - - 5 Paolo Losi - - 4 Vincent Schut - - 3 Alexis Metaireau - - 3 Bryan Silverthorn - - 3 `Andreas Müller`_ - - 2 Minwoo Jake Lee - - 1 Emmanuelle Gouillart - - 1 Keith Goodman - - 1 Lucas Wiman - - 1 `Nicolas Pinto`_ - - 1 Thouis (Ray) Jones - - 1 Tim Sheerman-Chase +- 387 `Vlad Niculae`_ +- 320 `Olivier Grisel`_ +- 192 `Lars Buitinck`_ +- 179 `Gael Varoquaux`_ +- 168 `Fabian Pedregosa`_ (`INRIA`_, `Parietal Team`_) +- 127 `Jake Vanderplas`_ +- 120 `Mathieu Blondel`_ +- 85 `Alexandre Passos`_ +- 67 `Alexandre Gramfort`_ +- 57 `Peter Prettenhofer`_ +- 56 `Gilles Louppe`_ +- 42 Robert Layton +- 38 Nelle Varoquaux +- 32 :user:`Jean Kossaifi ` +- 30 Conrad Lee +- 22 Pietro Berkes +- 18 andy +- 17 David Warde-Farley +- 12 Brian Holt +- 11 Robert +- 8 Amit Aides +- 8 :user:`Virgile Fritsch ` +- 7 `Yaroslav Halchenko`_ +- 6 Salvatore Masecchia +- 5 Paolo Losi +- 4 Vincent Schut +- 3 Alexis Metaireau +- 3 Bryan Silverthorn +- 3 `Andreas Müller`_ +- 2 Minwoo Jake Lee +- 1 Emmanuelle Gouillart +- 1 Keith Goodman +- 1 Lucas Wiman +- 1 `Nicolas Pinto`_ +- 1 Thouis (Ray) Jones +- 1 Tim Sheerman-Chase .. _changes_0_8: @@ -5010,53 +5130,53 @@ Changelog Several new modules where introduced during this release: - - New :ref:`hierarchical_clustering` module by Vincent Michel, - `Bertrand Thirion`_, `Alexandre Gramfort`_ and `Gael Varoquaux`_. 
+- New :ref:`hierarchical_clustering` module by Vincent Michel, + `Bertrand Thirion`_, `Alexandre Gramfort`_ and `Gael Varoquaux`_. - - :ref:`kernel_pca` implementation by `Mathieu Blondel`_ +- :ref:`kernel_pca` implementation by `Mathieu Blondel`_ - - :ref:`labeled_faces_in_the_wild` by `Olivier Grisel`_. +- :ref:`labeled_faces_in_the_wild` by `Olivier Grisel`_. - - New :ref:`cross_decomposition` module by `Edouard Duchesnay`_. +- New :ref:`cross_decomposition` module by `Edouard Duchesnay`_. - - :ref:`NMF` module `Vlad Niculae`_ +- :ref:`NMF` module `Vlad Niculae`_ - - Implementation of the :ref:`oracle_approximating_shrinkage` algorithm by - :user:`Virgile Fritsch ` in the :ref:`covariance` module. +- Implementation of the :ref:`oracle_approximating_shrinkage` algorithm by + :user:`Virgile Fritsch ` in the :ref:`covariance` module. Some other modules benefited from significant improvements or cleanups. - - Initial support for Python 3: builds and imports cleanly, - some modules are usable while others have failing tests by `Fabian Pedregosa`_. +- Initial support for Python 3: builds and imports cleanly, + some modules are usable while others have failing tests by `Fabian Pedregosa`_. - - :class:`decomposition.PCA` is now usable from the Pipeline object by `Olivier Grisel`_. +- :class:`decomposition.PCA` is now usable from the Pipeline object by `Olivier Grisel`_. - - Guide :ref:`performance-howto` by `Olivier Grisel`_. +- Guide :ref:`performance-howto` by `Olivier Grisel`_. - - Fixes for memory leaks in libsvm bindings, 64-bit safer BallTree by Lars Buitinck. +- Fixes for memory leaks in libsvm bindings, 64-bit safer BallTree by Lars Buitinck. - - bug and style fixing in :ref:`k_means` algorithm by Jan Schlüter. +- bug and style fixing in :ref:`k_means` algorithm by Jan Schlüter. - - Add attribute converged to Gaussian Mixture Models by Vincent Schut. +- Add attribute converged to Gaussian Mixture Models by Vincent Schut. 
- - Implemented ``transform``, ``predict_log_proba`` in - :class:`discriminant_analysis.LinearDiscriminantAnalysis` By `Mathieu Blondel`_. +- Implemented ``transform``, ``predict_log_proba`` in + :class:`discriminant_analysis.LinearDiscriminantAnalysis` By `Mathieu Blondel`_. - - Refactoring in the :ref:`svm` module and bug fixes by `Fabian Pedregosa`_, - `Gael Varoquaux`_ and Amit Aides. +- Refactoring in the :ref:`svm` module and bug fixes by `Fabian Pedregosa`_, + `Gael Varoquaux`_ and Amit Aides. - - Refactored SGD module (removed code duplication, better variable naming), - added interface for sample weight by `Peter Prettenhofer`_. +- Refactored SGD module (removed code duplication, better variable naming), + added interface for sample weight by `Peter Prettenhofer`_. - - Wrapped BallTree with Cython by Thouis (Ray) Jones. +- Wrapped BallTree with Cython by Thouis (Ray) Jones. - - Added function :func:`svm.l1_min_c` by Paolo Losi. +- Added function :func:`svm.l1_min_c` by Paolo Losi. - - Typos, doc style, etc. by `Yaroslav Halchenko`_, `Gael Varoquaux`_, - `Olivier Grisel`_, Yann Malet, `Nicolas Pinto`_, Lars Buitinck and - `Fabian Pedregosa`_. +- Typos, doc style, etc. by `Yaroslav Halchenko`_, `Gael Varoquaux`_, + `Olivier Grisel`_, Yann Malet, `Nicolas Pinto`_, Lars Buitinck and + `Fabian Pedregosa`_. 
People @@ -5065,17 +5185,17 @@ People People that made this release possible preceded by number of commits: - - 159 `Olivier Grisel`_ - - 96 `Gael Varoquaux`_ - - 96 `Vlad Niculae`_ - - 94 `Fabian Pedregosa`_ - - 36 `Alexandre Gramfort`_ - - 32 Paolo Losi - - 31 `Edouard Duchesnay`_ - - 30 `Mathieu Blondel`_ - - 25 `Peter Prettenhofer`_ - - 22 `Nicolas Pinto`_ - - 11 :user:`Virgile Fritsch ` +- 159 `Olivier Grisel`_ +- 96 `Gael Varoquaux`_ +- 96 `Vlad Niculae`_ +- 94 `Fabian Pedregosa`_ +- 36 `Alexandre Gramfort`_ +- 32 Paolo Losi +- 31 `Edouard Duchesnay`_ +- 30 `Mathieu Blondel`_ +- 25 `Peter Prettenhofer`_ +- 22 `Nicolas Pinto`_ +- 11 :user:`Virgile Fritsch ` - 7 Lars Buitinck - 6 Vincent Michel - 5 `Bertrand Thirion`_ @@ -5109,56 +5229,56 @@ preceding release, no new modules where added to this release. Changelog --------- - - Performance improvements for Gaussian Mixture Model sampling [Jan - Schlüter]. +- Performance improvements for Gaussian Mixture Model sampling [Jan + Schlüter]. - - Implementation of efficient leave-one-out cross-validated Ridge in - :class:`linear_model.RidgeCV` [`Mathieu Blondel`_] +- Implementation of efficient leave-one-out cross-validated Ridge in + :class:`linear_model.RidgeCV` [`Mathieu Blondel`_] - - Better handling of collinearity and early stopping in - :func:`linear_model.lars_path` [`Alexandre Gramfort`_ and `Fabian - Pedregosa`_]. +- Better handling of collinearity and early stopping in + :func:`linear_model.lars_path` [`Alexandre Gramfort`_ and `Fabian + Pedregosa`_]. - - Fixes for liblinear ordering of labels and sign of coefficients - [Dan Yamins, Paolo Losi, `Mathieu Blondel`_ and `Fabian Pedregosa`_]. +- Fixes for liblinear ordering of labels and sign of coefficients + [Dan Yamins, Paolo Losi, `Mathieu Blondel`_ and `Fabian Pedregosa`_]. - - Performance improvements for Nearest Neighbors algorithm in - high-dimensional spaces [`Fabian Pedregosa`_]. 
+- Performance improvements for Nearest Neighbors algorithm in + high-dimensional spaces [`Fabian Pedregosa`_]. - - Performance improvements for :class:`cluster.KMeans` [`Gael - Varoquaux`_ and `James Bergstra`_]. +- Performance improvements for :class:`cluster.KMeans` [`Gael + Varoquaux`_ and `James Bergstra`_]. - - Sanity checks for SVM-based classes [`Mathieu Blondel`_]. +- Sanity checks for SVM-based classes [`Mathieu Blondel`_]. - - Refactoring of :class:`neighbors.NeighborsClassifier` and - :func:`neighbors.kneighbors_graph`: added different algorithms for - the k-Nearest Neighbor Search and implemented a more stable - algorithm for finding barycenter weights. Also added some - developer documentation for this module, see - `notes_neighbors - `_ for more information [`Fabian Pedregosa`_]. +- Refactoring of :class:`neighbors.NeighborsClassifier` and + :func:`neighbors.kneighbors_graph`: added different algorithms for + the k-Nearest Neighbor Search and implemented a more stable + algorithm for finding barycenter weights. Also added some + developer documentation for this module, see + `notes_neighbors + `_ for more information [`Fabian Pedregosa`_]. - - Documentation improvements: Added :class:`pca.RandomizedPCA` and - :class:`linear_model.LogisticRegression` to the class - reference. Also added references of matrices used for clustering - and other fixes [`Gael Varoquaux`_, `Fabian Pedregosa`_, `Mathieu - Blondel`_, `Olivier Grisel`_, Virgile Fritsch , Emmanuelle - Gouillart] +- Documentation improvements: Added :class:`pca.RandomizedPCA` and + :class:`linear_model.LogisticRegression` to the class + reference. 
Also added references of matrices used for clustering + and other fixes [`Gael Varoquaux`_, `Fabian Pedregosa`_, `Mathieu + Blondel`_, `Olivier Grisel`_, Virgile Fritsch, Emmanuelle + Gouillart] - - Binded decision_function in classes that make use of liblinear_, - dense and sparse variants, like :class:`svm.LinearSVC` or - :class:`linear_model.LogisticRegression` [`Fabian Pedregosa`_]. +- Bound decision_function in classes that make use of liblinear_, + dense and sparse variants, like :class:`svm.LinearSVC` or + :class:`linear_model.LogisticRegression` [`Fabian Pedregosa`_]. - - Performance and API improvements to - :func:`metrics.euclidean_distances` and to - :class:`pca.RandomizedPCA` [`James Bergstra`_]. +- Performance and API improvements to + :func:`metrics.euclidean_distances` and to + :class:`pca.RandomizedPCA` [`James Bergstra`_]. - - Fix compilation issues under NetBSD [Kamel Ibn Hassen Derouiche] +- Fix compilation issues under NetBSD [Kamel Ibn Hassen Derouiche] - - Allow input sequences of different lengths in :class:`hmm.GaussianHMM` - [`Ron Weiss`_].
- - Fix bug in affinity propagation caused by incorrect indexing [Xinfan Meng] +- Fix bug in affinity propagation caused by incorrect indexing [Xinfan Meng] People @@ -5166,23 +5286,23 @@ People People that made this release possible preceded by number of commits: - - 85 `Fabian Pedregosa`_ - - 67 `Mathieu Blondel`_ - - 20 `Alexandre Gramfort`_ - - 19 `James Bergstra`_ - - 14 Dan Yamins - - 13 `Olivier Grisel`_ - - 12 `Gael Varoquaux`_ - - 4 `Edouard Duchesnay`_ - - 4 `Ron Weiss`_ - - 2 Satrajit Ghosh - - 2 Vincent Dubourg - - 1 Emmanuelle Gouillart - - 1 Kamel Ibn Hassen Derouiche - - 1 Paolo Losi - - 1 VirgileFritsch - - 1 `Yaroslav Halchenko`_ - - 1 Xinfan Meng +- 85 `Fabian Pedregosa`_ +- 67 `Mathieu Blondel`_ +- 20 `Alexandre Gramfort`_ +- 19 `James Bergstra`_ +- 14 Dan Yamins +- 13 `Olivier Grisel`_ +- 12 `Gael Varoquaux`_ +- 4 `Edouard Duchesnay`_ +- 4 `Ron Weiss`_ +- 2 Satrajit Ghosh +- 2 Vincent Dubourg +- 1 Emmanuelle Gouillart +- 1 Kamel Ibn Hassen Derouiche +- 1 Paolo Losi +- 1 VirgileFritsch +- 1 `Yaroslav Halchenko`_ +- 1 Xinfan Meng .. _changes_0_6: @@ -5201,56 +5321,56 @@ applications to real-world datasets. Changelog --------- - - New `stochastic gradient - `_ descent - module by Peter Prettenhofer. The module comes with complete - documentation and examples. +- New `stochastic gradient + `_ descent + module by Peter Prettenhofer. The module comes with complete + documentation and examples. - - Improved svm module: memory consumption has been reduced by 50%, - heuristic to automatically set class weights, possibility to - assign weights to samples (see - :ref:`sphx_glr_auto_examples_svm_plot_weighted_samples.py` for an example). +- Improved svm module: memory consumption has been reduced by 50%, + heuristic to automatically set class weights, possibility to + assign weights to samples (see + :ref:`sphx_glr_auto_examples_svm_plot_weighted_samples.py` for an example). - - New :ref:`gaussian_process` module by Vincent Dubourg. 
This module - also has great documentation and some very neat examples. See - example_gaussian_process_plot_gp_regression.py or - example_gaussian_process_plot_gp_probabilistic_classification_after_regression.py - for a taste of what can be done. +- New :ref:`gaussian_process` module by Vincent Dubourg. This module + also has great documentation and some very neat examples. See + example_gaussian_process_plot_gp_regression.py or + example_gaussian_process_plot_gp_probabilistic_classification_after_regression.py + for a taste of what can be done. - - It is now possible to use liblinear’s Multi-class SVC (option - multi_class in :class:`svm.LinearSVC`) +- It is now possible to use liblinear’s Multi-class SVC (option + multi_class in :class:`svm.LinearSVC`) - - New features and performance improvements of text feature - extraction. +- New features and performance improvements of text feature + extraction. - - Improved sparse matrix support, both in main classes - (:class:`grid_search.GridSearchCV`) as in modules - sklearn.svm.sparse and sklearn.linear_model.sparse. +- Improved sparse matrix support, both in main classes + (:class:`grid_search.GridSearchCV`) as in modules + sklearn.svm.sparse and sklearn.linear_model.sparse. - - Lots of cool new examples and a new section that uses real-world - datasets was created. These include: - :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py`, - :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py`, - :ref:`sphx_glr_auto_examples_applications_svm_gui.py`, - :ref:`sphx_glr_auto_examples_applications_wikipedia_principal_eigenvector.py` and - others. +- Lots of cool new examples and a new section that uses real-world + datasets was created. 
These include: + :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py`, + :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py`, + :ref:`sphx_glr_auto_examples_applications_svm_gui.py`, + :ref:`sphx_glr_auto_examples_applications_wikipedia_principal_eigenvector.py` and + others. - - Faster :ref:`least_angle_regression` algorithm. It is now 2x - faster than the R version on worst case and up to 10x times faster - on some cases. +- Faster :ref:`least_angle_regression` algorithm. It is now 2x + faster than the R version on worst case and up to 10x times faster + on some cases. - - Faster coordinate descent algorithm. In particular, the full path - version of lasso (:func:`linear_model.lasso_path`) is more than - 200x times faster than before. +- Faster coordinate descent algorithm. In particular, the full path + version of lasso (:func:`linear_model.lasso_path`) is more than + 200x times faster than before. - - It is now possible to get probability estimates from a - :class:`linear_model.LogisticRegression` model. +- It is now possible to get probability estimates from a + :class:`linear_model.LogisticRegression` model. - - module renaming: the glm module has been renamed to linear_model, - the gmm module has been included into the more general mixture - model and the sgd module has been included in linear_model. +- module renaming: the glm module has been renamed to linear_model, + the gmm module has been included into the more general mixture + model and the sgd module has been included in linear_model. - - Lots of bug fixes and documentation improvements. +- Lots of bug fixes and documentation improvements. 
People @@ -5300,86 +5420,86 @@ Changelog New classes ----------- - - Support for sparse matrices in some classifiers of modules - ``svm`` and ``linear_model`` (see :class:`svm.sparse.SVC`, - :class:`svm.sparse.SVR`, :class:`svm.sparse.LinearSVC`, - :class:`linear_model.sparse.Lasso`, :class:`linear_model.sparse.ElasticNet`) +- Support for sparse matrices in some classifiers of modules + ``svm`` and ``linear_model`` (see :class:`svm.sparse.SVC`, + :class:`svm.sparse.SVR`, :class:`svm.sparse.LinearSVC`, + :class:`linear_model.sparse.Lasso`, :class:`linear_model.sparse.ElasticNet`) - - New :class:`pipeline.Pipeline` object to compose different estimators. +- New :class:`pipeline.Pipeline` object to compose different estimators. - - Recursive Feature Elimination routines in module - :ref:`feature_selection`. +- Recursive Feature Elimination routines in module + :ref:`feature_selection`. - - Addition of various classes capable of cross validation in the - linear_model module (:class:`linear_model.LassoCV`, :class:`linear_model.ElasticNetCV`, - etc.). +- Addition of various classes capable of cross validation in the + linear_model module (:class:`linear_model.LassoCV`, :class:`linear_model.ElasticNetCV`, + etc.). - - New, more efficient LARS algorithm implementation. The Lasso - variant of the algorithm is also implemented. See - :class:`linear_model.lars_path`, :class:`linear_model.Lars` and - :class:`linear_model.LassoLars`. +- New, more efficient LARS algorithm implementation. The Lasso + variant of the algorithm is also implemented. See + :class:`linear_model.lars_path`, :class:`linear_model.Lars` and + :class:`linear_model.LassoLars`. 
- - New Hidden Markov Models module (see classes - :class:`hmm.GaussianHMM`, :class:`hmm.MultinomialHMM`, - :class:`hmm.GMMHMM`) +- New Hidden Markov Models module (see classes + :class:`hmm.GaussianHMM`, :class:`hmm.MultinomialHMM`, + :class:`hmm.GMMHMM`) - - New module feature_extraction (see :ref:`class reference - `) +- New module feature_extraction (see :ref:`class reference + `) - - New FastICA algorithm in module sklearn.fastica +- New FastICA algorithm in module sklearn.fastica Documentation ------------- - - Improved documentation for many modules, now separating - narrative documentation from the class reference. As an example, - see `documentation for the SVM module - `_ and the - complete `class reference - `_. +- Improved documentation for many modules, now separating + narrative documentation from the class reference. As an example, + see `documentation for the SVM module + `_ and the + complete `class reference + `_. Fixes ----- - - API changes: adhere variable names to PEP-8, give more - meaningful names. +- API changes: adhere variable names to PEP-8, give more + meaningful names. - - Fixes for svm module to run on a shared memory context - (multiprocessing). +- Fixes for svm module to run on a shared memory context + (multiprocessing). - - It is again possible to generate latex (and thus PDF) from the - sphinx docs. +- It is again possible to generate latex (and thus PDF) from the + sphinx docs. Examples -------- - - new examples using some of the mlcomp datasets: - ``sphx_glr_auto_examples_mlcomp_sparse_document_classification.py`` (since removed) and - :ref:`sphx_glr_auto_examples_text_document_classification_20newsgroups.py` +- new examples using some of the mlcomp datasets: + ``sphx_glr_auto_examples_mlcomp_sparse_document_classification.py`` (since removed) and + :ref:`sphx_glr_auto_examples_text_document_classification_20newsgroups.py` - - Many more examples. `See here - `_ - the full list of examples. +- Many more examples. 
`See here + `_ + the full list of examples. External dependencies --------------------- - - Joblib is now a dependency of this package, although it is - shipped with (sklearn.externals.joblib). +- Joblib is now a dependency of this package, although it is + shipped with it (sklearn.externals.joblib). Removed modules --------------- - - Module ann (Artificial Neural Networks) has been removed from - the distribution. Users wanting this sort of algorithms should - take a look into pybrain. +- Module ann (Artificial Neural Networks) has been removed from + the distribution. Users wanting this sort of algorithm should + take a look at pybrain. Misc ---- - - New sphinx theme for the web page. +- New sphinx theme for the web page. Authors @@ -5413,37 +5533,37 @@ Changelog Major changes in this release include: - - Coordinate Descent algorithm (Lasso, ElasticNet) refactoring & - speed improvements (roughly 100x times faster). +- Coordinate Descent algorithm (Lasso, ElasticNet) refactoring & + speed improvements (roughly 100 times faster). - - Coordinate Descent Refactoring (and bug fixing) for consistency - with R's package GLMNET. +- Coordinate Descent Refactoring (and bug fixing) for consistency + with R's package GLMNET. - - New metrics module. +- New metrics module. - - New GMM module contributed by Ron Weiss. +- New GMM module contributed by Ron Weiss. - - Implementation of the LARS algorithm (without Lasso variant for now). +- Implementation of the LARS algorithm (without Lasso variant for now). - - feature_selection module redesign. +- feature_selection module redesign. - - Migration to GIT as version control system. +- Migration to GIT as version control system. - - Removal of obsolete attrselect module. +- Removal of obsolete attrselect module. - - Rename of private compiled extensions (added underscore). +- Rename of private compiled extensions (added underscore). - - Removal of legacy unmaintained code.
- - Documentation improvements (both docstring and rst). +- Documentation improvements (both docstring and rst). - - Improvement of the build system to (optionally) link with MKL. - Also, provide a lite BLAS implementation in case no system-wide BLAS is - found. +- Improvement of the build system to (optionally) link with MKL. + Also, provide a lite BLAS implementation in case no system-wide BLAS is + found. - - Lots of new examples. +- Lots of new examples. - - Many, many bug fixes ... +- Many, many bug fixes ... Authors