From d382bb4e67fa7d92143509e0ed2317e25760f4be Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Sat, 8 Jul 2017 21:03:32 +0100
Subject: [PATCH 01/17] fixed bug (not tested), writing test

---
 sklearn/decomposition/incremental_pca.py            | 7 ++++++-
 sklearn/decomposition/tests/test_incremental_pca.py | 4 ++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/sklearn/decomposition/incremental_pca.py b/sklearn/decomposition/incremental_pca.py
index 9b23d1f16e1fd..9ad3d91d37f72 100644
--- a/sklearn/decomposition/incremental_pca.py
+++ b/sklearn/decomposition/incremental_pca.py
@@ -208,11 +208,16 @@ def partial_fit(self, X, y=None, check_input=True):
             self.components_ = None
 
         if self.n_components is None:
-            self.n_components_ = n_features
+            self.n_components_ = min(n_samples, n_features)
         elif not 1 <= self.n_components <= n_features:
             raise ValueError("n_components=%r invalid for n_features=%d, need "
                              "more rows than columns for IncrementalPCA "
                              "processing" % (self.n_components, n_features))
+        elif not 1 <= self.n_components <= n_samples:
+            raise ValueError("n_components=%r must be less or equal to "
+                             "the batch number of samples %d. You can change "
+                             "either one depending on what you "
+                             "want." % (self.n_components, n_samples))
         else:
             self.n_components_ = self.n_components
 
diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py
index 87e7f9d7683e1..c03657c6791ca 100644
--- a/sklearn/decomposition/tests/test_incremental_pca.py
+++ b/sklearn/decomposition/tests/test_incremental_pca.py
@@ -77,6 +77,10 @@ def test_incremental_pca_validation():
     for n_components in [-1, 0, .99, 3]:
         assert_raises(ValueError, IncrementalPCA(n_components,
                                                  batch_size=10).fit, X)
+    X = [[0, 1], [1, 0]]
+    for n_components in [-1, 0, .99, 3]:
+        assert_raises(ValueError, IncrementalPCA(n_components,
+                                                 batch_size=1).fit, X)
 
 
 def test_incremental_pca_set_params():

From fcb2768b870f83b4fbec9edfca8f9f00a625bab1 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Sat, 8 Jul 2017 22:15:11 +0100
Subject: [PATCH 02/17] removed lower interval comparison check from fix, more
 work on test

---
 sklearn/decomposition/incremental_pca.py            |  2 +-
 sklearn/decomposition/tests/test_incremental_pca.py | 10 +++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/sklearn/decomposition/incremental_pca.py b/sklearn/decomposition/incremental_pca.py
index 9ad3d91d37f72..e267fb8ab0fac 100644
--- a/sklearn/decomposition/incremental_pca.py
+++ b/sklearn/decomposition/incremental_pca.py
@@ -213,7 +213,7 @@ def partial_fit(self, X, y=None, check_input=True):
             raise ValueError("n_components=%r invalid for n_features=%d, need "
                              "more rows than columns for IncrementalPCA "
                              "processing" % (self.n_components, n_features))
-        elif not 1 <= self.n_components <= n_samples:
+        elif not self.n_components <= n_samples:
             raise ValueError("n_components=%r must be less or equal to "
                              "the batch number of samples %d. You can change "
                              "either one depending on what you "
diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py
index c03657c6791ca..c7f9db522b497 100644
--- a/sklearn/decomposition/tests/test_incremental_pca.py
+++ b/sklearn/decomposition/tests/test_incremental_pca.py
@@ -4,6 +4,7 @@
 from sklearn.utils.testing import assert_almost_equal
 from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import assert_raises
+from sklearn.utils.testing import assert_raises_regex
 
 from sklearn import datasets
 from sklearn.decomposition import PCA, IncrementalPCA
@@ -77,10 +78,13 @@ def test_incremental_pca_validation():
     for n_components in [-1, 0, .99, 3]:
         assert_raises(ValueError, IncrementalPCA(n_components,
                                                  batch_size=10).fit, X)
-    X = [[0, 1], [1, 0]]
     for n_components in [-1, 0, .99, 3]:
-        assert_raises(ValueError, IncrementalPCA(n_components,
-                                                 batch_size=1).fit, X)
+        X2 = [[0, 1, 0], [1, 0, 0]]
+        assert_raises_regex(ValueError,
+                            "n_components\=.* be less or equal to "
+                            "the batch number of samples .*\. You can change "
+                            "either one depending on what you want\.",
+                            IncrementalPCA(n_components).partial_fit, X2)
 
 
 def test_incremental_pca_set_params():

From d4bd366359465bfa50155c537a581ca3f7df5147 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Sat, 8 Jul 2017 23:13:48 +0100
Subject: [PATCH 03/17] fix was failing another test, + finished test for fix

---
 sklearn/decomposition/incremental_pca.py      |  5 +++-
 .../tests/test_incremental_pca.py             | 23 +++++++++++--------
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/sklearn/decomposition/incremental_pca.py b/sklearn/decomposition/incremental_pca.py
index e267fb8ab0fac..7d8708596da0b 100644
--- a/sklearn/decomposition/incremental_pca.py
+++ b/sklearn/decomposition/incremental_pca.py
@@ -208,7 +208,10 @@ def partial_fit(self, X, y=None, check_input=True):
             self.components_ = None
 
         if self.n_components is None:
-            self.n_components_ = min(n_samples, n_features)
+            if self.components_ is None:
+                self.n_components_ = min(n_samples, n_features)
+            else:
+                self.n_components_ = self.components_.shape[0]
         elif not 1 <= self.n_components <= n_features:
             raise ValueError("n_components=%r invalid for n_features=%d, need "
                              "more rows than columns for IncrementalPCA "
diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py
index c7f9db522b497..5f23f3440e42c 100644
--- a/sklearn/decomposition/tests/test_incremental_pca.py
+++ b/sklearn/decomposition/tests/test_incremental_pca.py
@@ -74,17 +74,20 @@ def test_incremental_pca_inverse():
 
 def test_incremental_pca_validation():
     # Test that n_components is >=1 and <= n_features.
-    X = [[0, 1], [1, 0]]
-    for n_components in [-1, 0, .99, 3]:
-        assert_raises(ValueError, IncrementalPCA(n_components,
-                                                 batch_size=10).fit, X)
-    for n_components in [-1, 0, .99, 3]:
-        X2 = [[0, 1, 0], [1, 0, 0]]
+    X = [[0, 1, 0], [1, 0, 0]]
+    for n_components in [-1, 0, .99, 4]:
         assert_raises_regex(ValueError,
-                            "n_components\=.* be less or equal to "
-                            "the batch number of samples .*\. You can change "
-                            "either one depending on what you want\.",
-                            IncrementalPCA(n_components).partial_fit, X2)
+                            "n_components\=.* invalid for n_features\=.*, need"
+                            " more rows than columns for IncrementalPCA "
+                            "processing",
+                            IncrementalPCA(n_components, batch_size=10).fit, X)
+
+    # Tests that n_components is also <= n_samples.
+    assert_raises_regex(ValueError,
+                        "n_components\=.* be less or equal to "
+                        "the batch number of samples .*\. You can change "
+                        "either one depending on what you want\.",
+                        IncrementalPCA(n_components=3).partial_fit, X)
 
 
 def test_incremental_pca_set_params():

From 2cff58d02ef6d450504f9108e9dd09742beb6955 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Fri, 14 Jul 2017 15:58:43 +0100
Subject: [PATCH 04/17] Revert "Merge branch 'master' of
 https://github.com/scikit-learn/scikit-learn into n_samples6452"

This reverts commit 71c5a730c9c43cafe4bea38c18a65e61277dd7a7, reversing
changes made to d4bd366359465bfa50155c537a581ca3f7df5147.
---
 doc/whats_new.rst                               |  5 +----
 sklearn/datasets/kddcup99.py                    |  6 +++++-
 sklearn/linear_model/ridge.py                   |  3 +--
 sklearn/linear_model/tests/test_ridge.py        | 11 -----------
 sklearn/model_selection/_split.py               |  2 +-
 sklearn/neural_network/multilayer_perceptron.py |  2 +-
 6 files changed, 9 insertions(+), 20 deletions(-)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 3c87d4174c388..0c5608d6b5970 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -471,13 +471,10 @@ Bug fixes
      by :user:`Andre Ambrosio Boechat <boechat107>`, :user:`Utkarsh Upadhyay
      <musically-ut>`, and `Joel Nothman`_.
 
+
    - Add ``data_home`` parameter to
      :func:`sklearn.datasets.fetch_kddcup99` by `Loic Esteve`_.
 
-   - Fix inconsistent results between :class:`linear_model.RidgeCV`
-     and :class:`linear_model.Ridge` when using ``normalize=True``
-     by `Alexandre Gramfort`_.
-
 API changes summary
 -------------------
 
diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py
index 6d52c5b6214b2..89c74238bc4f3 100644
--- a/sklearn/datasets/kddcup99.py
+++ b/sklearn/datasets/kddcup99.py
@@ -222,7 +222,7 @@ def fetch_kddcup99(subset=None, data_home=None, shuffle=False,
     return Bunch(data=data, target=target)
 
 
-def _fetch_brute_kddcup99(data_home=None,
+def _fetch_brute_kddcup99(subset=None, data_home=None,
                           download_if_missing=True, random_state=None,
                           shuffle=False, percent10=True):
 
@@ -230,6 +230,10 @@ def _fetch_brute_kddcup99(data_home=None,
 
     Parameters
     ----------
+    subset : None, 'SA', 'SF', 'http', 'smtp'
+        To return the corresponding classical subsets of kddcup 99.
+        If None, return the entire kddcup 99 dataset.
+
     data_home : string, optional
         Specify another download and cache folder for the datasets. By default
         all scikit-learn data is stored in '~/scikit_learn_data' subfolders.
diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py
index caf2f9eed64c2..e0c7b6f188037 100644
--- a/sklearn/linear_model/ridge.py
+++ b/sklearn/linear_model/ridge.py
@@ -1119,8 +1119,7 @@ def fit(self, X, y, sample_weight=None):
                 raise ValueError("cv!=None and store_cv_values=True "
                                  " are incompatible")
             parameters = {'alpha': self.alphas}
-            gs = GridSearchCV(Ridge(fit_intercept=self.fit_intercept,
-                                    normalize=self.normalize),
+            gs = GridSearchCV(Ridge(fit_intercept=self.fit_intercept),
                               parameters, cv=self.cv, scoring=self.scoring)
             gs.fit(X, y, sample_weight=sample_weight)
             estimator = gs.best_estimator_
diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index ee44da5d56b86..4879e02deff50 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -383,16 +383,6 @@ def _test_ridge_loo(filter_):
     return ret
 
 
-def _test_ridge_cv_normalize(filter_):
-    ridge_cv = RidgeCV(normalize=True, cv=3)
-    ridge_cv.fit(filter_(10. * X_diabetes), y_diabetes)
-
-    gs = GridSearchCV(Ridge(normalize=True), cv=3,
-                      param_grid={'alpha': ridge_cv.alphas})
-    gs.fit(filter_(10. * X_diabetes), y_diabetes)
-    assert_equal(gs.best_estimator_.alpha, ridge_cv.alpha_)
-
-
 def _test_ridge_cv(filter_):
     ridge_cv = RidgeCV()
     ridge_cv.fit(filter_(X_diabetes), y_diabetes)
@@ -472,7 +462,6 @@ def check_dense_sparse(test_func):
 def test_dense_sparse():
     for test_func in (_test_ridge_loo,
                       _test_ridge_cv,
-                      _test_ridge_cv_normalize,
                       _test_ridge_diabetes,
                       _test_multi_ridge_diabetes,
                       _test_ridge_classifiers,
diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 4bcc0ae1c5349..3f228e85c43e8 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -566,7 +566,7 @@ class StratifiedKFold(_BaseKFold):
     def __init__(self, n_splits=3, shuffle=False, random_state=None):
         super(StratifiedKFold, self).__init__(n_splits, shuffle, random_state)
 
-    def _make_test_folds(self, X, y=None):
+    def _make_test_folds(self, X, y=None, groups=None):
         if self.shuffle:
             rng = check_random_state(self.random_state)
         else:
diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py
index d4adfd9107f6e..ec1196a3e2ac6 100644
--- a/sklearn/neural_network/multilayer_perceptron.py
+++ b/sklearn/neural_network/multilayer_perceptron.py
@@ -640,7 +640,7 @@ def partial_fit(self):
                                  % self.solver)
         return self._partial_fit
 
-    def _partial_fit(self, X, y):
+    def _partial_fit(self, X, y, classes=None):
         return self._fit(X, y, incremental=True)
 
     def _predict(self, X):

From 5b250ce3484cec5dcfc0c54bdd5c3971beb2644f Mon Sep 17 00:00:00 2001
From: wallygauze <wallygauze@yahoo.fr>
Date: Fri, 14 Jul 2017 17:51:50 +0100
Subject: [PATCH 05/17] Correcting side-effects from reverting merge

---
 sklearn/datasets/kddcup99.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/sklearn/datasets/kddcup99.py b/sklearn/datasets/kddcup99.py
index 4c05c7fdf8886..56cf3c4181c7c 100644
--- a/sklearn/datasets/kddcup99.py
+++ b/sklearn/datasets/kddcup99.py
@@ -222,7 +222,7 @@ def fetch_kddcup99(subset=None, data_home=None, shuffle=False,
     return Bunch(data=data, target=target)
 
 
-def _fetch_brute_kddcup99(subset=None, data_home=None,
+def _fetch_brute_kddcup99(data_home=None,
                           download_if_missing=True, random_state=None,
                           shuffle=False, percent10=True):
 
@@ -230,10 +230,6 @@ def _fetch_brute_kddcup99(subset=None, data_home=None,
 
     Parameters
     ----------
-    subset : None, 'SA', 'SF', 'http', 'smtp'
-        To return the corresponding classical subsets of kddcup 99.
-        If None, return the entire kddcup 99 dataset.
-
     data_home : string, optional
         Specify another download and cache folder for the datasets. By default
         all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

From c508034ff4a31c6b430e0f1997334f8cc317e6b7 Mon Sep 17 00:00:00 2001
From: wallygauze <wallygauze@yahoo.fr>
Date: Fri, 14 Jul 2017 17:55:38 +0100
Subject: [PATCH 06/17] Correction number 2

---
 sklearn/linear_model/ridge.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/linear_model/ridge.py b/sklearn/linear_model/ridge.py
index 6bfcad836c5b8..3e584a78ad93a 100644
--- a/sklearn/linear_model/ridge.py
+++ b/sklearn/linear_model/ridge.py
@@ -1120,7 +1120,8 @@ def fit(self, X, y, sample_weight=None):
                 raise ValueError("cv!=None and store_cv_values=True "
                                  " are incompatible")
             parameters = {'alpha': self.alphas}
-            gs = GridSearchCV(Ridge(fit_intercept=self.fit_intercept),
+            gs = GridSearchCV(Ridge(fit_intercept=self.fit_intercept,
+                                    normalize=self.normalize),
                               parameters, cv=self.cv, scoring=self.scoring)
             gs.fit(X, y, sample_weight=sample_weight)
             estimator = gs.best_estimator_

From e6b38e34b9bf085c98654dceece494d369c0c9ed Mon Sep 17 00:00:00 2001
From: wallygauze <wallygauze@yahoo.fr>
Date: Fri, 14 Jul 2017 17:58:57 +0100
Subject: [PATCH 07/17] Correction number 3

---
 sklearn/linear_model/tests/test_ridge.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index 4879e02deff50..6cfce63464569 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -383,6 +383,15 @@ def _test_ridge_loo(filter_):
     return ret
 
 
+def _test_ridge_cv_normalize(filter_):
+    ridge_cv = RidgeCV(normalize=True, cv=3)
+    ridge_cv.fit(filter_(10. * X_diabetes), y_diabetes)
+ 
+    gs = GridSearchCV(Ridge(normalize=True), cv=3,
+                      param_grid={'alpha': ridge_cv.alphas})
+    gs.fit(filter_(10. * X_diabetes), y_diabetes)
+    assert_equal(gs.best_estimator_.alpha, ridge_cv.alpha_)
+ 
 def _test_ridge_cv(filter_):
     ridge_cv = RidgeCV()
     ridge_cv.fit(filter_(X_diabetes), y_diabetes)

From 93f73013ab57a995236388d97508d662294b40da Mon Sep 17 00:00:00 2001
From: wallygauze <wallygauze@yahoo.fr>
Date: Fri, 14 Jul 2017 18:05:06 +0100
Subject: [PATCH 08/17] Correction number 4

---
 sklearn/linear_model/tests/test_ridge.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py
index 6cfce63464569..ee44da5d56b86 100644
--- a/sklearn/linear_model/tests/test_ridge.py
+++ b/sklearn/linear_model/tests/test_ridge.py
@@ -386,12 +386,13 @@ def _test_ridge_loo(filter_):
 def _test_ridge_cv_normalize(filter_):
     ridge_cv = RidgeCV(normalize=True, cv=3)
     ridge_cv.fit(filter_(10. * X_diabetes), y_diabetes)
- 
+
     gs = GridSearchCV(Ridge(normalize=True), cv=3,
                       param_grid={'alpha': ridge_cv.alphas})
     gs.fit(filter_(10. * X_diabetes), y_diabetes)
     assert_equal(gs.best_estimator_.alpha, ridge_cv.alpha_)
- 
+
+
 def _test_ridge_cv(filter_):
     ridge_cv = RidgeCV()
     ridge_cv.fit(filter_(X_diabetes), y_diabetes)
@@ -471,6 +472,7 @@ def check_dense_sparse(test_func):
 def test_dense_sparse():
     for test_func in (_test_ridge_loo,
                       _test_ridge_cv,
+                      _test_ridge_cv_normalize,
                       _test_ridge_diabetes,
                       _test_multi_ridge_diabetes,
                       _test_ridge_classifiers,

From 1acfd8baa43ae5f9eb78dace35f3349d1fd01ea4 Mon Sep 17 00:00:00 2001
From: wallygauze <wallygauze@yahoo.fr>
Date: Fri, 14 Jul 2017 18:08:01 +0100
Subject: [PATCH 09/17] Correction number 5

---
 sklearn/model_selection/_split.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 3f228e85c43e8..4bcc0ae1c5349 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -566,7 +566,7 @@ class StratifiedKFold(_BaseKFold):
     def __init__(self, n_splits=3, shuffle=False, random_state=None):
         super(StratifiedKFold, self).__init__(n_splits, shuffle, random_state)
 
-    def _make_test_folds(self, X, y=None, groups=None):
+    def _make_test_folds(self, X, y=None):
         if self.shuffle:
             rng = check_random_state(self.random_state)
         else:

From 289a8ac93e6b704f87276be39c300378f79b2734 Mon Sep 17 00:00:00 2001
From: wallygauze <wallygauze@yahoo.fr>
Date: Fri, 14 Jul 2017 18:13:51 +0100
Subject: [PATCH 10/17] Last Correction

---
 sklearn/neural_network/multilayer_perceptron.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py
index 735021a06e532..af1eca3b201d5 100644
--- a/sklearn/neural_network/multilayer_perceptron.py
+++ b/sklearn/neural_network/multilayer_perceptron.py
@@ -640,7 +640,7 @@ def partial_fit(self):
                                  % self.solver)
         return self._partial_fit
 
-    def _partial_fit(self, X, y, classes=None):
+    def _partial_fit(self, X, y):
         return self._fit(X, y, incremental=True)
 
     def _predict(self, X):

From be5ac2d0b8cf02d15f4788479214d7df94167b59 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Mon, 17 Jul 2017 05:20:31 +0100
Subject: [PATCH 11/17] added regression tests for n_comp=None case in
 incremental pca

---
 .../tests/test_incremental_pca.py             | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py
index 5f23f3440e42c..3ac37f759e92c 100644
--- a/sklearn/decomposition/tests/test_incremental_pca.py
+++ b/sklearn/decomposition/tests/test_incremental_pca.py
@@ -90,6 +90,33 @@ def test_incremental_pca_validation():
                         IncrementalPCA(n_components=3).partial_fit, X)
 
 
+def test_n_components_none():
+    # Ensures that n_components == None is handled correctly
+    rng = np.random.RandomState(1999)
+    for n_samples, n_features in [(50, 10), (10, 50)]:
+
+        ipca = IncrementalPCA(n_components=None)
+
+        for partial_fit_call in [1, 2]:
+            X = rng.rand(n_samples, n_features)
+
+            if not hasattr(ipca, 'components_'):  # first call to partial_fit
+
+                ipca.partial_fit(X)
+                if not ipca.n_components_ == min(X.shape):
+                    raise AssertionError('n_components=None did default to'
+                                         ' the choice of the minimum between '
+                                         'the batch number of samples and the '
+                                         'number of features.')
+            else:
+
+                ipca.partial_fit(X)
+                if not ipca.n_components_ == ipca.components_.shape[0]:
+                    raise AssertionError('For n_components=None, the value'
+                                         ' assigned has changed between calls '
+                                         'to partial_fit.')
+
+
 def test_incremental_pca_set_params():
     # Test that components_ sign is stable over batch sizes.
     rng = np.random.RandomState(1999)

From eee25b3ffa8af14c0c4bb5da1f3e04a4918ea167 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Mon, 17 Jul 2017 07:35:24 +0100
Subject: [PATCH 12/17] some lines were never used, turned to code better for
 coverage

---
 sklearn/decomposition/tests/test_incremental_pca.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py
index 3ac37f759e92c..00249ee26f04a 100644
--- a/sklearn/decomposition/tests/test_incremental_pca.py
+++ b/sklearn/decomposition/tests/test_incremental_pca.py
@@ -103,18 +103,12 @@ def test_n_components_none():
             if not hasattr(ipca, 'components_'):  # first call to partial_fit
 
                 ipca.partial_fit(X)
-                if not ipca.n_components_ == min(X.shape):
-                    raise AssertionError('n_components=None did default to'
-                                         ' the choice of the minimum between '
-                                         'the batch number of samples and the '
-                                         'number of features.')
+                assert ipca.n_components_ == min(X.shape)
+
             else:
 
                 ipca.partial_fit(X)
-                if not ipca.n_components_ == ipca.components_.shape[0]:
-                    raise AssertionError('For n_components=None, the value'
-                                         ' assigned has changed between calls '
-                                         'to partial_fit.')
+                assert ipca.n_components_ == ipca.components_.shape[0]
 
 
 def test_incremental_pca_set_params():

From 46fd39273dc39517265f0ac6f6a4a4b950669f51 Mon Sep 17 00:00:00 2001
From: wallygauze <wallygauze@yahoo.fr>
Date: Mon, 24 Jul 2017 18:57:57 +0100
Subject: [PATCH 13/17] Update whats_new.rst

---
 doc/whats_new.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 9f5a8f5c914ad..aabec2c842bce 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -526,6 +526,11 @@ Decomposition, manifold learning and clustering
    - Fix bug where :mod:`mixture` ``sample`` methods did not return as many
      samples as requested. :issue:`7702` by :user:`Levi John Wolf <ljwolf>`.
 
+   - Fix for uninformative error in :class:`decomposition.incremental_pca`:
+     now an error is raised if the number of components is larger than the
+     chosen batch size. The ``n_components=None`` case was adapted accordingly.
+     :issue:`6452`. By :user:`Wally Gauze <wallygauze>`.
+     
 Preprocessing and feature selection
 
    - For sparse matrices, :func:`preprocessing.normalize` with ``return_norm=True``

From a7555542041641b9b5af90229dc14f7edeb32136 Mon Sep 17 00:00:00 2001
From: wallygauze <wallygauze@yahoo.fr>
Date: Tue, 25 Jul 2017 10:58:34 +0100
Subject: [PATCH 14/17] modifying error message (part 1)

---
 sklearn/decomposition/incremental_pca.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sklearn/decomposition/incremental_pca.py b/sklearn/decomposition/incremental_pca.py
index 54be6e21e3cf4..101b42feaf31c 100644
--- a/sklearn/decomposition/incremental_pca.py
+++ b/sklearn/decomposition/incremental_pca.py
@@ -220,9 +220,8 @@ def partial_fit(self, X, y=None, check_input=True):
                              "processing" % (self.n_components, n_features))
         elif not self.n_components <= n_samples:
             raise ValueError("n_components=%r must be less or equal to "
-                             "the batch number of samples %d. You can change "
-                             "either one depending on what you "
-                             "want." % (self.n_components, n_samples))
+                             "the batch number of samples "
+                             "%d." % (self.n_components, n_samples))
         else:
             self.n_components_ = self.n_components
 

From 522ebe0bf1a72a8c941e3911727f9c397fb45f02 Mon Sep 17 00:00:00 2001
From: wallygauze <wallygauze@yahoo.fr>
Date: Tue, 25 Jul 2017 11:00:26 +0100
Subject: [PATCH 15/17] modifying error message part2

---
 sklearn/decomposition/tests/test_incremental_pca.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py
index 00249ee26f04a..7565279d63da4 100644
--- a/sklearn/decomposition/tests/test_incremental_pca.py
+++ b/sklearn/decomposition/tests/test_incremental_pca.py
@@ -85,8 +85,7 @@ def test_incremental_pca_validation():
     # Tests that n_components is also <= n_samples.
     assert_raises_regex(ValueError,
                         "n_components\=.* be less or equal to "
-                        "the batch number of samples .*\. You can change "
-                        "either one depending on what you want\.",
+                        "the batch number of samples .*\.",
                         IncrementalPCA(n_components=3).partial_fit, X)
 
 

From 5bdc0f3be5fa18cbb2f9f39d715beecb66d8f700 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= <loic.esteve@ymail.com>
Date: Thu, 3 Aug 2017 12:05:00 +0200
Subject: [PATCH 16/17] Minor improvements in test_incremental_pca.py

---
 .../tests/test_incremental_pca.py             | 37 ++++++++++---------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py
index 7565279d63da4..e256ca68872aa 100644
--- a/sklearn/decomposition/tests/test_incremental_pca.py
+++ b/sklearn/decomposition/tests/test_incremental_pca.py
@@ -74,40 +74,41 @@ def test_incremental_pca_inverse():
 
 def test_incremental_pca_validation():
     # Test that n_components is >=1 and <= n_features.
-    X = [[0, 1, 0], [1, 0, 0]]
+    X = np.array([[0, 1, 0], [1, 0, 0]])
+    n_samples, n_features = X.shape
     for n_components in [-1, 0, .99, 4]:
         assert_raises_regex(ValueError,
-                            "n_components\=.* invalid for n_features\=.*, need"
+                            "n_components={} invalid for n_features={}, need"
                             " more rows than columns for IncrementalPCA "
-                            "processing",
+                            "processing".format(n_components, n_features),
                             IncrementalPCA(n_components, batch_size=10).fit, X)
 
     # Tests that n_components is also <= n_samples.
+    n_components = 3
     assert_raises_regex(ValueError,
-                        "n_components\=.* be less or equal to "
-                        "the batch number of samples .*\.",
-                        IncrementalPCA(n_components=3).partial_fit, X)
+                        "n_components={} must be less or equal to "
+                        "the batch number of samples {}".format(
+                            n_components, n_samples),
+                        IncrementalPCA(
+                            n_components=n_components).partial_fit, X)
 
 
 def test_n_components_none():
     # Ensures that n_components == None is handled correctly
     rng = np.random.RandomState(1999)
     for n_samples, n_features in [(50, 10), (10, 50)]:
-
+        X = rng.rand(n_samples, n_features)
         ipca = IncrementalPCA(n_components=None)
 
-        for partial_fit_call in [1, 2]:
-            X = rng.rand(n_samples, n_features)
-
-            if not hasattr(ipca, 'components_'):  # first call to partial_fit
-
-                ipca.partial_fit(X)
-                assert ipca.n_components_ == min(X.shape)
-
-            else:
+        # First partial_fit call, ipca.n_components_ is inferred from
+        # min(X.shape)
+        ipca.partial_fit(X)
+        assert ipca.n_components_ == min(X.shape)
 
-                ipca.partial_fit(X)
-                assert ipca.n_components_ == ipca.components_.shape[0]
+        # Second partial_fit call, ipca.n_components_ is inferred from
+        # ipca.components_ computed from the first partial_fit call
+        ipca.partial_fit(X)
+        assert ipca.n_components_ == ipca.components_.shape[0]
 
 
 def test_incremental_pca_set_params():

From d15c6012a384c67787e4d7704b98454434446d56 Mon Sep 17 00:00:00 2001
From: Wally <wallygauze@gmail.com>
Date: Mon, 14 Aug 2017 12:56:36 +0100
Subject: [PATCH 17/17] moved entry to 0.20

---
 doc/whats_new.rst | 6632 +++++++++++++++++++++++----------------------
 1 file changed, 3376 insertions(+), 3256 deletions(-)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index aabec2c842bce..a79df7a911586 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -5,10 +5,39 @@
 Release history
 ===============
 
+Version 0.20 (under development)
+================================
+
+Changed models
+--------------
+
+Changelog
+---------
+
+New features
+............
+
+Classifiers and regressors
+
+- :class:`ensemble.GradientBoostingClassifier` and
+  :class:`ensemble.GradientBoostingRegressor` now support early stopping
+  via ``n_iter_no_change``, ``validation_fraction`` and ``tol``. :issue:`7071`
+  by `Raghav RV`_
+
+Bug fixes
+.........
+
+Decomposition, manifold learning and clustering
+
+- Fix for uninformative error in :class:`decomposition.IncrementalPCA`:
+  now an error is raised if the number of components is larger than the
+  chosen batch size. The ``n_components=None`` case was adapted accordingly.
+  :issue:`6452` by :user:`Wally Gauze <wallygauze>`.
+
 Version 0.19
 ============
 
-**In Development**
+**Release Candidate (0.19b2) July 17, 2017**
 
 Highlights
 ----------
@@ -22,18 +51,18 @@ algorithms in existing estimators, such as multiplicative update in
 :class:`decomposition.NMF` and multinomial
 :class:`linear_model.LogisticRegression` with L1 loss (use ``solver='saga'``).
 
-You can also learn faster.  For instance, the :ref:`new option to cache
-transformations <pipeline_cache>` in :class:`pipeline.Pipeline` makes grid
-search over pipelines including slow transformations much more efficient.  And
-you can predict faster: if you're sure you know what you're doing, you can turn
-off validating that the input is finite using :func:`config_context`.
-
 Cross validation is now able to return the results from multiple metric
 evaluations. The new :func:`model_selection.cross_validate` can return many
 scores on the test data as well as training set performance and timings, and we
 have extended the ``scoring`` and ``refit`` parameters for grid/randomized
 search :ref:`to handle multiple metrics <multimetric_grid_search>`.
 
+You can also learn faster.  For instance, the :ref:`new option to cache
+transformations <pipeline_cache>` in :class:`pipeline.Pipeline` makes grid
+search over pipelines including slow transformations much more efficient.  And
+you can predict faster: if you're sure you know what you're doing, you can turn
+off validating that the input is finite using :func:`config_context`.
+
 We've made some important fixes too.  We've fixed a longstanding implementation
 error in :func:`metrics.average_precision_score`, so please be cautious with
 prior results reported from that function.  A number of errors in the
@@ -51,21 +80,22 @@ parameters, may produce different models from the previous version. This often
 occurs due to changes in the modelling logic (bug fixes or enhancements), or in
 random sampling procedures.
 
-   * :class:`cluster.KMeans` with sparse X and initial centroids given (bug fix)
-   * :class:`cross_decomposition.PLSRegression`
-     with ``scale=True`` (bug fix)
-   * :class:`ensemble.GradientBoostingClassifier` and
-     :class:`ensemble.GradientBoostingRegressor` where ``min_impurity_split`` is used (bug fix)
-   * gradient boosting ``loss='quantile'`` (bug fix)
-   * :class:`ensemble.IsolationForest` (bug fix)
-   * :class:`feature_selection.SelectFdr` (bug fix)
-   * :class:`linear_model.RANSACRegressor` (bug fix)
-   * :class:`linear_model.LassoLars` (bug fix)
-   * :class:`linear_model.LassoLarsIC` (bug fix)
-   * :class:`manifold.TSNE` (bug fix)
-   * :class:`semi_supervised.LabelSpreading` (bug fix)
-   * :class:`semi_supervised.LabelPropagation` (bug fix)
-   * tree based models where ``min_weight_fraction_leaf`` is used (enhancement)
+- :class:`cluster.KMeans` with sparse X and initial centroids given (bug fix)
+- :class:`cross_decomposition.PLSRegression`
+  with ``scale=True`` (bug fix)
+- :class:`ensemble.GradientBoostingClassifier` and
+  :class:`ensemble.GradientBoostingRegressor` where ``min_impurity_split`` is used (bug fix)
+- gradient boosting ``loss='quantile'`` (bug fix)
+- :class:`ensemble.IsolationForest` (bug fix)
+- :class:`feature_selection.SelectFdr` (bug fix)
+- :class:`linear_model.RANSACRegressor` (bug fix)
+- :class:`linear_model.LassoLars` (bug fix)
+- :class:`linear_model.LassoLarsIC` (bug fix)
+- :class:`manifold.TSNE` (bug fix)
+- :class:`neighbors.NearestCentroid` (bug fix)
+- :class:`semi_supervised.LabelSpreading` (bug fix)
+- :class:`semi_supervised.LabelPropagation` (bug fix)
+- tree based models where ``min_weight_fraction_leaf`` is used (enhancement)
 
 Details are listed in the changelog below.
 
@@ -80,759 +110,849 @@ New features
 
 Classifiers and regressors
 
-   - Added :class:`multioutput.ClassifierChain` for multi-label
-     classification. By `Adam Kleczewski <adamklec>`_.
+- Added :class:`multioutput.ClassifierChain` for multi-label
+  classification. By `Adam Kleczewski <adamklec>`_.
 
-   - Added solver ``'saga'`` that implements the improved version of Stochastic
-     Average Gradient, in :class:`linear_model.LogisticRegression` and
-     :class:`linear_model.Ridge`. It allows the use of L1 penalty with
-     multinomial logistic loss, and behaves marginally better than 'sag'
-     during the first epochs of ridge and logistic regression.
-     :issue:`8446` by `Arthur Mensch`_.
+- Added solver ``'saga'`` that implements the improved version of Stochastic
+  Average Gradient, in :class:`linear_model.LogisticRegression` and
+  :class:`linear_model.Ridge`. It allows the use of L1 penalty with
+  multinomial logistic loss, and behaves marginally better than 'sag'
+  during the first epochs of ridge and logistic regression.
+  :issue:`8446` by `Arthur Mensch`_.
 
 Other estimators
 
-   - Added the :class:`neighbors.LocalOutlierFactor` class for anomaly
-     detection based on nearest neighbors.
-     :issue:`5279` by `Nicolas Goix`_ and `Alexandre Gramfort`_.
+- Added the :class:`neighbors.LocalOutlierFactor` class for anomaly
+  detection based on nearest neighbors.
+  :issue:`5279` by `Nicolas Goix`_ and `Alexandre Gramfort`_.
 
-   - Added :class:`preprocessing.QuantileTransformer` class and
-     :func:`preprocessing.quantile_transform` function for features
-     normalization based on quantiles.
-     :issue:`8363` by :user:`Denis Engemann <dengemann>`,
-     :user:`Guillaume Lemaitre <glemaitre>`, `Olivier Grisel`_, `Raghav RV`_,
-     :user:`Thierry Guillemot <tguillemot>`, and `Gael Varoquaux`_.
+- Added :class:`preprocessing.QuantileTransformer` class and
+  :func:`preprocessing.quantile_transform` function for features
+  normalization based on quantiles.
+  :issue:`8363` by :user:`Denis Engemann <dengemann>`,
+  :user:`Guillaume Lemaitre <glemaitre>`, `Olivier Grisel`_, `Raghav RV`_,
+  :user:`Thierry Guillemot <tguillemot>`, and `Gael Varoquaux`_.
 
-   - The new solver ``'mu'`` implements a Multiplicate Update in
-     :class:`decomposition.NMF`, allowing the optimization of all
-     beta-divergences, including the Frobenius norm, the generalized
-     Kullback-Leibler divergence and the Itakura-Saito divergence.
-     :issue:`5295` by `Tom Dupre la Tour`_.
+- The new solver ``'mu'`` implements a Multiplicative Update in
+  :class:`decomposition.NMF`, allowing the optimization of all
+  beta-divergences, including the Frobenius norm, the generalized
+  Kullback-Leibler divergence and the Itakura-Saito divergence.
+  :issue:`5295` by `Tom Dupre la Tour`_.
 
 Model selection and evaluation
 
-   - :class:`model_selection.GridSearchCV` and
-     :class:`model_selection.RandomizedSearchCV` now support simultaneous
-     evaluation of multiple metrics. Refer to the
-     :ref:`multimetric_grid_search` section of the user guide for more
-     information. :issue:`7388` by `Raghav RV`_
-
-   - Added the :func:`model_selection.cross_validate` which allows evaluation
-     of multiple metrics. This function returns a dict with more useful
-     information from cross-validation such as the train scores, fit times and
-     score times.
-     Refer to :ref:`multimetric_cross_validation` section of the userguide
-     for more information. :issue:`7388` by `Raghav RV`_
-
-   - Added :func:`metrics.mean_squared_log_error`, which computes
-     the mean square error of the logarithmic transformation of targets,
-     particularly useful for targets with an exponential trend.
-     :issue:`7655` by :user:`Karan Desai <karandesai-96>`.
-
-   - Added :func:`metrics.dcg_score` and :func:`metrics.ndcg_score`, which
-     compute Discounted cumulative gain (DCG) and Normalized discounted
-     cumulative gain (NDCG).
-     :issue:`7739` by :user:`David Gasquez <davidgasquez>`.
-
-   - Added the :class:`model_selection.RepeatedKFold` and
-     :class:`model_selection.RepeatedStratifiedKFold`.
-     :issue:`8120` by `Neeraj Gangwar`_.
+- :class:`model_selection.GridSearchCV` and
+  :class:`model_selection.RandomizedSearchCV` now support simultaneous
+  evaluation of multiple metrics. Refer to the
+  :ref:`multimetric_grid_search` section of the user guide for more
+  information. :issue:`7388` by `Raghav RV`_
+
+- Added the :func:`model_selection.cross_validate` which allows evaluation
+  of multiple metrics. This function returns a dict with more useful
+  information from cross-validation such as the train scores, fit times and
+  score times.
+  Refer to :ref:`multimetric_cross_validation` section of the user guide
+  for more information. :issue:`7388` by `Raghav RV`_
+
+- Added :func:`metrics.mean_squared_log_error`, which computes
+  the mean square error of the logarithmic transformation of targets,
+  particularly useful for targets with an exponential trend.
+  :issue:`7655` by :user:`Karan Desai <karandesai-96>`.
+
+- Added :func:`metrics.dcg_score` and :func:`metrics.ndcg_score`, which
+  compute Discounted cumulative gain (DCG) and Normalized discounted
+  cumulative gain (NDCG).
+  :issue:`7739` by :user:`David Gasquez <davidgasquez>`.
+
+- Added the :class:`model_selection.RepeatedKFold` and
+  :class:`model_selection.RepeatedStratifiedKFold`.
+  :issue:`8120` by `Neeraj Gangwar`_.
+
+- Added a scorer based on :class:`metrics.explained_variance_score`.
+  :issue:`9259` by `Hanmin Qin <https://github.com/qinhanmin2014>`_.
 
 Miscellaneous
 
-   - Validation that input data contains no NaN or inf can now be suppressed
-     using :func:`config_context`, at your own risk. This will save on runtime,
-     and may be particularly useful for prediction time. :issue:`7548` by
-     `Joel Nothman`_.
+- Validation that input data contains no NaN or inf can now be suppressed
+  using :func:`config_context`, at your own risk. This will save on runtime,
+  and may be particularly useful for prediction time. :issue:`7548` by
+  `Joel Nothman`_.
 
-   - Added a test to ensure parameter listing in docstrings match the
-     function/class signature. :issue:`9206` by `Alexandre Gramfort`_ and
-     `Raghav RV`_.
+- Added a test to ensure parameter listing in docstrings match the
+  function/class signature. :issue:`9206` by `Alexandre Gramfort`_ and
+  `Raghav RV`_.
 
 Enhancements
 ............
 
 Trees and ensembles
 
-   - The ``min_weight_fraction_leaf`` constraint in tree construction is now
-     more efficient, taking a fast path to declare a node a leaf if its weight
-     is less than 2 * the minimum. Note that the constructed tree will be
-     different from previous versions where ``min_weight_fraction_leaf`` is
-     used. :issue:`7441` by :user:`Nelson Liu <nelson-liu>`.
+- The ``min_weight_fraction_leaf`` constraint in tree construction is now
+  more efficient, taking a fast path to declare a node a leaf if its weight
+  is less than 2 * the minimum. Note that the constructed tree will be
+  different from previous versions where ``min_weight_fraction_leaf`` is
+  used. :issue:`7441` by :user:`Nelson Liu <nelson-liu>`.
 
-   - :class:`ensemble.GradientBoostingClassifier` and :class:`ensemble.GradientBoostingRegressor`
-     now support sparse input for prediction.
-     :issue:`6101` by :user:`Ibraim Ganiev <olologin>`.
+- :class:`ensemble.GradientBoostingClassifier` and :class:`ensemble.GradientBoostingRegressor`
+  now support sparse input for prediction.
+  :issue:`6101` by :user:`Ibraim Ganiev <olologin>`.
 
-   - :class:`ensemble.VotingClassifier` now allows changing estimators by using
-     :meth:`ensemble.VotingClassifier.set_params`. An estimator can also be
-     removed by setting it to ``None``.
-     :issue:`7674` by :user:`Yichuan Liu <yl565>`.
+- :class:`ensemble.VotingClassifier` now allows changing estimators by using
+  :meth:`ensemble.VotingClassifier.set_params`. An estimator can also be
+  removed by setting it to ``None``.
+  :issue:`7674` by :user:`Yichuan Liu <yl565>`.
 
-   - :func:`tree.export_graphviz` now shows configurable number of decimal
-     places. :issue:`8698` by :user:`Guillaume Lemaitre <glemaitre>`.
+- :func:`tree.export_graphviz` now shows configurable number of decimal
+  places. :issue:`8698` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Added ``flatten_transform`` parameter to :class:`ensemble.VotingClassifier`
+  to change output shape of ``transform`` method to 2 dimensional.
+  :issue:`7794` by :user:`Ibraim Ganiev <olologin>` and
+  :user:`Herilalaina Rakotoarison <herilalaina>`.
 
 Linear, kernelized and related models
 
-   - :class:`linear_model.SGDClassifier`, :class:`linear_model.SGDRegressor`,
-     :class:`linear_model.PassiveAggressiveClassifier`,
-     :class:`linear_model.PassiveAggressiveRegressor` and
-     :class:`linear_model.Perceptron` now expose ``max_iter`` and
-     ``tol`` parameters, to handle convergence more precisely.
-     ``n_iter`` parameter is deprecated, and the fitted estimator exposes
-     a ``n_iter_`` attribute, with actual number of iterations before
-     convergence. :issue:`5036` by `Tom Dupre la Tour`_.
-
-   - Added ``average`` parameter to perform weight averaging in
-     :class:`linear_model.PassiveAggressiveClassifier`. :issue:`4939`
-     by :user:`Andrea Esuli <aesuli>`.
-
-   - :class:`linear_model.RANSACRegressor` no longer throws an error
-     when calling ``fit`` if no inliers are found in its first iteration.
-     Furthermore, causes of skipped iterations are tracked in newly added
-     attributes, ``n_skips_*``.
-     :issue:`7914` by :user:`Michael Horrell <mthorrell>`.
-
-   - In :class:`gaussian_process.GaussianProcessRegressor`, method ``predict``
-     is a lot faster with ``return_std=True``. :issue:`8591` by
-     :user:`Hadrien Bertrand <hbertrand>`.
-
-   - Added ``return_std`` to ``predict`` method of
-     :class:`linear_model.ARDRegression` and
-     :class:`linear_model.BayesianRidge`.
-     :issue:`7838` by :user:`Sergey Feldman <sergeyf>`.
-
-   - Memory usage enhancements: Prevent cast from float32 to float64 in:
-     :class:`linear_model.MultiTaskElasticNet`;
-     :class:`linear_model.LogisticRegression` when using newton-cg solver; and
-     :class:`linear_model.Ridge` when using svd, sparse_cg, cholesky or lsqr
-     solvers. :issue:`8835`, :issue:`8061` by :user:`Joan Massich <massich>` and :user:`Nicolas
-     Cordier <ncordier>` and :user:`Thierry Guillemot <tguillemot>`.
+- :class:`linear_model.SGDClassifier`, :class:`linear_model.SGDRegressor`,
+  :class:`linear_model.PassiveAggressiveClassifier`,
+  :class:`linear_model.PassiveAggressiveRegressor` and
+  :class:`linear_model.Perceptron` now expose ``max_iter`` and
+  ``tol`` parameters, to handle convergence more precisely.
+  ``n_iter`` parameter is deprecated, and the fitted estimator exposes
+  a ``n_iter_`` attribute, with actual number of iterations before
+  convergence. :issue:`5036` by `Tom Dupre la Tour`_.
+
+- Added ``average`` parameter to perform weight averaging in
+  :class:`linear_model.PassiveAggressiveClassifier`. :issue:`4939`
+  by :user:`Andrea Esuli <aesuli>`.
+
+- :class:`linear_model.RANSACRegressor` no longer throws an error
+  when calling ``fit`` if no inliers are found in its first iteration.
+  Furthermore, causes of skipped iterations are tracked in newly added
+  attributes, ``n_skips_*``.
+  :issue:`7914` by :user:`Michael Horrell <mthorrell>`.
+
+- In :class:`gaussian_process.GaussianProcessRegressor`, method ``predict``
+  is a lot faster with ``return_std=True``. :issue:`8591` by
+  :user:`Hadrien Bertrand <hbertrand>`.
+
+- Added ``return_std`` to ``predict`` method of
+  :class:`linear_model.ARDRegression` and
+  :class:`linear_model.BayesianRidge`.
+  :issue:`7838` by :user:`Sergey Feldman <sergeyf>`.
+
+- Memory usage enhancements: Prevent cast from float32 to float64 in:
+  :class:`linear_model.MultiTaskElasticNet`;
+  :class:`linear_model.LogisticRegression` when using newton-cg solver; and
+  :class:`linear_model.Ridge` when using svd, sparse_cg, cholesky or lsqr
+  solvers. :issue:`8835`, :issue:`8061` by :user:`Joan Massich <massich>` and :user:`Nicolas
+  Cordier <ncordier>` and :user:`Thierry Guillemot <tguillemot>`.
 
 Other predictors
 
-   - Custom metrics for the :mod:`neighbors` binary trees now have
-     fewer constraints: they must take two 1d-arrays and return a float.
-     :issue:`6288` by `Jake Vanderplas`_.
+- Custom metrics for the :mod:`neighbors` binary trees now have
+  fewer constraints: they must take two 1d-arrays and return a float.
+  :issue:`6288` by `Jake Vanderplas`_.
 
-   - ``algorithm='auto`` in :mod:`neighbors` estimators now chooses the most
-     appropriate algorithm for all input types and metrics. :issue:`9145` by
-     :user:`Herilalaina Rakotoarison <herilalaina>` and :user:`Reddy Chinthala
-     <preddy5Pradyumna>`.
+- ``algorithm='auto'`` in :mod:`neighbors` estimators now chooses the most
+  appropriate algorithm for all input types and metrics. :issue:`9145` by
+  :user:`Herilalaina Rakotoarison <herilalaina>` and :user:`Reddy Chinthala
+  <preddy5Pradyumna>`.
 
 Decomposition, manifold learning and clustering
 
-   - :class:`cluster.MiniBatchKMeans` and :class:`cluster.KMeans`
-     now use significantly less memory when assigning data points to their
-     nearest cluster center. :issue:`7721` by :user:`Jon Crall <Erotemic>`.
+- :class:`cluster.MiniBatchKMeans` and :class:`cluster.KMeans`
+  now use significantly less memory when assigning data points to their
+  nearest cluster center. :issue:`7721` by :user:`Jon Crall <Erotemic>`.
+
+- :class:`decomposition.PCA`, :class:`decomposition.IncrementalPCA` and
+  :class:`decomposition.TruncatedSVD` now expose the singular values
+  from the underlying SVD. They are stored in the attribute
+  ``singular_values_``, like in :class:`decomposition.IncrementalPCA`.
+  :issue:`7685` by :user:`Tommy Löfstedt <tomlof>`
 
-   - :class:`decomposition.PCA`, :class:`decomposition.IncrementalPCA` and
-     :class:`decomposition.TruncatedSVD` now expose the singular values
-     from the underlying SVD. They are stored in the attribute
-     ``singular_values_``, like in :class:`decomposition.IncrementalPCA`.
-     :issue:`7685` by :user:`Tommy Löfstedt <tomlof>`
+- Fixed the implementation of noise_variance_ in :class:`decomposition.PCA`.
+  :issue:`9108` by `Hanmin Qin <https://github.com/qinhanmin2014>`_.
 
-   - :class:`decomposition.NMF` now faster when ``beta_loss=0``.
-     :issue:`9277` by :user:`hongkahjun`.
+- :class:`decomposition.NMF` now faster when ``beta_loss=0``.
+  :issue:`9277` by :user:`hongkahjun`.
 
-   - Memory improvements for method ``barnes_hut`` in :class:`manifold.TSNE`
-     :issue:`7089` by :user:`Thomas Moreau <tomMoral>` and `Olivier Grisel`_.
+- Memory improvements for method ``barnes_hut`` in :class:`manifold.TSNE`
+  :issue:`7089` by :user:`Thomas Moreau <tomMoral>` and `Olivier Grisel`_.
 
-   - Optimization schedule improvements for Barnes-Hut :class:`manifold.TSNE`
-     so the results are closer to the one from the reference implementation
-     `lvdmaaten/bhtsne <https://github.com/lvdmaaten/bhtsne>`_ by :user:`Thomas
-     Moreau <tomMoral>` and `Olivier Grisel`_.
+- Optimization schedule improvements for Barnes-Hut :class:`manifold.TSNE`
+  so the results are closer to the one from the reference implementation
+  `lvdmaaten/bhtsne <https://github.com/lvdmaaten/bhtsne>`_ by :user:`Thomas
+  Moreau <tomMoral>` and `Olivier Grisel`_.
 
-   - Memory usage enhancements: Prevent cast from float32 to float64 in
-     :class:`decomposition.PCA` and
-     :func:`decomposition.randomized_svd_low_rank`.
-     :issue:`9067` by `Raghav RV`_.
+- Memory usage enhancements: Prevent cast from float32 to float64 in
+  :class:`decomposition.PCA` and
+  :func:`decomposition.randomized_svd_low_rank`.
+  :issue:`9067` by `Raghav RV`_.
 
 Preprocessing and feature selection
 
-   - Added ``norm_order`` parameter to :class:`feature_selection.SelectFromModel`
-     to enable selection of the norm order when ``coef_`` is more than 1D.
-     :issue:`6181` by :user:`Antoine Wendlinger <antoinewdg>`.
+- Added ``norm_order`` parameter to :class:`feature_selection.SelectFromModel`
+  to enable selection of the norm order when ``coef_`` is more than 1D.
+  :issue:`6181` by :user:`Antoine Wendlinger <antoinewdg>`.
 
-   - Added ability to use sparse matrices in :func:`feature_selection.f_regression`
-     with ``center=True``. :issue:`8065` by :user:`Daniel LeJeune <acadiansith>`.
+- Added ability to use sparse matrices in :func:`feature_selection.f_regression`
+  with ``center=True``. :issue:`8065` by :user:`Daniel LeJeune <acadiansith>`.
 
-   - Small performance improvement to n-gram creation in
-     :mod:`feature_extraction.text` by binding methods for loops and
-     special-casing unigrams. :issue:`7567` by :user:`Jaye Doepke <jtdoepke>`
+- Small performance improvement to n-gram creation in
+  :mod:`feature_extraction.text` by binding methods for loops and
+  special-casing unigrams. :issue:`7567` by :user:`Jaye Doepke <jtdoepke>`
 
-   - Relax assumption on the data for the
-     :class:`kernel_approximation.SkewedChi2Sampler`. Since the Skewed-Chi2
-     kernel is defined on the open interval :math:`(-skewedness; +\infty)^d`,
-     the transform function should not check whether ``X < 0`` but whether ``X <
-     -self.skewedness``. :issue:`7573` by :user:`Romain Brault <RomainBrault>`.
+- Relax assumption on the data for the
+  :class:`kernel_approximation.SkewedChi2Sampler`. Since the Skewed-Chi2
+  kernel is defined on the open interval :math:`(-skewedness; +\infty)^d`,
+  the transform function should not check whether ``X < 0`` but whether ``X <
+  -self.skewedness``. :issue:`7573` by :user:`Romain Brault <RomainBrault>`.
 
-   - Made default kernel parameters kernel-dependent in
-     :class:`kernel_approximation.Nystroem`.
-     :issue:`5229` by :user:`Saurabh Bansod <mth4saurabh>` and `Andreas Müller`_.
+- Made default kernel parameters kernel-dependent in
+  :class:`kernel_approximation.Nystroem`.
+  :issue:`5229` by :user:`Saurabh Bansod <mth4saurabh>` and `Andreas Müller`_.
 
 Model evaluation and meta-estimators
 
-   - :class:`pipeline.Pipeline` is now able to cache transformers
-     within a pipeline by using the ``memory`` constructor parameter.
-     :issue:`7990` by :user:`Guillaume Lemaitre <glemaitre>`.
+- :class:`pipeline.Pipeline` is now able to cache transformers
+  within a pipeline by using the ``memory`` constructor parameter.
+  :issue:`7990` by :user:`Guillaume Lemaitre <glemaitre>`.
 
-   - :class:`pipeline.Pipeline` steps can now be accessed as attributes of its
-     ``named_steps`` attribute. :issue:`8586` by :user:`Herilalaina
-     Rakotoarison <herilalaina>`.
+- :class:`pipeline.Pipeline` steps can now be accessed as attributes of its
+  ``named_steps`` attribute. :issue:`8586` by :user:`Herilalaina
+  Rakotoarison <herilalaina>`.
 
-   - Added ``sample_weight`` parameter to :meth:`pipeline.Pipeline.score`.
-     :issue:`7723` by :user:`Mikhail Korobov <kmike>`.
+- Added ``sample_weight`` parameter to :meth:`pipeline.Pipeline.score`.
+  :issue:`7723` by :user:`Mikhail Korobov <kmike>`.
 
-   - Added ability to set ``n_jobs`` parameter to :func:`pipeline.make_union`.
-     A ``TypeError`` will be raised for any other kwargs. :issue:`8028`
-     by :user:`Alexander Booth <alexandercbooth>`.
+- Added ability to set ``n_jobs`` parameter to :func:`pipeline.make_union`.
+  A ``TypeError`` will be raised for any other kwargs. :issue:`8028`
+  by :user:`Alexander Booth <alexandercbooth>`.
 
-   - :class:`model_selection.GridSearchCV`,
-     :class:`model_selection.RandomizedSearchCV` and
-     :func:`model_selection.cross_val_score` now allow estimators with callable
-     kernels which were previously prohibited.
-     :issue:`8005` by `Andreas Müller`_ .
+- :class:`model_selection.GridSearchCV`,
+  :class:`model_selection.RandomizedSearchCV` and
+  :func:`model_selection.cross_val_score` now allow estimators with callable
+  kernels which were previously prohibited.
+  :issue:`8005` by `Andreas Müller`_ .
 
-   - :func:`model_selection.cross_val_predict` now returns output of the
-     correct shape for all values of the argument ``method``.
-     :issue:`7863` by :user:`Aman Dalmia <dalmia>`.
+- :func:`model_selection.cross_val_predict` now returns output of the
+  correct shape for all values of the argument ``method``.
+  :issue:`7863` by :user:`Aman Dalmia <dalmia>`.
 
-   - Added ``shuffle`` and ``random_state`` parameters to shuffle training
-     data before taking prefixes of it based on training sizes in
-     :func:`model_selection.learning_curve`.
-     :issue:`7506` by :user:`Narine Kokhlikyan <NarineK>`.
+- Added ``shuffle`` and ``random_state`` parameters to shuffle training
+  data before taking prefixes of it based on training sizes in
+  :func:`model_selection.learning_curve`.
+  :issue:`7506` by :user:`Narine Kokhlikyan <NarineK>`.
 
-   - :class:`model_selection.StratifiedShuffleSplit` now works with multioutput
-     multiclass (or multilabel) data.  :issue:`9044` by `Vlad Niculae`_.
+- :class:`model_selection.StratifiedShuffleSplit` now works with multioutput
+  multiclass (or multilabel) data.  :issue:`9044` by `Vlad Niculae`_.
 
-   - Speed improvements to :class:`model_selection.StratifiedShuffleSplit`.
-     :issue:`5991` by :user:`Arthur Mensch <arthurmensch>` and `Joel Nothman`_.
+- Speed improvements to :class:`model_selection.StratifiedShuffleSplit`.
+  :issue:`5991` by :user:`Arthur Mensch <arthurmensch>` and `Joel Nothman`_.
 
-   - Add ``shuffle`` parameter to :func:`model_selection.train_test_split`.
-     :issue:`8845` by  :user:`themrmax <themrmax>`
+- Add ``shuffle`` parameter to :func:`model_selection.train_test_split`.
+  :issue:`8845` by :user:`themrmax <themrmax>`
 
-   - :class:`multioutput.MultiOutputRegressor` and :class:`multioutput.MultiOutputClassifier`
-     now support online learning using ``partial_fit``.
-     :issue: `8053` by :user:`Peng Yu <yupbank>`.
+- :class:`multioutput.MultiOutputRegressor` and :class:`multioutput.MultiOutputClassifier`
+  now support online learning using ``partial_fit``.
+  :issue:`8053` by :user:`Peng Yu <yupbank>`.
 
-   - Add ``max_train_size`` parameter to :class:`model_selection.TimeSeriesSplit`
-     :issue:`8282` by :user:`Aman Dalmia <dalmia>`.
+- Add ``max_train_size`` parameter to :class:`model_selection.TimeSeriesSplit`
+  :issue:`8282` by :user:`Aman Dalmia <dalmia>`.
 
-   - More clustering metrics are now available through :func:`metrics.get_scorer`
-     and ``scoring`` parameters. :issue:`8117` by `Raghav RV`_.
+- More clustering metrics are now available through :func:`metrics.get_scorer`
+  and ``scoring`` parameters. :issue:`8117` by `Raghav RV`_.
 
 Metrics
 
-   - :func:`metrics.matthews_corrcoef` now support multiclass classification.
-     :issue:`8094` by :user:`Jon Crall <Erotemic>`.
+- :func:`metrics.matthews_corrcoef` now supports multiclass classification.
+  :issue:`8094` by :user:`Jon Crall <Erotemic>`.
 
-   - Add ``sample_weight`` parameter to :func:`metrics.cohen_kappa_score`.
-     :issue:`8335` by :user:`Victor Poughon <vpoughon>`.
+- Add ``sample_weight`` parameter to :func:`metrics.cohen_kappa_score`.
+  :issue:`8335` by :user:`Victor Poughon <vpoughon>`.
 
 Miscellaneous
 
-   - :func:`utils.check_estimator` now attempts to ensure that methods
-     transform, predict, etc.  do not set attributes on the estimator.
-     :issue:`7533` by :user:`Ekaterina Krivich <kiote>`.
+- :func:`utils.check_estimator` now attempts to ensure that methods
+  transform, predict, etc.  do not set attributes on the estimator.
+  :issue:`7533` by :user:`Ekaterina Krivich <kiote>`.
 
-   - Added type checking to the ``accept_sparse`` parameter in
-     :mod:`utils.validation` methods. This parameter now accepts only boolean,
-     string, or list/tuple of strings. ``accept_sparse=None`` is deprecated and
-     should be replaced by ``accept_sparse=False``.
-     :issue:`7880` by :user:`Josh Karnofsky <jkarno>`.
+- Added type checking to the ``accept_sparse`` parameter in
+  :mod:`utils.validation` methods. This parameter now accepts only boolean,
+  string, or list/tuple of strings. ``accept_sparse=None`` is deprecated and
+  should be replaced by ``accept_sparse=False``.
+  :issue:`7880` by :user:`Josh Karnofsky <jkarno>`.
 
-   - Make it possible to load a chunk of an svmlight formatted file by
-     passing a range of bytes to :func:`datasets.load_svmlight_file`.
-     :issue:`935` by :user:`Olivier Grisel <ogrisel>`.
+- Make it possible to load a chunk of an svmlight formatted file by
+  passing a range of bytes to :func:`datasets.load_svmlight_file`.
+  :issue:`935` by :user:`Olivier Grisel <ogrisel>`.
 
-   - :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor`
-     now accept non-finite features. :issue:`8931` by :user:`Attractadore`.
+- :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor`
+  now accept non-finite features. :issue:`8931` by :user:`Attractadore`.
 
 Bug fixes
 .........
 
 Trees and ensembles
 
-   - Fixed a memory leak in trees when using trees with ``criterion='mae'``.
-     :issue:`8002` by `Raghav RV`_.
+- Fixed a memory leak in trees when using trees with ``criterion='mae'``.
+  :issue:`8002` by `Raghav RV`_.
 
-   - Fixed a bug where :class:`ensemble.IsolationForest` uses an
-     an incorrect formula for the average path length
-     :issue:`8549` by `Peter Wang <https://github.com/PTRWang>`_.
+- Fixed a bug where :class:`ensemble.IsolationForest` uses an
+  incorrect formula for the average path length
+  :issue:`8549` by `Peter Wang <https://github.com/PTRWang>`_.
 
-   - Fixed a bug where :class:`ensemble.AdaBoostClassifier` throws
-     ``ZeroDivisionError`` while fitting data with single class labels.
-     :issue:`7501` by :user:`Dominik Krzeminski <dokato>`.
+- Fixed a bug where :class:`ensemble.AdaBoostClassifier` throws
+  ``ZeroDivisionError`` while fitting data with single class labels.
+  :issue:`7501` by :user:`Dominik Krzeminski <dokato>`.
 
-   - Fixed a bug in :class:`ensemble.GradientBoostingClassifier` and
-     :class:`ensemble.GradientBoostingRegressor` where a float being compared
-     to ``0.0`` using ``==`` caused a divide by zero error. :issue:`7970` by
-     :user:`He Chen <chenhe95>`.
+- Fixed a bug in :class:`ensemble.GradientBoostingClassifier` and
+  :class:`ensemble.GradientBoostingRegressor` where a float being compared
+  to ``0.0`` using ``==`` caused a divide by zero error. :issue:`7970` by
+  :user:`He Chen <chenhe95>`.
 
-   - Fix a bug where :class:`ensemble.GradientBoostingClassifier` and
-     :class:`ensemble.GradientBoostingRegressor` ignored the
-     ``min_impurity_split`` parameter.
-     :issue:`8006` by :user:`Sebastian Pölsterl <sebp>`.
+- Fix a bug where :class:`ensemble.GradientBoostingClassifier` and
+  :class:`ensemble.GradientBoostingRegressor` ignored the
+  ``min_impurity_split`` parameter.
+  :issue:`8006` by :user:`Sebastian Pölsterl <sebp>`.
 
-   - Fixed ``oob_score`` in :class:`ensemble.BaggingClassifier`.
-     :issue:`8936` by :user:`Michael Lewis <mlewis1729>`
+- Fixed ``oob_score`` in :class:`ensemble.BaggingClassifier`.
+  :issue:`8936` by :user:`Michael Lewis <mlewis1729>`
 
-   - Fixed excessive memory usage in prediction for random forests estimators.
-     :issue:`8672` by :user:`Mike Benfield <mikebenfield>`.
+- Fixed excessive memory usage in prediction for random forests estimators.
+  :issue:`8672` by :user:`Mike Benfield <mikebenfield>`.
 
-   - Fixed a bug where ``sample_weight`` as a list broke random forests in Python 2
-     :issue:`8068` by :user:`xor`.
+- Fixed a bug where ``sample_weight`` as a list broke random forests in Python 2
+  :issue:`8068` by :user:`xor`.
 
-   - Fixed a bug where :class:`ensemble.IsolationForest` fails when
-     ``max_features`` is less than 1.
-     :issue:`5732` by :user:`Ishank Gulati <IshankGulati>`.
+- Fixed a bug where :class:`ensemble.IsolationForest` fails when
+  ``max_features`` is less than 1.
+  :issue:`5732` by :user:`Ishank Gulati <IshankGulati>`.
 
-   - Fix a bug where gradient boosting with ``loss='quantile'`` computed
-     negative errors for negative values of ``ytrue - ypred`` leading to wrong
-     values when calling ``__call__``.
-     :issue:`8087` by :user:`Alexis Mignon <AlexisMignon>`
+- Fix a bug where gradient boosting with ``loss='quantile'`` computed
+  negative errors for negative values of ``ytrue - ypred`` leading to wrong
+  values when calling ``__call__``.
+  :issue:`8087` by :user:`Alexis Mignon <AlexisMignon>`
 
-   - Fix a bug where :class:`ensemble.VotingClassifier` raises an error
-     when a numpy array is passed in for weights. :issue:`7983` by
-     :user:`Vincent Pham <vincentpham1991>`.
+- Fix a bug where :class:`ensemble.VotingClassifier` raises an error
+  when a numpy array is passed in for weights. :issue:`7983` by
+  :user:`Vincent Pham <vincentpham1991>`.
 
-   - Fixed a bug where :func:`tree.export_graphviz` raised an error
-     when the length of features_names does not match n_features in the decision
-     tree. :issue:`8512` by :user:`Li Li <aikinogard>`.
+- Fixed a bug where :func:`tree.export_graphviz` raised an error
+  when the length of feature_names does not match n_features in the decision
+  tree. :issue:`8512` by :user:`Li Li <aikinogard>`.
 
 Linear, kernelized and related models
 
-   - Fixed a bug where :func:`linear_model.RANSACRegressor.fit` may run until
-     ``max_iter`` if it finds a large inlier group early. :issue:`8251` by
-     :user:`aivision2020`.
+- Fixed a bug where :func:`linear_model.RANSACRegressor.fit` may run until
+  ``max_iter`` if it finds a large inlier group early. :issue:`8251` by
+  :user:`aivision2020`.
 
-   - Fixed a bug where :class:`naive_bayes.MultinomialNB` and
-     :class:`naive_bayes.BernoulliNB` failed when ``alpha=0``. :issue:`5814` by
-     :user:`Yichuan Liu <yl565>` and :user:`Herilalaina Rakotoarison
-     <herilalaina>`.
+- Fixed a bug where :class:`naive_bayes.MultinomialNB` and
+  :class:`naive_bayes.BernoulliNB` failed when ``alpha=0``. :issue:`5814` by
+  :user:`Yichuan Liu <yl565>` and :user:`Herilalaina Rakotoarison
+  <herilalaina>`.
 
-   - Fixed a bug where :class:`linear_model.LassoLars` does not give
-     the same result as the LassoLars implementation available
-     in R (lars library). :issue:`7849` by :user:`Jair Montoya Martinez <jmontoyam>`.
+- Fixed a bug where :class:`linear_model.LassoLars` does not give
+  the same result as the LassoLars implementation available
+  in R (lars library). :issue:`7849` by :user:`Jair Montoya Martinez <jmontoyam>`.
 
-   - Fixed a bug in :class:`linear_model.RandomizedLasso`,
-     :class:`linear_model.Lars`, :class:`linear_model.LassoLars`,
-     :class:`linear_model.LarsCV` and :class:`linear_model.LassoLarsCV`,
-     where the parameter ``precompute`` was not used consistently across
-     classes, and some values proposed in the docstring could raise errors.
-     :issue:`5359` by `Tom Dupre la Tour`_.
+- Fixed a bug in :class:`linear_model.RandomizedLasso`,
+  :class:`linear_model.Lars`, :class:`linear_model.LassoLars`,
+  :class:`linear_model.LarsCV` and :class:`linear_model.LassoLarsCV`,
+  where the parameter ``precompute`` was not used consistently across
+  classes, and some values proposed in the docstring could raise errors.
+  :issue:`5359` by `Tom Dupre la Tour`_.
 
-   - Fix inconsistent results between :class:`linear_model.RidgeCV` and
-     :class:`linear_model.Ridge` when using ``normalize=True``. :issue:`9302`
-     by `Alexandre Gramfort`_.
+- Fix inconsistent results between :class:`linear_model.RidgeCV` and
+  :class:`linear_model.Ridge` when using ``normalize=True``. :issue:`9302`
+  by `Alexandre Gramfort`_.
 
-   - Fix a bug where :func:`linear_model.LassoLars.fit` sometimes
-     left ``coef_`` as a list, rather than an ndarray.
-     :issue:`8160` by :user:`CJ Carey <perimosocordiae>`.
+- Fix a bug where :func:`linear_model.LassoLars.fit` sometimes
+  left ``coef_`` as a list, rather than an ndarray.
+  :issue:`8160` by :user:`CJ Carey <perimosocordiae>`.
 
-   - Fix :func:`linear_model.BayesianRidge.fit` to return
-     ridge parameter ``alpha_`` and ``lambda_`` consistent with calculated
-     coefficients ``coef_`` and ``intercept_``.
-     :issue:`8224` by :user:`Peter Gedeck <gedeck>`.
+- Fix :func:`linear_model.BayesianRidge.fit` to return
+  ridge parameter ``alpha_`` and ``lambda_`` consistent with calculated
+  coefficients ``coef_`` and ``intercept_``.
+  :issue:`8224` by :user:`Peter Gedeck <gedeck>`.
 
-   - Fixed a bug in :class:`svm.OneClassSVM` where it returned floats instead of
-     integer classes. :issue:`8676` by :user:`Vathsala Achar <VathsalaAchar>`.
+- Fixed a bug in :class:`svm.OneClassSVM` where it returned floats instead of
+  integer classes. :issue:`8676` by :user:`Vathsala Achar <VathsalaAchar>`.
 
-   - Fix AIC/BIC criterion computation in :class:`linear_model.LassoLarsIC`.
-     :issue:`9022` by `Alexandre Gramfort`_ and :user:`Mehmet Basbug <mehmetbasbug>`.
+- Fix AIC/BIC criterion computation in :class:`linear_model.LassoLarsIC`.
+  :issue:`9022` by `Alexandre Gramfort`_ and :user:`Mehmet Basbug <mehmetbasbug>`.
 
-   - Fixed a memory leak in our LibLinear implementation. :issue:`9024` by
-     :user:`Sergei Lebedev <superbobry>`
+- Fixed a memory leak in our LibLinear implementation. :issue:`9024` by
+  :user:`Sergei Lebedev <superbobry>`
 
-   - Fix bug where stratified CV splitters did not work with
-     :class:`linear_model.LassoCV`. :issue:`8973` by
-     :user:`Paulo Haddad <paulochf>`.
+- Fix bug where stratified CV splitters did not work with
+  :class:`linear_model.LassoCV`. :issue:`8973` by
+  :user:`Paulo Haddad <paulochf>`.
 
-   - Fixed a bug in :class:`gaussian_process.GaussianProcessRegressor`
-     when the standard deviation and covariance predicted without fit
-     would fail with a unmeaningful error by default.
-     :issue:`6573` by :user:`Quazi Marufur Rahman <qmaruf>` and
-     `Manoj Kumar`_.
+- Fixed a bug in :class:`gaussian_process.GaussianProcessRegressor`
+  when the standard deviation and covariance predicted without fit
+  would fail with an unmeaningful error by default.
+  :issue:`6573` by :user:`Quazi Marufur Rahman <qmaruf>` and
+  `Manoj Kumar`_.
 
 Other predictors
 
-   - Fix :class:`semi_supervised.BaseLabelPropagation` to correctly implement
-     ``LabelPropagation`` and ``LabelSpreading`` as done in the referenced
-     papers. :issue:`9239`
-     by :user:`Andre Ambrosio Boechat <boechat107>`, :user:`Utkarsh Upadhyay
-     <musically-ut>`, and `Joel Nothman`_.
+- Fix :class:`semi_supervised.BaseLabelPropagation` to correctly implement
+  ``LabelPropagation`` and ``LabelSpreading`` as done in the referenced
+  papers. :issue:`9239`
+  by :user:`Andre Ambrosio Boechat <boechat107>`, :user:`Utkarsh Upadhyay
+  <musically-ut>`, and `Joel Nothman`_.
 
 Decomposition, manifold learning and clustering
 
-   - Fixed the implementation of :class:`manifold.TSNE`:
-      - ``early_exageration`` parameter had no effect and is now used for the
-        first 250 optimization iterations.
-      - Fixed the ``AssertionError: Tree consistency failed`` exception
-        reported in :issue:`8992`.
-      - Improve the learning schedule to match the one from the reference
-        implementation `lvdmaaten/bhtsne <https://github.com/lvdmaaten/bhtsne>`_.
+- Fixed the implementation of :class:`manifold.TSNE`:
+- ``early_exaggeration`` parameter had no effect and is now used for the
+  first 250 optimization iterations.
+- Fixed the ``AssertionError: Tree consistency failed`` exception
+  reported in :issue:`8992`.
+- Improve the learning schedule to match the one from the reference
+  implementation `lvdmaaten/bhtsne <https://github.com/lvdmaaten/bhtsne>`_.
      by :user:`Thomas Moreau <tomMoral>` and `Olivier Grisel`_.
 
-   - Fix a bug in :class:`decomposition.LatentDirichletAllocation`
-     where the ``perplexity`` method was returning incorrect results because
-     the ``transform`` method returns normalized document topic distributions
-     as of version 0.18. :issue:`7954` by :user:`Gary Foreman <garyForeman>`.
-
-   - Fix output shape and bugs with n_jobs > 1 in
-     :class:`decomposition.SparseCoder` transform and
-     :func:`decomposition.sparse_encode`
-     for one-dimensional data and one component.
-     This also impacts the output shape of :class:`decomposition.DictionaryLearning`.
-     :issue:`8086` by `Andreas Müller`_.
-
-   - Fixed the implementation of ``explained_variance_``
-     in :class:`decomposition.PCA`,
-     :class:`decomposition.RandomizedPCA` and
-     :class:`decomposition.IncrementalPCA`.
-     :issue:`9105` by `Hanmin Qin <https://github.com/qinhanmin2014>`_. 
-
-   - Fixed a bug where :class:`cluster.DBSCAN` gives incorrect
-     result when input is a precomputed sparse matrix with initial
-     rows all zero. :issue:`8306` by :user:`Akshay Gupta <Akshay0724>`
-
-   - Fix a bug regarding fitting :class:`cluster.KMeans` with a sparse
-     array X and initial centroids, where X's means were unnecessarily being
-     subtracted from the centroids. :issue:`7872` by :user:`Josh Karnofsky <jkarno>`.
-
-   - Fixes to the input validation in :class:`covariance.EllipticEnvelope`.
-     :issue:`8086` by `Andreas Müller`_.
-
-   - Fixed a bug in :class:`covariance.MinCovDet` where inputting data
-     that produced a singular covariance matrix would cause the helper method
-     ``_c_step`` to throw an exception.
-     :issue:`3367` by :user:`Jeremy Steward <ThatGeoGuy>`
-
-   - Fixed a bug in :class:`manifold.TSNE` affecting convergence of the
-     gradient descent. :issue:`8768` by :user:`David DeTomaso <deto>`.
-
-   - Fixed a bug in :class:`manifold.TSNE` where it stored the incorrect
-     ``kl_divergence_``. :issue:`6507` by :user:`Sebastian Saeger <ssaeger>`.
-
-   - Fixed improper scaling in :class:`cross_decomposition.PLSRegression`
-     with ``scale=True``. :issue:`7819` by :user:`jayzed82 <jayzed82>`.
-
-   - :class:`cluster.bicluster.SpectralCoclustering` and
-     :class:`cluster.bicluster.SpectralBiclustering` ``fit`` method conforms
-     with API by accepting ``y`` and returning the object.  :issue:`6126`,
-     :issue:`7814` by :user:`Laurent Direr <ldirer>` and :user:`Maniteja
-     Nandana <maniteja123>`.
-
-   - Fix bug where :mod:`mixture` ``sample`` methods did not return as many
-     samples as requested. :issue:`7702` by :user:`Levi John Wolf <ljwolf>`.
-
-   - Fix for uninformative error in :class:`decomposition.incremental_pca`:
-     now an error is raised if the number of components is larger than the
-     chosen batch size. The ``n_components=None`` case was adapted accordingly.
-     :issue:`6452`. By :user:`Wally Gauze <wallygauze>`.
-     
+- Fix a bug in :class:`decomposition.LatentDirichletAllocation`
+  where the ``perplexity`` method was returning incorrect results because
+  the ``transform`` method returns normalized document topic distributions
+  as of version 0.18. :issue:`7954` by :user:`Gary Foreman <garyForeman>`.
+
+- Fix output shape and bugs with n_jobs > 1 in
+  :class:`decomposition.SparseCoder` transform and
+  :func:`decomposition.sparse_encode`
+  for one-dimensional data and one component.
+  This also impacts the output shape of :class:`decomposition.DictionaryLearning`.
+  :issue:`8086` by `Andreas Müller`_.
+
+- Fixed the implementation of ``explained_variance_``
+  in :class:`decomposition.PCA`,
+  :class:`decomposition.RandomizedPCA` and
+  :class:`decomposition.IncrementalPCA`.
+  :issue:`9105` by `Hanmin Qin <https://github.com/qinhanmin2014>`_.
+
+- Fixed the implementation of ``noise_variance_`` in :class:`decomposition.PCA`.
+  :issue:`9108` by `Hanmin Qin <https://github.com/qinhanmin2014>`_.
+
+- Fixed a bug where :class:`cluster.DBSCAN` gives incorrect
+  result when input is a precomputed sparse matrix with initial
+  rows all zero. :issue:`8306` by :user:`Akshay Gupta <Akshay0724>`
+
+- Fix a bug regarding fitting :class:`cluster.KMeans` with a sparse
+  array X and initial centroids, where X's means were unnecessarily being
+  subtracted from the centroids. :issue:`7872` by :user:`Josh Karnofsky <jkarno>`.
+
+- Fixes to the input validation in :class:`covariance.EllipticEnvelope`.
+  :issue:`8086` by `Andreas Müller`_.
+
+- Fixed a bug in :class:`covariance.MinCovDet` where inputting data
+  that produced a singular covariance matrix would cause the helper method
+  ``_c_step`` to throw an exception.
+  :issue:`3367` by :user:`Jeremy Steward <ThatGeoGuy>`
+
+- Fixed a bug in :class:`manifold.TSNE` affecting convergence of the
+  gradient descent. :issue:`8768` by :user:`David DeTomaso <deto>`.
+
+- Fixed a bug in :class:`manifold.TSNE` where it stored the incorrect
+  ``kl_divergence_``. :issue:`6507` by :user:`Sebastian Saeger <ssaeger>`.
+
+- Fixed improper scaling in :class:`cross_decomposition.PLSRegression`
+  with ``scale=True``. :issue:`7819` by :user:`jayzed82 <jayzed82>`.
+
+- :class:`cluster.bicluster.SpectralCoclustering` and
+  :class:`cluster.bicluster.SpectralBiclustering` ``fit`` method conforms
+  with API by accepting ``y`` and returning the object.  :issue:`6126`,
+  :issue:`7814` by :user:`Laurent Direr <ldirer>` and :user:`Maniteja
+  Nandana <maniteja123>`.
+
+- Fix bug where :mod:`mixture` ``sample`` methods did not return as many
+  samples as requested. :issue:`7702` by :user:`Levi John Wolf <ljwolf>`.
+
+- Fixed the shrinkage implementation in :class:`neighbors.NearestCentroid`.
+  :issue:`9219` by `Hanmin Qin <https://github.com/qinhanmin2014>`_.
+
 Preprocessing and feature selection
 
-   - For sparse matrices, :func:`preprocessing.normalize` with ``return_norm=True``
-     will now raise a ``NotImplementedError`` with 'l1' or 'l2' norm and with
-     norm 'max' the norms returned will be the same as for dense matrices.
-     :issue:`7771` by `Ang Lu <https://github.com/luang008>`_.
+- For sparse matrices, :func:`preprocessing.normalize` with ``return_norm=True``
+  will now raise a ``NotImplementedError`` with 'l1' or 'l2' norm and with
+  norm 'max' the norms returned will be the same as for dense matrices.
+  :issue:`7771` by `Ang Lu <https://github.com/luang008>`_.
 
-   - Fix a bug where :class:`feature_selection.SelectFdr` did not
-     exactly implement Benjamini-Hochberg procedure. It formerly may have
-     selected fewer features than it should.
-     :issue:`7490` by :user:`Peng Meng <mpjlu>`.
+- Fix a bug where :class:`feature_selection.SelectFdr` did not
+  exactly implement Benjamini-Hochberg procedure. It formerly may have
+  selected fewer features than it should.
+  :issue:`7490` by :user:`Peng Meng <mpjlu>`.
 
-   - Fixed a bug where :class:`linear_model.RandomizedLasso` and
-     :class:`linear_model.RandomizedLogisticRegression` breaks for
-     sparse input. :issue:`8259` by :user:`Aman Dalmia <dalmia>`.
+- Fixed a bug where :class:`linear_model.RandomizedLasso` and
+  :class:`linear_model.RandomizedLogisticRegression` breaks for
+  sparse input. :issue:`8259` by :user:`Aman Dalmia <dalmia>`.
 
-   - Fix a bug where :class:`feature_extraction.FeatureHasher`
-     mandatorily applied a sparse random projection to the hashed features,
-     preventing the use of
-     :class:`feature_extraction.text.HashingVectorizer` in a
-     pipeline with  :class:`feature_extraction.text.TfidfTransformer`.
-     :issue:`7565` by :user:`Roman Yurchak <rth>`.
+- Fix a bug where :class:`feature_extraction.FeatureHasher`
+  mandatorily applied a sparse random projection to the hashed features,
+  preventing the use of
+  :class:`feature_extraction.text.HashingVectorizer` in a
+  pipeline with  :class:`feature_extraction.text.TfidfTransformer`.
+  :issue:`7565` by :user:`Roman Yurchak <rth>`.
 
-   - Fix a bug where :class:`feature_selection.mutual_info_regression` did not
-     correctly use ``n_neighbors``. :issue:`8181` by :user:`Guillaume Lemaitre
-     <glemaitre>`.
+- Fix a bug where :class:`feature_selection.mutual_info_regression` did not
+  correctly use ``n_neighbors``. :issue:`8181` by :user:`Guillaume Lemaitre
+  <glemaitre>`.
 
 Model evaluation and meta-estimators
 
-   - Fixed a bug where :func:`model_selection.BaseSearchCV.inverse_transform`
-     returns ``self.best_estimator_.transform()`` instead of
-     ``self.best_estimator_.inverse_transform()``.
-     :issue:`8344` by :user:`Akshay Gupta <Akshay0724>` and :user:`Rasmus Eriksson <MrMjauh>`.
+- Fixed a bug where :func:`model_selection.BaseSearchCV.inverse_transform`
+  returns ``self.best_estimator_.transform()`` instead of
+  ``self.best_estimator_.inverse_transform()``.
+  :issue:`8344` by :user:`Akshay Gupta <Akshay0724>` and :user:`Rasmus Eriksson <MrMjauh>`.
+
+- Added ``classes_`` attribute to :class:`model_selection.GridSearchCV`,
+  :class:`model_selection.RandomizedSearchCV`,  :class:`grid_search.GridSearchCV`,
+  and  :class:`grid_search.RandomizedSearchCV` that matches the ``classes_``
+  attribute of ``best_estimator_``. :issue:`7661` and :issue:`8295`
+  by :user:`Alyssa Batula <abatula>`, :user:`Dylan Werner-Meier <unautre>`,
+  and :user:`Stephen Hoover <stephen-hoover>`.
 
-   - Added ``classes_`` attribute to :class:`model_selection.GridSearchCV`,
-     :class:`model_selection.RandomizedSearchCV`,  :class:`grid_search.GridSearchCV`,
-     and  :class:`grid_search.RandomizedSearchCV` that matches the ``classes_``
-     attribute of ``best_estimator_``. :issue:`7661` and :issue:`8295`
-     by :user:`Alyssa Batula <abatula>`, :user:`Dylan Werner-Meier <unautre>`,
-     and :user:`Stephen Hoover <stephen-hoover>`.
+- Fixed a bug where :func:`model_selection.validation_curve`
+  reused the same estimator for each parameter value.
+  :issue:`7365` by :user:`Aleksandr Sandrovskii <Sundrique>`.
 
-   - Fixed a bug where :func:`model_selection.validation_curve`
-     reused the same estimator for each parameter value.
-     :issue:`7365` by :user:`Aleksandr Sandrovskii <Sundrique>`.
+- :func:`model_selection.permutation_test_score` now works with Pandas
+  types. :issue:`5697` by :user:`Stijn Tonk <equialgo>`.
 
-   - :func:`model_selection.permutation_test_score` now works with Pandas
-     types. :issue:`5697` by :user:`Stijn Tonk <equialgo>`.
+- Several fixes to input validation in
+  :class:`multiclass.OutputCodeClassifier`
+  :issue:`8086` by `Andreas Müller`_.
 
-   - Several fixes to input validation in
-     :class:`multiclass.OutputCodeClassifier`
-     :issue:`8086` by `Andreas Müller`_.
+- :class:`multiclass.OneVsOneClassifier`'s ``partial_fit`` now ensures all
+  classes are provided up-front. :issue:`6250` by
+  :user:`Asish Panda <kaichogami>`.
 
-   - :class:`multiclass.OneVsOneClassifier`'s ``partial_fit`` now ensures all
-     classes are provided up-front. :issue:`6250` by
-     :user:`Asish Panda <kaichogami>`.
+- Fix :func:`multioutput.MultiOutputClassifier.predict_proba` to return a
+  list of 2d arrays, rather than a 3d array. In the case where different
+  target columns had different numbers of classes, a ``ValueError`` would be
+  raised on trying to stack matrices with different dimensions.
+  :issue:`8093` by :user:`Peter Bull <pjbull>`.
 
-   - Fix :func:`multioutput.MultiOutputClassifier.predict_proba` to return a
-     list of 2d arrays, rather than a 3d array. In the case where different
-     target columns had different numbers of classes, a ``ValueError`` would be
-     raised on trying to stack matrices with different dimensions.
-     :issue:`8093` by :user:`Peter Bull <pjbull>`.
+- Cross validation now works with Pandas datatypes that have a
+  read-only index. :issue:`9507` by `Loic Esteve`_.
 
 Metrics
 
-   - :func:`metrics.average_precision_score` no longer linearly
-     interpolates between operating points, and instead weighs precisions
-     by the change in recall since the last operating point, as per the
-     `Wikipedia entry <http://en.wikipedia.org/wiki/Average_precision>`_.
-     (`#7356 <https://github.com/scikit-learn/scikit-learn/pull/7356>`_). By
-     :user:`Nick Dingwall <ndingwall>` and `Gael Varoquaux`_.
+- :func:`metrics.average_precision_score` no longer linearly
+  interpolates between operating points, and instead weighs precisions
+  by the change in recall since the last operating point, as per the
+  `Wikipedia entry <http://en.wikipedia.org/wiki/Average_precision>`_.
+  (`#7356 <https://github.com/scikit-learn/scikit-learn/pull/7356>`_). By
+  :user:`Nick Dingwall <ndingwall>` and `Gael Varoquaux`_.
 
-   - Fix a bug in :func:`metrics.classification._check_targets`
-     which would return ``'binary'`` if ``y_true`` and ``y_pred`` were
-     both ``'binary'`` but the union of ``y_true`` and ``y_pred`` was
-     ``'multiclass'``. :issue:`8377` by `Loic Esteve`_.
+- Fix a bug in :func:`metrics.classification._check_targets`
+  which would return ``'binary'`` if ``y_true`` and ``y_pred`` were
+  both ``'binary'`` but the union of ``y_true`` and ``y_pred`` was
+  ``'multiclass'``. :issue:`8377` by `Loic Esteve`_.
 
-   - Fixed an integer overflow bug in :func:`metrics.confusion_matrix` and
-     hence :func:`metrics.cohen_kappa_score`. :issue:`8354`, :issue:`7929`
-     by `Joel Nothman`_ and :user:`Jon Crall <Erotemic>`.
+- Fixed an integer overflow bug in :func:`metrics.confusion_matrix` and
+  hence :func:`metrics.cohen_kappa_score`. :issue:`8354`, :issue:`7929`
+  by `Joel Nothman`_ and :user:`Jon Crall <Erotemic>`.
 
-   - Fixed passing of ``gamma`` parameter to the ``chi2`` kernel in
-     :func:`metrics.pairwise.pairwise_kernels` :issue:`5211` by
-     :user:`Nick Rhinehart <nrhine1>`,
-     :user:`Saurabh Bansod <mth4saurabh>` and `Andreas Müller`_.
+- Fixed passing of ``gamma`` parameter to the ``chi2`` kernel in
+  :func:`metrics.pairwise.pairwise_kernels` :issue:`5211` by
+  :user:`Nick Rhinehart <nrhine1>`,
+  :user:`Saurabh Bansod <mth4saurabh>` and `Andreas Müller`_.
 
 Miscellaneous
 
-   - Fixed a bug when :func:`datasets.make_classification` fails
-     when generating more than 30 features. :issue:`8159` by
-     :user:`Herilalaina Rakotoarison <herilalaina>`.
+- Fixed a bug when :func:`datasets.make_classification` fails
+  when generating more than 30 features. :issue:`8159` by
+  :user:`Herilalaina Rakotoarison <herilalaina>`.
 
-   - Fixed a bug where :func:`datasets.make_moons` gives an
-     incorrect result when ``n_samples`` is odd.
-     :issue:`8198` by :user:`Josh Levy <levy5674>`.
+- Fixed a bug where :func:`datasets.make_moons` gives an
+  incorrect result when ``n_samples`` is odd.
+  :issue:`8198` by :user:`Josh Levy <levy5674>`.
 
-   - Some ``fetch_`` functions in :mod:`datasets` were ignoring the
-     ``download_if_missing`` keyword. :issue:`7944` by :user:`Ralf Gommers <rgommers>`.
+- Some ``fetch_`` functions in :mod:`datasets` were ignoring the
+  ``download_if_missing`` keyword. :issue:`7944` by :user:`Ralf Gommers <rgommers>`.
 
-   - Fix estimators to accept a ``sample_weight`` parameter of type
-     ``pandas.Series`` in their ``fit`` function. :issue:`7825` by
-     `Kathleen Chen`_.
+- Fix estimators to accept a ``sample_weight`` parameter of type
+  ``pandas.Series`` in their ``fit`` function. :issue:`7825` by
+  `Kathleen Chen`_.
 
-   - Fix a bug in cases where ``numpy.cumsum`` may be numerically unstable,
-     raising an exception if instability is identified. :issue:`7376` and
-     :issue:`7331` by `Joel Nothman`_ and :user:`yangarbiter`.
+- Fix a bug in cases where ``numpy.cumsum`` may be numerically unstable,
+  raising an exception if instability is identified. :issue:`7376` and
+  :issue:`7331` by `Joel Nothman`_ and :user:`yangarbiter`.
 
-   - Fix a bug where :meth:`base.BaseEstimator.__getstate__`
-     obstructed pickling customizations of child-classes, when used in a
-     multiple inheritance context.
-     :issue:`8316` by :user:`Holger Peters <HolgerPeters>`.
+- Fix a bug where :meth:`base.BaseEstimator.__getstate__`
+  obstructed pickling customizations of child-classes, when used in a
+  multiple inheritance context.
+  :issue:`8316` by :user:`Holger Peters <HolgerPeters>`.
 
-   - Update Sphinx-Gallery from 0.1.4 to 0.1.7 for resolving links in
-     documentation build with Sphinx>1.5 :issue:`8010`, :issue:`7986` by
-     :user:`Oscar Najera <Titan-C>`
+- Update Sphinx-Gallery from 0.1.4 to 0.1.7 for resolving links in
+  documentation build with Sphinx>1.5 :issue:`8010`, :issue:`7986` by
+  :user:`Oscar Najera <Titan-C>`
 
-   - Add ``data_home`` parameter to :func:`sklearn.datasets.fetch_kddcup99`.
-     :issue:`9289` by `Loic Esteve`_.
+- Add ``data_home`` parameter to :func:`sklearn.datasets.fetch_kddcup99`.
+  :issue:`9289` by `Loic Esteve`_.
 
-   - Fix dataset loaders using Python 3 version of makedirs to also work in
-     Python 2. :issue:`9284` by :user:`Sebastin Santy <SebastinSanty>`.
+- Fix dataset loaders using Python 3 version of makedirs to also work in
+  Python 2. :issue:`9284` by :user:`Sebastin Santy <SebastinSanty>`.
 
-   - Several minor issues were fixed with thanks to the alerts of
-     [lgtm.com](http://lgtm.com). :issue:`9278` by :user:`Jean Helie <jhelie>`,
-     among others.
+- Several minor issues were fixed with thanks to the alerts of
+  `lgtm.com <http://lgtm.com>`_. :issue:`9278` by :user:`Jean Helie <jhelie>`,
+  among others.
 
 API changes summary
 -------------------
 
 Trees and ensembles
 
-   - Gradient boosting base models are no longer estimators. By `Andreas Müller`_.
+- Gradient boosting base models are no longer estimators. By `Andreas Müller`_.
 
-   - All tree based estimators now accept a ``min_impurity_decrease``
-     parameter in lieu of the ``min_impurity_split``, which is now deprecated.
-     The ``min_impurity_decrease`` helps stop splitting the nodes in which
-     the weighted impurity decrease from splitting is no longer alteast
-     ``min_impurity_decrease``.  :issue:`8449` by `Raghav RV`_.
+- All tree based estimators now accept a ``min_impurity_decrease``
+  parameter in lieu of the ``min_impurity_split``, which is now deprecated.
+  The ``min_impurity_decrease`` helps stop splitting the nodes in which
+  the weighted impurity decrease from splitting is no longer at least
+  ``min_impurity_decrease``.  :issue:`8449` by `Raghav RV`_.
 
 Linear, kernelized and related models
 
-   - ``n_iter`` parameter is deprecated in :class:`linear_model.SGDClassifier`,
-     :class:`linear_model.SGDRegressor`,
-     :class:`linear_model.PassiveAggressiveClassifier`,
-     :class:`linear_model.PassiveAggressiveRegressor` and
-     :class:`linear_model.Perceptron`. By `Tom Dupre la Tour`_.
+- ``n_iter`` parameter is deprecated in :class:`linear_model.SGDClassifier`,
+  :class:`linear_model.SGDRegressor`,
+  :class:`linear_model.PassiveAggressiveClassifier`,
+  :class:`linear_model.PassiveAggressiveRegressor` and
+  :class:`linear_model.Perceptron`. By `Tom Dupre la Tour`_.
 
 Other predictors
 
-   - :class:`neighbors.LSHForest` has been deprecated and will be
-     removed in 0.21 due to poor performance.
-     :issue:`9078` by :user:`Laurent Direr <ldirer>`.
+- :class:`neighbors.LSHForest` has been deprecated and will be
+  removed in 0.21 due to poor performance.
+  :issue:`9078` by :user:`Laurent Direr <ldirer>`.
 
-   - :class:`neighbors.NearestCentroid` no longer purports to support
-     ``metric='precomputed'`` which now raises an error. :issue:`8515` by
-     :user:`Sergul Aydore <sergulaydore>`.
+- :class:`neighbors.NearestCentroid` no longer purports to support
+  ``metric='precomputed'`` which now raises an error. :issue:`8515` by
+  :user:`Sergul Aydore <sergulaydore>`.
 
-   - The ``alpha`` parameter of :class:`semi_supervised.LabelPropagation` now
-     has no effect and is deprecated to be removed in 0.21. :issue:`9239`
-     by :user:`Andre Ambrosio Boechat <boechat107>`, :user:`Utkarsh Upadhyay
-     <musically-ut>`, and `Joel Nothman`_.
+- The ``alpha`` parameter of :class:`semi_supervised.LabelPropagation` now
+  has no effect and is deprecated to be removed in 0.21. :issue:`9239`
+  by :user:`Andre Ambrosio Boechat <boechat107>`, :user:`Utkarsh Upadhyay
+  <musically-ut>`, and `Joel Nothman`_.
 
 Decomposition, manifold learning and clustering
 
-   - Deprecate the ``doc_topic_distr`` argument of the ``perplexity`` method
-     in :class:`decomposition.LatentDirichletAllocation` because the
-     user no longer has access to the unnormalized document topic distribution
-     needed for the perplexity calculation. :issue:`7954` by
-     :user:`Gary Foreman <garyForeman>`.
+- Deprecate the ``doc_topic_distr`` argument of the ``perplexity`` method
+  in :class:`decomposition.LatentDirichletAllocation` because the
+  user no longer has access to the unnormalized document topic distribution
+  needed for the perplexity calculation. :issue:`7954` by
+  :user:`Gary Foreman <garyForeman>`.
 
-   - The ``n_topics`` parameter of :class:`decomposition.LatentDirichletAllocation`
-     has been renamed to ``n_components`` and will be removed in version 0.21.
-     :issue:`8922` by :user:`Attractadore`.
+- The ``n_topics`` parameter of :class:`decomposition.LatentDirichletAllocation`
+  has been renamed to ``n_components`` and will be removed in version 0.21.
+  :issue:`8922` by :user:`Attractadore`.
 
-   - :meth:`decomposition.SparsePCA.transform`'s ``ridge_alpha`` parameter is
-     deprecated in preference for class parameter.
-     :issue:`8137` by :user:`Naoya Kanai <naoyak>`.
+- :meth:`decomposition.SparsePCA.transform`'s ``ridge_alpha`` parameter is
+  deprecated in favor of the class parameter.
+  :issue:`8137` by :user:`Naoya Kanai <naoyak>`.
 
-   - :class:`cluster.DBSCAN` now has a ``metric_params`` parameter.
-     :issue:`8139` by :user:`Naoya Kanai <naoyak>`.
+- :class:`cluster.DBSCAN` now has a ``metric_params`` parameter.
+  :issue:`8139` by :user:`Naoya Kanai <naoyak>`.
 
 Preprocessing and feature selection
 
-   - :class:`feature_selection.SelectFromModel` now has a ``partial_fit``
-     method only if the underlying estimator does. By `Andreas Müller`_.
+- :class:`feature_selection.SelectFromModel` now has a ``partial_fit``
+  method only if the underlying estimator does. By `Andreas Müller`_.
 
-   - :class:`feature_selection.SelectFromModel` now validates the ``threshold``
-     parameter and sets the ``threshold_`` attribute during the call to
-     ``fit``, and no longer during the call to ``transform```. By `Andreas
-     Müller`_.
+- :class:`feature_selection.SelectFromModel` now validates the ``threshold``
+  parameter and sets the ``threshold_`` attribute during the call to
+  ``fit``, and no longer during the call to ``transform``. By `Andreas
+  Müller`_.
 
-   - The ``non_negative`` parameter in :class:`feature_extraction.FeatureHasher`
-     has been deprecated, and replaced with a more principled alternative,
-     ``alternate_sign``.
-     :issue:`7565` by :user:`Roman Yurchak <rth>`.
+- The ``non_negative`` parameter in :class:`feature_extraction.FeatureHasher`
+  has been deprecated, and replaced with a more principled alternative,
+  ``alternate_sign``.
+  :issue:`7565` by :user:`Roman Yurchak <rth>`.
 
-   - :class:`linear_model.RandomizedLogisticRegression`,
-     and :class:`linear_model.RandomizedLasso` have been deprecated and will
-     be removed in version 0.21.
-     :issue:`8995` by :user:`Ramana.S <sentient07>`.
+- :class:`linear_model.RandomizedLogisticRegression`,
+  and :class:`linear_model.RandomizedLasso` have been deprecated and will
+  be removed in version 0.21.
+  :issue:`8995` by :user:`Ramana.S <sentient07>`.
 
 Model evaluation and meta-estimators
 
-   - Deprecate the ``fit_params`` constructor input to the
-     :class:`model_selection.GridSearchCV` and
-     :class:`model_selection.RandomizedSearchCV` in favor
-     of passing keyword parameters to the ``fit`` methods
-     of those classes. Data-dependent parameters needed for model
-     training should be passed as keyword arguments to ``fit``,
-     and conforming to this convention will allow the hyperparameter
-     selection classes to be used with tools such as
-     :func:`model_selection.cross_val_predict`.
-     :issue:`2879` by :user:`Stephen Hoover <stephen-hoover>`.
-
-   - In version 0.21, the default behavior of splitters that use the
-     ``test_size`` and ``train_size`` parameter will change, such that
-     specifying ``train_size`` alone will cause ``test_size`` to be the
-     remainder. :issue:`7459` by :user:`Nelson Liu <nelson-liu>`.
-
-   - :class:`multiclass.OneVsRestClassifier` now has ``partial_fit``,
-     ``decision_function`` and ``predict_proba`` methods only when the
-     underlying estimator does.  :issue:`7812` by `Andreas Müller`_ and
-     :user:`Mikhail Korobov <kmike>`.
-
-   - :class:`multiclass.OneVsRestClassifier` now has a ``partial_fit`` method
-     only if the underlying estimator does.  By `Andreas Müller`_.
-
-   - The ``decision_function`` output shape for binary classification in
-     :class:`multiclass.OneVsRestClassifier` and
-     :class:`multiclass.OneVsOneClassifier` is now ``(n_samples,)`` to conform
-     to scikit-learn conventions. :issue:`9100` by `Andreas Müller`_.
-
-   - The :func:`multioutput.MultiOutputClassifier.predict_proba`
-     function used to return a 3d array (``n_samples``, ``n_classes``,
-     ``n_outputs``). In the case where different target columns had different
-     numbers of classes, a ``ValueError`` would be raised on trying to stack
-     matrices with different dimensions. This function now returns a list of
-     arrays where the length of the list is ``n_outputs``, and each array is
-     (``n_samples``, ``n_classes``) for that particular output.
-     :issue:`8093` by :user:`Peter Bull <pjbull>`.
-
-   - Replace attribute ``named_steps`` ``dict`` to :class:`utils.Bunch`
-     in :class:`pipeline.Pipeline` to enable tab completion in interactive
-     environment. In the case conflict value on ``named_steps`` and ``dict``
-     attribute, ``dict`` behavior will be prioritized.
-     :issue:`8481` by :user:`Herilalaina Rakotoarison <herilalaina>`.
+- Deprecate the ``fit_params`` constructor input to the
+  :class:`model_selection.GridSearchCV` and
+  :class:`model_selection.RandomizedSearchCV` in favor
+  of passing keyword parameters to the ``fit`` methods
+  of those classes. Data-dependent parameters needed for model
+  training should be passed as keyword arguments to ``fit``,
+  and conforming to this convention will allow the hyperparameter
+  selection classes to be used with tools such as
+  :func:`model_selection.cross_val_predict`.
+  :issue:`2879` by :user:`Stephen Hoover <stephen-hoover>`.
+
+- In version 0.21, the default behavior of splitters that use the
+  ``test_size`` and ``train_size`` parameters will change, such that
+  specifying ``train_size`` alone will cause ``test_size`` to be the
+  remainder. :issue:`7459` by :user:`Nelson Liu <nelson-liu>`.
+
+- :class:`multiclass.OneVsRestClassifier` now has ``partial_fit``,
+  ``decision_function`` and ``predict_proba`` methods only when the
+  underlying estimator does.  :issue:`7812` by `Andreas Müller`_ and
+  :user:`Mikhail Korobov <kmike>`.
+
+- :class:`multiclass.OneVsRestClassifier` now has a ``partial_fit`` method
+  only if the underlying estimator does.  By `Andreas Müller`_.
+
+- The ``decision_function`` output shape for binary classification in
+  :class:`multiclass.OneVsRestClassifier` and
+  :class:`multiclass.OneVsOneClassifier` is now ``(n_samples,)`` to conform
+  to scikit-learn conventions. :issue:`9100` by `Andreas Müller`_.
+
+- The :func:`multioutput.MultiOutputClassifier.predict_proba`
+  function used to return a 3d array (``n_samples``, ``n_classes``,
+  ``n_outputs``). In the case where different target columns had different
+  numbers of classes, a ``ValueError`` would be raised on trying to stack
+  matrices with different dimensions. This function now returns a list of
+  arrays where the length of the list is ``n_outputs``, and each array is
+  (``n_samples``, ``n_classes``) for that particular output.
+  :issue:`8093` by :user:`Peter Bull <pjbull>`.
+
+- Replace attribute ``named_steps`` ``dict`` with :class:`utils.Bunch`
+  in :class:`pipeline.Pipeline` to enable tab completion in interactive
+  environments. In case of a conflict between a ``named_steps`` key and a
+  ``dict`` attribute, the ``dict`` behavior will be prioritized.
+  :issue:`8481` by :user:`Herilalaina Rakotoarison <herilalaina>`.
 
 Miscellaneous
 
-   - Deprecate the ``y`` parameter in ``transform`` and ``inverse_transform``.
-     The method  should not accept ``y`` parameter, as it's used at the prediction time.
-     :issue:`8174` by :user:`Tahar Zanouda <tzano>`, `Alexandre Gramfort`_
-     and `Raghav RV`_.
-
-   - SciPy >= 0.13.3 and NumPy >= 1.8.2 are now the minimum supported versions
-     for scikit-learn. The following backported functions in
-     :mod:`utils` have been removed or deprecated accordingly.
-     :issue:`8854` and :issue:`8874` by :user:`Naoya Kanai <naoyak>`
-
-     Removed in 0.19:
-
-     - ``utils.fixes.argpartition``
-     - ``utils.fixes.array_equal``
-     - ``utils.fixes.astype``
-     - ``utils.fixes.bincount``
-     - ``utils.fixes.expit``
-     - ``utils.fixes.frombuffer_empty``
-     - ``utils.fixes.in1d``
-     - ``utils.fixes.norm``
-     - ``utils.fixes.rankdata``
-     - ``utils.fixes.safe_copy``
-
-     Deprecated in 0.19, to be removed in 0.21:
-
-     - ``utils.arpack.eigs``
-     - ``utils.arpack.eigsh``
-     - ``utils.arpack.svds``
-     - ``utils.extmath.fast_dot``
-     - ``utils.extmath.logsumexp``
-     - ``utils.extmath.norm``
-     - ``utils.extmath.pinvh``
-     - ``utils.graph.graph_laplacian``
-     - ``utils.random.choice``
-     - ``utils.sparsetools.connected_components``
-     - ``utils.stats.rankdata``
-
-   - Estimators with both methods ``decision_function`` and ``predict_proba``
-     are now required to have a monotonic relation between them. The
-     method ``check_decision_proba_consistency`` has been added in
-     **utils.estimator_checks** to check their consistency.
-     :issue:`7578` by :user:`Shubham Bhardwaj <shubham0704>`
-
-   - All checks in ``utils.estimator_checks``, in particular
-     :func:`utils.estimator_checks.check_estimator` now accept estimator
-     instances. Most other checks do not accept
-     estimator classes any more. :issue:`9019` by `Andreas Müller`_.
-
-   - Ensure that estimators' attributes ending with ``_`` are not set
-     in the constructor but only in the ``fit`` method. Most notably,
-     ensemble estimators (deriving from :class:`ensemble.BaseEnsemble`)
-     now only have ``self.estimators_`` available after ``fit``.
-     :issue:`7464` by `Lars Buitinck`_ and `Loic Esteve`_.
-
+- Deprecate the ``y`` parameter in ``transform`` and ``inverse_transform``.
+  These methods should not accept a ``y`` parameter, as they are used at prediction time.
+  :issue:`8174` by :user:`Tahar Zanouda <tzano>`, `Alexandre Gramfort`_
+  and `Raghav RV`_.
+
+- SciPy >= 0.13.3 and NumPy >= 1.8.2 are now the minimum supported versions
+  for scikit-learn. The following backported functions in
+  :mod:`utils` have been removed or deprecated accordingly.
+  :issue:`8854` and :issue:`8874` by :user:`Naoya Kanai <naoyak>`
+
+- The ``store_covariances`` and ``covariances_`` parameters of
+  :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`
+  have been renamed to ``store_covariance`` and ``covariance_`` to be
+  consistent with the corresponding parameter names of the
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis`. The old names
+  will be removed in version 0.21. :issue:`7998` by :user:`Jiacheng <mrbeann>`
+
+  Removed in 0.19:
+
+  - ``utils.fixes.argpartition``
+  - ``utils.fixes.array_equal``
+  - ``utils.fixes.astype``
+  - ``utils.fixes.bincount``
+  - ``utils.fixes.expit``
+  - ``utils.fixes.frombuffer_empty``
+  - ``utils.fixes.in1d``
+  - ``utils.fixes.norm``
+  - ``utils.fixes.rankdata``
+  - ``utils.fixes.safe_copy``
+
+  Deprecated in 0.19, to be removed in 0.21:
+
+  - ``utils.arpack.eigs``
+  - ``utils.arpack.eigsh``
+  - ``utils.arpack.svds``
+  - ``utils.extmath.fast_dot``
+  - ``utils.extmath.logsumexp``
+  - ``utils.extmath.norm``
+  - ``utils.extmath.pinvh``
+  - ``utils.graph.graph_laplacian``
+  - ``utils.random.choice``
+  - ``utils.sparsetools.connected_components``
+  - ``utils.stats.rankdata``
+
+- Estimators with both methods ``decision_function`` and ``predict_proba``
+  are now required to have a monotonic relation between them. The
+  method ``check_decision_proba_consistency`` has been added in
+  **utils.estimator_checks** to check their consistency.
+  :issue:`7578` by :user:`Shubham Bhardwaj <shubham0704>`
+
+- All checks in ``utils.estimator_checks``, in particular
+  :func:`utils.estimator_checks.check_estimator` now accept estimator
+  instances. Most other checks do not accept
+  estimator classes any more. :issue:`9019` by `Andreas Müller`_.
+
+- Ensure that estimators' attributes ending with ``_`` are not set
+  in the constructor but only in the ``fit`` method. Most notably,
+  ensemble estimators (deriving from :class:`ensemble.BaseEnsemble`)
+  now only have ``self.estimators_`` available after ``fit``.
+  :issue:`7464` by `Lars Buitinck`_ and `Loic Esteve`_.
+
+
+Code and Documentation Contributors
+-----------------------------------
+
+Thanks to everyone who has contributed to the maintenance and improvement of the
+project since version 0.18, including:
+
+Joel Nothman, Loic Esteve, Andreas Mueller, Guillaume Lemaitre, Olivier Grisel,
+Hanmin Qin, Raghav RV, Alexandre Gramfort, themrmax, Aman Dalmia, Gael
+Varoquaux, Naoya Kanai, Tom Dupré la Tour, Rishikesh, Nelson Liu, Taehoon Lee,
+Nelle Varoquaux, Aashil, Mikhail Korobov, Sebastin Santy, Joan Massich, Roman
+Yurchak, RAKOTOARISON Herilalaina, Thierry Guillemot, Alexandre Abadie, Carol
+Willing, Balakumaran Manoharan, Josh Karnofsky, Vlad Niculae, Utkarsh Upadhyay,
+Dmitry Petrov, Minghui Liu, Srivatsan, Vincent Pham, Albert Thomas, Jake
+VanderPlas, Attractadore, JC Liu, alexandercbooth, chkoar, Óscar Nájera,
+Aarshay Jain, Kyle Gilliam, Ramana Subramanyam, CJ Carey, Clement Joudet, David
+Robles, He Chen, Joris Van den Bossche, Karan Desai, Katie Luangkote, Leland
+McInnes, Maniteja Nandana, Michele Lacchia, Sergei Lebedev, Shubham Bhardwaj,
+akshay0724, omtcyfz, rickiepark, waterponey, Vathsala Achar, jbDelafosse, Ralf
+Gommers, Ekaterina Krivich, Vivek Kumar, Ishank Gulati, Dave Elliott, ldirer,
+Reiichiro Nakano, Levi John Wolf, Mathieu Blondel, Sid Kapur, Dougal J.
+Sutherland, midinas, mikebenfield, Sourav Singh, Aseem Bansal, Ibraim Ganiev,
+Stephen Hoover, AishwaryaRK, Steven C. Howell, Gary Foreman, Neeraj Gangwar,
+Tahar, Jon Crall, dokato, Kathy Chen, ferria, Thomas Moreau, Charlie Brummitt,
+Nicolas Goix, Adam Kleczewski, Sam Shleifer, Nikita Singh, Basil Beirouti,
+Giorgio Patrini, Manoj Kumar, Rafael Possas, James Bourbeau, James A. Bednar,
+Janine Harper, Jaye, Jean Helie, Jeremy Steward, Artsiom, John Wei, Jonathan
+LIgo, Jonathan Rahn, seanpwilliams, Arthur Mensch, Josh Levy, Julian Kuhlmann,
+Julien Aubert, Jörn Hees, Kai, shivamgargsya, Kat Hempstalk, Kaushik
+Lakshmikanth, Kennedy, Kenneth Lyons, Kenneth Myers, Kevin Yap, Kirill Bobyrev,
+Konstantin Podshumok, Arthur Imbert, Lee Murray, toastedcornflakes, Lera, Li
+Li, Arthur Douillard, Mainak Jas, tobycheese, Manraj Singh, Manvendra Singh,
+Marc Meketon, MarcoFalke, Matthew Brett, Matthias Gilch, Mehul Ahuja, Melanie
+Goetz, Meng, Peng, Michael Dezube, Michal Baumgartner, vibrantabhi19, Artem
+Golubin, Milen Paskov, Antonin Carette, Morikko, MrMjauh, NALEPA Emmanuel,
+Namiya, Antoine Wendlinger, Narine Kokhlikyan, NarineK, Nate Guerin, Angus
+Williams, Ang Lu, Nicole Vavrova, Nitish Pandey, Okhlopkov Daniil Olegovich,
+Andy Craze, Om Prakash, Parminder Singh, Patrick Carlson, Patrick Pei, Paul
+Ganssle, Paulo Haddad, Paweł Lorek, Peng Yu, Pete Bachant, Peter Bull, Peter
+Csizsek, Peter Wang, Pieter Arthur de Jong, Ping-Yao, Chang, Preston Parry,
+Puneet Mathur, Quentin Hibon, Andrew Smith, Andrew Jackson, 1kastner, Rameshwar
+Bhaskaran, Rebecca Bilbro, Remi Rampin, Andrea Esuli, Rob Hall, Robert
+Bradshaw, Romain Brault, Aman Pratik, Ruifeng Zheng, Russell Smith, Sachin
+Agarwal, Sailesh Choyal, Samson Tan, Samuël Weber, Sarah Brown, Sebastian
+Pölsterl, Sebastian Raschka, Sebastian Saeger, Alyssa Batula, Abhyuday Pratap
+Singh, Sergey Feldman, Sergul Aydore, Sharan Yalburgi, willduan, Siddharth
+Gupta, Sri Krishna, Almer, Stijn Tonk, Allen Riddell, Theofilos Papapanagiotou,
+Alison, Alexis Mignon, Tommy Boucher, Tommy Löfstedt, Toshihiro Kamishima,
+Tyler Folkman, Tyler Lanigan, Alexander Junge, Varun Shenoy, Victor Poughon,
+Vilhelm von Ehrenheim, Aleksandr Sandrovskii, Alan Yee, Vlasios Vasileiou,
+Warut Vijitbenjaronk, Yang Zhang, Yaroslav Halchenko, Yichuan Liu, Yuichi
+Fujikawa, affanv14, aivision2020, xor, andreh7, brady salz, campustrampus,
+Agamemnon Krasoulis, ditenberg, elena-sharova, filipj8, fukatani, gedeck,
+guiniol, guoci, hakaa1, hongkahjun, i-am-xhy, jakirkham, jaroslaw-weber,
+jayzed82, jeroko, jmontoyam, jonathan.striebel, josephsalmon, jschendel,
+leereeves, martin-hahn, mathurinm, mehak-sachdeva, mlewis1729, mlliou112,
+mthorrell, ndingwall, nuffe, yangarbiter, plagree, pldtc325, Breno Freitas,
+Brett Olsen, Brian A. Alfano, Brian Burns, polmauri, Brandon Carter, Charlton
+Austin, Chayant T15h, Chinmaya Pancholi, Christian Danielsen, Chung Yen,
+Chyi-Kwei Yau, pravarmahajan, DOHMATOB Elvis, Daniel LeJeune, Daniel Hnyk,
+Darius Morawiec, David DeTomaso, David Gasquez, David Haberthür, David
+Heryanto, David Kirkby, David Nicholson, rashchedrin, Deborah Gertrude Digges,
+Denis Engemann, Devansh D, Dickson, Bob Baxley, Don86, E. Lynch-Klarup, Ed
+Rogers, Elizabeth Ferriss, Ellen-Co2, Fabian Egli, Fang-Chieh Chou, Bing Tian
+Dai, Greg Stupp, Grzegorz Szpak, Bertrand Thirion, Hadrien Bertrand, Harizo
+Rajaona, zxcvbnius, Henry Lin, Holger Peters, Icyblade Dai, Igor
+Andriushchenko, Ilya, Isaac Laughlin, Iván Vallés, Aurélien Bellet, JPFrancoia,
+Jacob Schreiber, Asish Mahapatra
 
 .. _changes_0_18_2:
 
@@ -850,11 +970,11 @@ Version 0.18.2
 Changelog
 ---------
 
-    - Fixes for compatibility with NumPy 1.13.0: :issue:`7946` :issue:`8355` by
-      `Loic Esteve`_.
+- Fixes for compatibility with NumPy 1.13.0: :issue:`7946` :issue:`8355` by
+  `Loic Esteve`_.
 
-    - Minor compatibility changes in the examples :issue:`9010` :issue:`8040`
-      :issue:`9149`.
+- Minor compatibility changes in the examples :issue:`9010` :issue:`8040`
+  :issue:`9149`.
 
 Code Contributors
 -----------------
@@ -874,132 +994,132 @@ Changelog
 Enhancements
 ............
 
-   - Improved ``sample_without_replacement`` speed by utilizing
-     numpy.random.permutation for most cases. As a result,
-     samples may differ in this release for a fixed random state.
-     Affected estimators:
+- Improved ``sample_without_replacement`` speed by utilizing
+  numpy.random.permutation for most cases. As a result,
+  samples may differ in this release for a fixed random state.
+  Affected estimators:
 
-     - :class:`ensemble.BaggingClassifier`
-     - :class:`ensemble.BaggingRegressor`
-     - :class:`linear_model.RANSACRegressor`
-     - :class:`model_selection.RandomizedSearchCV`
-     - :class:`random_projection.SparseRandomProjection`
+  - :class:`ensemble.BaggingClassifier`
+  - :class:`ensemble.BaggingRegressor`
+  - :class:`linear_model.RANSACRegressor`
+  - :class:`model_selection.RandomizedSearchCV`
+  - :class:`random_projection.SparseRandomProjection`
 
-     This also affects the :meth:`datasets.make_classification`
-     method.
+  This also affects the :meth:`datasets.make_classification`
+  method.
 
 Bug fixes
 .........
 
-   - Fix issue where ``min_grad_norm`` and ``n_iter_without_progress``
-     parameters were not being utilised by :class:`manifold.TSNE`.
-     :issue:`6497` by :user:`Sebastian Säger <ssaeger>`
-
-   - Fix bug for svm's decision values when ``decision_function_shape``
-     is ``ovr`` in :class:`svm.SVC`.
-     :class:`svm.SVC`'s decision_function was incorrect from versions
-     0.17.0 through 0.18.0.
-     :issue:`7724` by `Bing Tian Dai`_
-
-   - Attribute ``explained_variance_ratio`` of
-     :class:`discriminant_analysis.LinearDiscriminantAnalysis` calculated
-     with SVD and Eigen solver are now of the same length. :issue:`7632`
-     by :user:`JPFrancoia <JPFrancoia>`
-
-   - Fixes issue in :ref:`univariate_feature_selection` where score
-     functions were not accepting multi-label targets. :issue:`7676`
-     by :user:`Mohammed Affan <affanv14>`
-
-   - Fixed setting parameters when calling ``fit`` multiple times on
-     :class:`feature_selection.SelectFromModel`. :issue:`7756` by `Andreas Müller`_
-
-   - Fixes issue in ``partial_fit`` method of
-     :class:`multiclass.OneVsRestClassifier` when number of classes used in
-     ``partial_fit`` was less than the total number of classes in the
-     data. :issue:`7786` by `Srivatsan Ramesh`_
-
-   - Fixes issue in :class:`calibration.CalibratedClassifierCV` where
-     the sum of probabilities of each class for a data was not 1, and
-     ``CalibratedClassifierCV`` now handles the case where the training set
-     has less number of classes than the total data. :issue:`7799` by
-     `Srivatsan Ramesh`_
-
-   - Fix a bug where :class:`sklearn.feature_selection.SelectFdr` did not
-     exactly implement Benjamini-Hochberg procedure. It formerly may have
-     selected fewer features than it should.
-     :issue:`7490` by :user:`Peng Meng <mpjlu>`.
-
-   - :class:`sklearn.manifold.LocallyLinearEmbedding` now correctly handles
-     integer inputs. :issue:`6282` by `Jake Vanderplas`_.
-
-   - The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and
-     regressors now assumes uniform sample weights by default if the
-     ``sample_weight`` argument is not passed to the ``fit`` function.
-     Previously, the parameter was silently ignored. :issue:`7301`
-     by :user:`Nelson Liu <nelson-liu>`.
-
-   - Numerical issue with :class:`linear_model.RidgeCV` on centered data when
-     `n_features > n_samples`. :issue:`6178` by `Bertrand Thirion`_
-
-   - Tree splitting criterion classes' cloning/pickling is now memory safe
-     :issue:`7680` by :user:`Ibraim Ganiev <olologin>`.
-
-   - Fixed a bug where :class:`decomposition.NMF` sets its ``n_iters_``
-     attribute in `transform()`. :issue:`7553` by :user:`Ekaterina
-     Krivich <kiote>`.
-
-   - :class:`sklearn.linear_model.LogisticRegressionCV` now correctly handles
-     string labels. :issue:`5874` by `Raghav RV`_.
-
-   - Fixed a bug where :func:`sklearn.model_selection.train_test_split` raised
-     an error when ``stratify`` is a list of string labels. :issue:`7593` by
-     `Raghav RV`_.
-
-   - Fixed a bug where :class:`sklearn.model_selection.GridSearchCV` and
-     :class:`sklearn.model_selection.RandomizedSearchCV` were not pickleable
-     because of a pickling bug in ``np.ma.MaskedArray``. :issue:`7594` by
-     `Raghav RV`_.
-
-   - All cross-validation utilities in :mod:`sklearn.model_selection` now
-     permit one time cross-validation splitters for the ``cv`` parameter. Also
-     non-deterministic cross-validation splitters (where multiple calls to
-     ``split`` produce dissimilar splits) can be used as ``cv`` parameter.
-     The :class:`sklearn.model_selection.GridSearchCV` will cross-validate each
-     parameter setting on the split produced by the first ``split`` call
-     to the cross-validation splitter.  :issue:`7660` by `Raghav RV`_.
-
-   - Fix bug where :meth:`preprocessing.MultiLabelBinarizer.fit_transform`
-     returned an invalid CSR matrix.
-     :issue:`7750` by :user:`CJ Carey <perimosocordiae>`.
-
-   - Fixed a bug where :func:`metrics.pairwise.cosine_distances` could return a
-     small negative distance. :issue:`7732` by :user:`Artsion <asanakoy>`.
+- Fix issue where ``min_grad_norm`` and ``n_iter_without_progress``
+  parameters were not being utilised by :class:`manifold.TSNE`.
+  :issue:`6497` by :user:`Sebastian Säger <ssaeger>`
+
+- Fix bug for svm's decision values when ``decision_function_shape``
+  is ``ovr`` in :class:`svm.SVC`.
+  :class:`svm.SVC`'s decision_function was incorrect from versions
+  0.17.0 through 0.18.0.
+  :issue:`7724` by `Bing Tian Dai`_
+
+- The ``explained_variance_ratio`` attributes of
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis` calculated
+  with the SVD and Eigen solvers are now of the same length. :issue:`7632`
+  by :user:`JPFrancoia <JPFrancoia>`
+
+- Fixes issue in :ref:`univariate_feature_selection` where score
+  functions were not accepting multi-label targets. :issue:`7676`
+  by :user:`Mohammed Affan <affanv14>`
+
+- Fixed setting parameters when calling ``fit`` multiple times on
+  :class:`feature_selection.SelectFromModel`. :issue:`7756` by `Andreas Müller`_
+
+- Fixes issue in ``partial_fit`` method of
+  :class:`multiclass.OneVsRestClassifier` when number of classes used in
+  ``partial_fit`` was less than the total number of classes in the
+  data. :issue:`7786` by `Srivatsan Ramesh`_
+
+- Fixes issue in :class:`calibration.CalibratedClassifierCV` where
+  the sum of probabilities of each class for a sample was not 1, and
+  ``CalibratedClassifierCV`` now handles the case where the training set
+  has fewer classes than the total data. :issue:`7799` by
+  `Srivatsan Ramesh`_
+
+- Fix a bug where :class:`sklearn.feature_selection.SelectFdr` did not
+  exactly implement Benjamini-Hochberg procedure. It formerly may have
+  selected fewer features than it should.
+  :issue:`7490` by :user:`Peng Meng <mpjlu>`.
+
+- :class:`sklearn.manifold.LocallyLinearEmbedding` now correctly handles
+  integer inputs. :issue:`6282` by `Jake Vanderplas`_.
+
+- The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and
+  regressors now assumes uniform sample weights by default if the
+  ``sample_weight`` argument is not passed to the ``fit`` function.
+  Previously, the parameter was silently ignored. :issue:`7301`
+  by :user:`Nelson Liu <nelson-liu>`.
+
+- Fixed a numerical issue with :class:`linear_model.RidgeCV` on centered data when
+  ``n_features > n_samples``. :issue:`6178` by `Bertrand Thirion`_
+
+- Tree splitting criterion classes' cloning/pickling is now memory safe
+  :issue:`7680` by :user:`Ibraim Ganiev <olologin>`.
+
+- Fixed a bug where :class:`decomposition.NMF` sets its ``n_iters_``
+  attribute in ``transform()``. :issue:`7553` by :user:`Ekaterina
+  Krivich <kiote>`.
+
+- :class:`sklearn.linear_model.LogisticRegressionCV` now correctly handles
+  string labels. :issue:`5874` by `Raghav RV`_.
+
+- Fixed a bug where :func:`sklearn.model_selection.train_test_split` raised
+  an error when ``stratify`` is a list of string labels. :issue:`7593` by
+  `Raghav RV`_.
+
+- Fixed a bug where :class:`sklearn.model_selection.GridSearchCV` and
+  :class:`sklearn.model_selection.RandomizedSearchCV` were not pickleable
+  because of a pickling bug in ``np.ma.MaskedArray``. :issue:`7594` by
+  `Raghav RV`_.
+
+- All cross-validation utilities in :mod:`sklearn.model_selection` now
+  permit one time cross-validation splitters for the ``cv`` parameter. Also
+  non-deterministic cross-validation splitters (where multiple calls to
+  ``split`` produce dissimilar splits) can be used as ``cv`` parameter.
+  The :class:`sklearn.model_selection.GridSearchCV` will cross-validate each
+  parameter setting on the split produced by the first ``split`` call
+  to the cross-validation splitter.  :issue:`7660` by `Raghav RV`_.
+
+- Fix bug where :meth:`preprocessing.MultiLabelBinarizer.fit_transform`
+  returned an invalid CSR matrix.
+  :issue:`7750` by :user:`CJ Carey <perimosocordiae>`.
+
+- Fixed a bug where :func:`metrics.pairwise.cosine_distances` could return a
+  small negative distance. :issue:`7732` by :user:`Artsiom <asanakoy>`.
 
 API changes summary
 -------------------
 
 Trees and forests
 
-   - The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and
-     regressors now assumes uniform sample weights by default if the
-     ``sample_weight`` argument is not passed to the ``fit`` function.
-     Previously, the parameter was silently ignored. :issue:`7301` by :user:`Nelson
-     Liu <nelson-liu>`.
+- The ``min_weight_fraction_leaf`` parameter of tree-based classifiers and
+  regressors now assumes uniform sample weights by default if the
+  ``sample_weight`` argument is not passed to the ``fit`` function.
+  Previously, the parameter was silently ignored. :issue:`7301` by :user:`Nelson
+  Liu <nelson-liu>`.
 
-   - Tree splitting criterion classes' cloning/pickling is now memory safe.
-     :issue:`7680` by :user:`Ibraim Ganiev <olologin>`.
+- Tree splitting criterion classes' cloning/pickling is now memory safe.
+  :issue:`7680` by :user:`Ibraim Ganiev <olologin>`.
 
 
 Linear, kernelized and related models
 
-   - Length of ``explained_variance_ratio`` of
-     :class:`discriminant_analysis.LinearDiscriminantAnalysis`
-     changed for both Eigen and SVD solvers. The attribute has now a length
-     of min(n_components, n_classes - 1). :issue:`7632`
-     by :user:`JPFrancoia <JPFrancoia>`
+- Length of ``explained_variance_ratio`` of
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis`
+  changed for both Eigen and SVD solvers. The attribute now has a length
+  of ``min(n_components, n_classes - 1)``. :issue:`7632`
+  by :user:`JPFrancoia <JPFrancoia>`
 
-   - Numerical issue with :class:`linear_model.RidgeCV` on centered data when
-     ``n_features > n_samples``. :issue:`6178` by `Bertrand Thirion`_
+- Numerical issue with :class:`linear_model.RidgeCV` on centered data when
+  ``n_features > n_samples``. :issue:`6178` by `Bertrand Thirion`_
 
 .. _changes_0_18:
 
@@ -1018,101 +1138,101 @@ Version 0.18
 Model Selection Enhancements and API Changes
 --------------------------------------------
 
-  - **The model_selection module**
+- **The model_selection module**
 
-    The new module :mod:`sklearn.model_selection`, which groups together the
-    functionalities of formerly :mod:`sklearn.cross_validation`,
-    :mod:`sklearn.grid_search` and :mod:`sklearn.learning_curve`, introduces new
-    possibilities such as nested cross-validation and better manipulation of
-    parameter searches with Pandas.
+  The new module :mod:`sklearn.model_selection`, which groups together the
+  functionalities of formerly :mod:`sklearn.cross_validation`,
+  :mod:`sklearn.grid_search` and :mod:`sklearn.learning_curve`, introduces new
+  possibilities such as nested cross-validation and better manipulation of
+  parameter searches with Pandas.
 
-    Many things will stay the same but there are some key differences. Read
-    below to know more about the changes.
+  Many things will stay the same but there are some key differences. Read
+  below to know more about the changes.
 
-  - **Data-independent CV splitters enabling nested cross-validation**
+- **Data-independent CV splitters enabling nested cross-validation**
 
-    The new cross-validation splitters, defined in the
-    :mod:`sklearn.model_selection`, are no longer initialized with any
-    data-dependent parameters such as ``y``. Instead they expose a
-    :func:`split` method that takes in the data and yields a generator for the
-    different splits.
+  The new cross-validation splitters, defined in the
+  :mod:`sklearn.model_selection`, are no longer initialized with any
+  data-dependent parameters such as ``y``. Instead they expose a
+  :func:`split` method that takes in the data and yields a generator for the
+  different splits.
 
-    This change makes it possible to use the cross-validation splitters to
-    perform nested cross-validation, facilitated by
-    :class:`model_selection.GridSearchCV` and
-    :class:`model_selection.RandomizedSearchCV` utilities.
+  This change makes it possible to use the cross-validation splitters to
+  perform nested cross-validation, facilitated by
+  :class:`model_selection.GridSearchCV` and
+  :class:`model_selection.RandomizedSearchCV` utilities.
 
-  - **The enhanced cv_results_ attribute**
+- **The enhanced cv_results_ attribute**
 
-    The new ``cv_results_`` attribute (of :class:`model_selection.GridSearchCV`
-    and :class:`model_selection.RandomizedSearchCV`) introduced in lieu of the
-    ``grid_scores_`` attribute is a dict of 1D arrays with elements in each
-    array corresponding to the parameter settings (i.e. search candidates).
+  The new ``cv_results_`` attribute (of :class:`model_selection.GridSearchCV`
+  and :class:`model_selection.RandomizedSearchCV`) introduced in lieu of the
+  ``grid_scores_`` attribute is a dict of 1D arrays with elements in each
+  array corresponding to the parameter settings (i.e. search candidates).
 
-    The ``cv_results_`` dict can be easily imported into ``pandas`` as a
-    ``DataFrame`` for exploring the search results.
+  The ``cv_results_`` dict can be easily imported into ``pandas`` as a
+  ``DataFrame`` for exploring the search results.
 
-    The ``cv_results_`` arrays include scores for each cross-validation split
-    (with keys such as ``'split0_test_score'``), as well as their mean
-    (``'mean_test_score'``) and standard deviation (``'std_test_score'``).
+  The ``cv_results_`` arrays include scores for each cross-validation split
+  (with keys such as ``'split0_test_score'``), as well as their mean
+  (``'mean_test_score'``) and standard deviation (``'std_test_score'``).
 
-    The ranks for the search candidates (based on their mean
-    cross-validation score) is available at ``cv_results_['rank_test_score']``.
+  The ranks for the search candidates (based on their mean
+  cross-validation score) are available at ``cv_results_['rank_test_score']``.
 
-    The parameter values for each parameter is stored separately as numpy
-    masked object arrays. The value, for that search candidate, is masked if
-    the corresponding parameter is not applicable. Additionally a list of all
-    the parameter dicts are stored at ``cv_results_['params']``.
+  The parameter values for each parameter are stored separately as numpy
+  masked object arrays. The value, for that search candidate, is masked if
+  the corresponding parameter is not applicable. Additionally a list of all
+  the parameter dicts is stored at ``cv_results_['params']``.
 
-  - **Parameters n_folds and n_iter renamed to n_splits**
+- **Parameters n_folds and n_iter renamed to n_splits**
 
-    Some parameter names have changed:
-    The ``n_folds`` parameter in new :class:`model_selection.KFold`,
-    :class:`model_selection.GroupKFold` (see below for the name change),
-    and :class:`model_selection.StratifiedKFold` is now renamed to
-    ``n_splits``. The ``n_iter`` parameter in
-    :class:`model_selection.ShuffleSplit`, the new class
-    :class:`model_selection.GroupShuffleSplit` and
-    :class:`model_selection.StratifiedShuffleSplit` is now renamed to
-    ``n_splits``.
+  Some parameter names have changed:
+  The ``n_folds`` parameter in new :class:`model_selection.KFold`,
+  :class:`model_selection.GroupKFold` (see below for the name change),
+  and :class:`model_selection.StratifiedKFold` is now renamed to
+  ``n_splits``. The ``n_iter`` parameter in
+  :class:`model_selection.ShuffleSplit`, the new class
+  :class:`model_selection.GroupShuffleSplit` and
+  :class:`model_selection.StratifiedShuffleSplit` is now renamed to
+  ``n_splits``.
 
-  - **Rename of splitter classes which accepts group labels along with data**
+- **Rename of splitter classes which accept group labels along with data**
 
-    The cross-validation splitters ``LabelKFold``,
-    ``LabelShuffleSplit``, ``LeaveOneLabelOut`` and ``LeavePLabelOut`` have
-    been renamed to :class:`model_selection.GroupKFold`,
-    :class:`model_selection.GroupShuffleSplit`,
-    :class:`model_selection.LeaveOneGroupOut` and
-    :class:`model_selection.LeavePGroupsOut` respectively.
+  The cross-validation splitters ``LabelKFold``,
+  ``LabelShuffleSplit``, ``LeaveOneLabelOut`` and ``LeavePLabelOut`` have
+  been renamed to :class:`model_selection.GroupKFold`,
+  :class:`model_selection.GroupShuffleSplit`,
+  :class:`model_selection.LeaveOneGroupOut` and
+  :class:`model_selection.LeavePGroupsOut` respectively.
 
-    Note the change from singular to plural form in
-    :class:`model_selection.LeavePGroupsOut`.
+  Note the change from singular to plural form in
+  :class:`model_selection.LeavePGroupsOut`.
 
-  - **Fit parameter labels renamed to groups**
+- **Fit parameter labels renamed to groups**
 
-    The ``labels`` parameter in the :func:`split` method of the newly renamed
-    splitters :class:`model_selection.GroupKFold`,
-    :class:`model_selection.LeaveOneGroupOut`,
-    :class:`model_selection.LeavePGroupsOut`,
-    :class:`model_selection.GroupShuffleSplit` is renamed to ``groups``
-    following the new nomenclature of their class names.
+  The ``labels`` parameter in the :func:`split` method of the newly renamed
+  splitters :class:`model_selection.GroupKFold`,
+  :class:`model_selection.LeaveOneGroupOut`,
+  :class:`model_selection.LeavePGroupsOut`,
+  :class:`model_selection.GroupShuffleSplit` is renamed to ``groups``
+  following the new nomenclature of their class names.
 
-  - **Parameter n_labels renamed to n_groups**
+- **Parameter n_labels renamed to n_groups**
 
-    The parameter ``n_labels`` in the newly renamed
-    :class:`model_selection.LeavePGroupsOut` is changed to ``n_groups``.
+  The parameter ``n_labels`` in the newly renamed
+  :class:`model_selection.LeavePGroupsOut` is changed to ``n_groups``.
 
-  - Training scores and Timing information
+- **Training scores and Timing information**
 
-    ``cv_results_`` also includes the training scores for each
-    cross-validation split (with keys such as ``'split0_train_score'``), as
-    well as their mean (``'mean_train_score'``) and standard deviation
-    (``'std_train_score'``). To avoid the cost of evaluating training score,
-    set ``return_train_score=False``.
+  ``cv_results_`` also includes the training scores for each
+  cross-validation split (with keys such as ``'split0_train_score'``), as
+  well as their mean (``'mean_train_score'``) and standard deviation
+  (``'std_train_score'``). To avoid the cost of evaluating training score,
+  set ``return_train_score=False``.
 
-    Additionally the mean and standard deviation of the times taken to split,
-    train and score the model across all the cross-validation splits is
-    available at the key ``'mean_time'`` and ``'std_time'`` respectively.
+  Additionally the mean and standard deviation of the times taken to split,
+  train and score the model across all the cross-validation splits are
+  available at the keys ``'mean_time'`` and ``'std_time'`` respectively.
 
 Changelog
 ---------
@@ -1122,399 +1242,399 @@ New features
 
 Classifiers and Regressors
 
-   - The Gaussian Process module has been reimplemented and now offers classification
-     and regression estimators through :class:`gaussian_process.GaussianProcessClassifier`
-     and  :class:`gaussian_process.GaussianProcessRegressor`. Among other things, the new
-     implementation supports kernel engineering, gradient-based hyperparameter optimization or
-     sampling of functions from GP prior and GP posterior. Extensive documentation and
-     examples are provided. By `Jan Hendrik Metzen`_.
+- The Gaussian Process module has been reimplemented and now offers classification
+  and regression estimators through :class:`gaussian_process.GaussianProcessClassifier`
+  and  :class:`gaussian_process.GaussianProcessRegressor`. Among other things, the new
+  implementation supports kernel engineering, gradient-based hyperparameter optimization or
+  sampling of functions from GP prior and GP posterior. Extensive documentation and
+  examples are provided. By `Jan Hendrik Metzen`_.
 
-   - Added new supervised learning algorithm: :ref:`Multi-layer Perceptron <multilayer_perceptron>`
-     :issue:`3204` by :user:`Issam H. Laradji <IssamLaradji>`
+- Added new supervised learning algorithm: :ref:`Multi-layer Perceptron <multilayer_perceptron>`
+  :issue:`3204` by :user:`Issam H. Laradji <IssamLaradji>`
 
-   - Added :class:`linear_model.HuberRegressor`, a linear model robust to outliers.
-     :issue:`5291` by `Manoj Kumar`_.
+- Added :class:`linear_model.HuberRegressor`, a linear model robust to outliers.
+  :issue:`5291` by `Manoj Kumar`_.
 
-   - Added the :class:`multioutput.MultiOutputRegressor` meta-estimator. It
-     converts single output regressors to multi-output regressors by fitting
-     one regressor per output. By :user:`Tim Head <betatim>`.
+- Added the :class:`multioutput.MultiOutputRegressor` meta-estimator. It
+  converts single output regressors to multi-output regressors by fitting
+  one regressor per output. By :user:`Tim Head <betatim>`.
 
 Other estimators
 
-   - New :class:`mixture.GaussianMixture` and :class:`mixture.BayesianGaussianMixture`
-     replace former mixture models, employing faster inference
-     for sounder results. :issue:`7295` by :user:`Wei Xue <xuewei4d>` and
-     :user:`Thierry Guillemot <tguillemot>`.
+- New :class:`mixture.GaussianMixture` and :class:`mixture.BayesianGaussianMixture`
+  replace former mixture models, employing faster inference
+  for sounder results. :issue:`7295` by :user:`Wei Xue <xuewei4d>` and
+  :user:`Thierry Guillemot <tguillemot>`.
 
-   - Class :class:`decomposition.RandomizedPCA` is now factored into :class:`decomposition.PCA`
-     and it is available calling with parameter ``svd_solver='randomized'``.
-     The default number of ``n_iter`` for ``'randomized'`` has changed to 4. The old
-     behavior of PCA is recovered by ``svd_solver='full'``. An additional solver
-     calls ``arpack`` and performs truncated (non-randomized) SVD. By default,
-     the best solver is selected depending on the size of the input and the
-     number of components requested. :issue:`5299` by :user:`Giorgio Patrini <giorgiop>`.
+- Class :class:`decomposition.RandomizedPCA` is now factored into :class:`decomposition.PCA`
+  and is available by calling it with parameter ``svd_solver='randomized'``.
+  The default number of ``n_iter`` for ``'randomized'`` has changed to 4. The old
+  behavior of PCA is recovered by ``svd_solver='full'``. An additional solver
+  calls ``arpack`` and performs truncated (non-randomized) SVD. By default,
+  the best solver is selected depending on the size of the input and the
+  number of components requested. :issue:`5299` by :user:`Giorgio Patrini <giorgiop>`.
 
-   - Added two functions for mutual information estimation:
-     :func:`feature_selection.mutual_info_classif` and
-     :func:`feature_selection.mutual_info_regression`. These functions can be
-     used in :class:`feature_selection.SelectKBest` and
-     :class:`feature_selection.SelectPercentile` as score functions.
-     By :user:`Andrea Bravi <AndreaBravi>` and :user:`Nikolay Mayorov <nmayorov>`.
+- Added two functions for mutual information estimation:
+  :func:`feature_selection.mutual_info_classif` and
+  :func:`feature_selection.mutual_info_regression`. These functions can be
+  used in :class:`feature_selection.SelectKBest` and
+  :class:`feature_selection.SelectPercentile` as score functions.
+  By :user:`Andrea Bravi <AndreaBravi>` and :user:`Nikolay Mayorov <nmayorov>`.
 
-   - Added the :class:`ensemble.IsolationForest` class for anomaly detection based on
-     random forests. By `Nicolas Goix`_.
+- Added the :class:`ensemble.IsolationForest` class for anomaly detection based on
+  random forests. By `Nicolas Goix`_.
 
-   - Added ``algorithm="elkan"`` to :class:`cluster.KMeans` implementing
-     Elkan's fast K-Means algorithm. By `Andreas Müller`_.
+- Added ``algorithm="elkan"`` to :class:`cluster.KMeans` implementing
+  Elkan's fast K-Means algorithm. By `Andreas Müller`_.
 
 Model selection and evaluation
 
-   - Added :func:`metrics.cluster.fowlkes_mallows_score`, the Fowlkes Mallows
-     Index which measures the similarity of two clusterings of a set of points
-     By :user:`Arnaud Fouchet <afouchet>` and :user:`Thierry Guillemot <tguillemot>`.
+- Added :func:`metrics.cluster.fowlkes_mallows_score`, the Fowlkes Mallows
+  Index which measures the similarity of two clusterings of a set of points.
+  By :user:`Arnaud Fouchet <afouchet>` and :user:`Thierry Guillemot <tguillemot>`.
 
-   - Added :func:`metrics.calinski_harabaz_score`, which computes the Calinski
-     and Harabaz score to evaluate the resulting clustering of a set of points.
-     By :user:`Arnaud Fouchet <afouchet>` and :user:`Thierry Guillemot <tguillemot>`.
+- Added :func:`metrics.calinski_harabaz_score`, which computes the Calinski
+  and Harabaz score to evaluate the resulting clustering of a set of points.
+  By :user:`Arnaud Fouchet <afouchet>` and :user:`Thierry Guillemot <tguillemot>`.
 
-   - Added new cross-validation splitter
-     :class:`model_selection.TimeSeriesSplit` to handle time series data.
-     :issue:`6586` by :user:`YenChen Lin <yenchenlin>`
+- Added new cross-validation splitter
+  :class:`model_selection.TimeSeriesSplit` to handle time series data.
+  :issue:`6586` by :user:`YenChen Lin <yenchenlin>`
 
-   - The cross-validation iterators are replaced by cross-validation splitters
-     available from :mod:`sklearn.model_selection`, allowing for nested
-     cross-validation. See :ref:`model_selection_changes` for more information.
-     :issue:`4294` by `Raghav RV`_.
+- The cross-validation iterators are replaced by cross-validation splitters
+  available from :mod:`sklearn.model_selection`, allowing for nested
+  cross-validation. See :ref:`model_selection_changes` for more information.
+  :issue:`4294` by `Raghav RV`_.
 
 Enhancements
 ............
 
 Trees and ensembles
 
-   - Added a new splitting criterion for :class:`tree.DecisionTreeRegressor`,
-     the mean absolute error. This criterion can also be used in
-     :class:`ensemble.ExtraTreesRegressor`,
-     :class:`ensemble.RandomForestRegressor`, and the gradient boosting
-     estimators. :issue:`6667` by :user:`Nelson Liu <nelson-liu>`.
+- Added a new splitting criterion for :class:`tree.DecisionTreeRegressor`,
+  the mean absolute error. This criterion can also be used in
+  :class:`ensemble.ExtraTreesRegressor`,
+  :class:`ensemble.RandomForestRegressor`, and the gradient boosting
+  estimators. :issue:`6667` by :user:`Nelson Liu <nelson-liu>`.
 
-   - Added weighted impurity-based early stopping criterion for decision tree
-     growth. :issue:`6954` by :user:`Nelson Liu <nelson-liu>`
+- Added weighted impurity-based early stopping criterion for decision tree
+  growth. :issue:`6954` by :user:`Nelson Liu <nelson-liu>`
 
-   - The random forest, extra tree and decision tree estimators now has a
-     method ``decision_path`` which returns the decision path of samples in
-     the tree. By `Arnaud Joly`_.
+- The random forest, extra tree and decision tree estimators now have a
+  method ``decision_path`` which returns the decision path of samples in
+  the tree. By `Arnaud Joly`_.
 
-   - A new example has been added unveiling the decision tree structure.
-     By `Arnaud Joly`_.
+- A new example has been added unveiling the decision tree structure.
+  By `Arnaud Joly`_.
 
-   - Random forest, extra trees, decision trees and gradient boosting estimator
-     accept the parameter ``min_samples_split`` and ``min_samples_leaf``
-     provided as a percentage of the training samples. By :user:`yelite <yelite>` and `Arnaud Joly`_.
+- Random forest, extra trees, decision trees and gradient boosting estimators
+  accept the parameters ``min_samples_split`` and ``min_samples_leaf``
+  provided as a percentage of the training samples. By :user:`yelite <yelite>` and `Arnaud Joly`_.
 
-   - Gradient boosting estimators accept the parameter ``criterion`` to specify
-     to splitting criterion used in built decision trees.
-     :issue:`6667` by :user:`Nelson Liu <nelson-liu>`.
+- Gradient boosting estimators accept the parameter ``criterion`` to specify
+  the splitting criterion used in built decision trees.
+  :issue:`6667` by :user:`Nelson Liu <nelson-liu>`.
 
-   - The memory footprint is reduced (sometimes greatly) for
-     :class:`ensemble.bagging.BaseBagging` and classes that inherit from it,
-     i.e, :class:`ensemble.BaggingClassifier`,
-     :class:`ensemble.BaggingRegressor`, and :class:`ensemble.IsolationForest`,
-     by dynamically generating attribute ``estimators_samples_`` only when it is
-     needed. By :user:`David Staub <staubda>`.
+- The memory footprint is reduced (sometimes greatly) for
+  :class:`ensemble.bagging.BaseBagging` and classes that inherit from it,
+  i.e., :class:`ensemble.BaggingClassifier`,
+  :class:`ensemble.BaggingRegressor`, and :class:`ensemble.IsolationForest`,
+  by dynamically generating attribute ``estimators_samples_`` only when it is
+  needed. By :user:`David Staub <staubda>`.
 
-   - Added ``n_jobs`` and ``sample_weight`` parameters for
-     :class:`ensemble.VotingClassifier` to fit underlying estimators in parallel.
-     :issue:`5805` by :user:`Ibraim Ganiev <olologin>`.
+- Added ``n_jobs`` and ``sample_weight`` parameters for
+  :class:`ensemble.VotingClassifier` to fit underlying estimators in parallel.
+  :issue:`5805` by :user:`Ibraim Ganiev <olologin>`.
 
 Linear, kernelized and related models
 
-   - In :class:`linear_model.LogisticRegression`, the SAG solver is now
-     available in the multinomial case. :issue:`5251` by `Tom Dupre la Tour`_.
+- In :class:`linear_model.LogisticRegression`, the SAG solver is now
+  available in the multinomial case. :issue:`5251` by `Tom Dupre la Tour`_.
 
-   - :class:`linear_model.RANSACRegressor`, :class:`svm.LinearSVC` and
-     :class:`svm.LinearSVR` now support ``sample_weight``.
-     By :user:`Imaculate <Imaculate>`.
+- :class:`linear_model.RANSACRegressor`, :class:`svm.LinearSVC` and
+  :class:`svm.LinearSVR` now support ``sample_weight``.
+  By :user:`Imaculate <Imaculate>`.
 
-   - Add parameter ``loss`` to :class:`linear_model.RANSACRegressor` to measure the
-     error on the samples for every trial. By `Manoj Kumar`_.
+- Add parameter ``loss`` to :class:`linear_model.RANSACRegressor` to measure the
+  error on the samples for every trial. By `Manoj Kumar`_.
 
-   - Prediction of out-of-sample events with Isotonic Regression
-     (:class:`isotonic.IsotonicRegression`) is now much faster (over 1000x in tests with synthetic
-     data). By :user:`Jonathan Arfa <jarfa>`.
+- Prediction of out-of-sample events with Isotonic Regression
+  (:class:`isotonic.IsotonicRegression`) is now much faster (over 1000x in tests with synthetic
+  data). By :user:`Jonathan Arfa <jarfa>`.
 
-   - Isotonic regression (:class:`isotonic.IsotonicRegression`) now uses a better algorithm to avoid
-     `O(n^2)` behavior in pathological cases, and is also generally faster
-     (:issue:`#6691`). By `Antony Lee`_.
+- Isotonic regression (:class:`isotonic.IsotonicRegression`) now uses a better algorithm to avoid
+  `O(n^2)` behavior in pathological cases, and is also generally faster
+  (:issue:`6691`). By `Antony Lee`_.
 
-   - :class:`naive_bayes.GaussianNB` now accepts data-independent class-priors
-     through the parameter ``priors``. By :user:`Guillaume Lemaitre <glemaitre>`.
+- :class:`naive_bayes.GaussianNB` now accepts data-independent class-priors
+  through the parameter ``priors``. By :user:`Guillaume Lemaitre <glemaitre>`.
 
-   - :class:`linear_model.ElasticNet` and :class:`linear_model.Lasso`
-     now works with ``np.float32`` input data without converting it
-     into ``np.float64``. This allows to reduce the memory
-     consumption. :issue:`6913` by :user:`YenChen Lin <yenchenlin>`.
+- :class:`linear_model.ElasticNet` and :class:`linear_model.Lasso`
+  now work with ``np.float32`` input data without converting it
+  into ``np.float64``. This allows reducing the memory
+  consumption. :issue:`6913` by :user:`YenChen Lin <yenchenlin>`.
 
-   - :class:`semi_supervised.LabelPropagation` and :class:`semi_supervised.LabelSpreading`
-     now accept arbitrary kernel functions in addition to strings ``knn`` and ``rbf``.
-     :issue:`5762` by :user:`Utkarsh Upadhyay <musically-ut>`.
+- :class:`semi_supervised.LabelPropagation` and :class:`semi_supervised.LabelSpreading`
+  now accept arbitrary kernel functions in addition to strings ``knn`` and ``rbf``.
+  :issue:`5762` by :user:`Utkarsh Upadhyay <musically-ut>`.
 
 Decomposition, manifold learning and clustering
 
-   - Added ``inverse_transform`` function to :class:`decomposition.NMF` to compute
-     data matrix of original shape. By :user:`Anish Shah <AnishShah>`.
+- Added ``inverse_transform`` function to :class:`decomposition.NMF` to compute
+  data matrix of original shape. By :user:`Anish Shah <AnishShah>`.
 
-   - :class:`cluster.KMeans` and :class:`cluster.MiniBatchKMeans` now works
-     with ``np.float32`` and ``np.float64`` input data without converting it.
-     This allows to reduce the memory consumption by using ``np.float32``.
-     :issue:`6846` by :user:`Sebastian Säger <ssaeger>` and
-     :user:`YenChen Lin <yenchenlin>`.
+- :class:`cluster.KMeans` and :class:`cluster.MiniBatchKMeans` now work
+  with ``np.float32`` and ``np.float64`` input data without converting it.
+  This allows reducing the memory consumption by using ``np.float32``.
+  :issue:`6846` by :user:`Sebastian Säger <ssaeger>` and
+  :user:`YenChen Lin <yenchenlin>`.
 
 Preprocessing and feature selection
 
-   - :class:`preprocessing.RobustScaler` now accepts ``quantile_range`` parameter.
-     :issue:`5929` by :user:`Konstantin Podshumok <podshumok>`.
+- :class:`preprocessing.RobustScaler` now accepts ``quantile_range`` parameter.
+  :issue:`5929` by :user:`Konstantin Podshumok <podshumok>`.
 
-   - :class:`feature_extraction.FeatureHasher` now accepts string values.
-     :issue:`6173` by :user:`Ryad Zenine <ryadzenine>` and
-     :user:`Devashish Deshpande <dsquareindia>`.
+- :class:`feature_extraction.FeatureHasher` now accepts string values.
+  :issue:`6173` by :user:`Ryad Zenine <ryadzenine>` and
+  :user:`Devashish Deshpande <dsquareindia>`.
 
-   - Keyword arguments can now be supplied to ``func`` in
-     :class:`preprocessing.FunctionTransformer` by means of the ``kw_args``
-     parameter. By `Brian McFee`_.
+- Keyword arguments can now be supplied to ``func`` in
+  :class:`preprocessing.FunctionTransformer` by means of the ``kw_args``
+  parameter. By `Brian McFee`_.
 
-   - :class:`feature_selection.SelectKBest` and :class:`feature_selection.SelectPercentile`
-     now accept score functions that take X, y as input and return only the scores.
-     By :user:`Nikolay Mayorov <nmayorov>`.
+- :class:`feature_selection.SelectKBest` and :class:`feature_selection.SelectPercentile`
+  now accept score functions that take X, y as input and return only the scores.
+  By :user:`Nikolay Mayorov <nmayorov>`.
 
 Model evaluation and meta-estimators
 
-   - :class:`multiclass.OneVsOneClassifier` and :class:`multiclass.OneVsRestClassifier`
-     now support ``partial_fit``. By :user:`Asish Panda <kaichogami>` and
-     :user:`Philipp Dowling <phdowling>`.
+- :class:`multiclass.OneVsOneClassifier` and :class:`multiclass.OneVsRestClassifier`
+  now support ``partial_fit``. By :user:`Asish Panda <kaichogami>` and
+  :user:`Philipp Dowling <phdowling>`.
 
-   - Added support for substituting or disabling :class:`pipeline.Pipeline`
-     and :class:`pipeline.FeatureUnion` components using the ``set_params``
-     interface that powers :mod:`sklearn.grid_search`.
-     See :ref:`sphx_glr_auto_examples_plot_compare_reduction.py`
-     By `Joel Nothman`_ and :user:`Robert McGibbon <rmcgibbo>`.
+- Added support for substituting or disabling :class:`pipeline.Pipeline`
+  and :class:`pipeline.FeatureUnion` components using the ``set_params``
+  interface that powers :mod:`sklearn.grid_search`.
+  See :ref:`sphx_glr_auto_examples_plot_compare_reduction.py`
+  By `Joel Nothman`_ and :user:`Robert McGibbon <rmcgibbo>`.
 
-   - The new ``cv_results_`` attribute of :class:`model_selection.GridSearchCV`
-     (and :class:`model_selection.RandomizedSearchCV`) can be easily imported
-     into pandas as a ``DataFrame``. Ref :ref:`model_selection_changes` for
-     more information. :issue:`6697` by `Raghav RV`_.
+- The new ``cv_results_`` attribute of :class:`model_selection.GridSearchCV`
+  (and :class:`model_selection.RandomizedSearchCV`) can be easily imported
+  into pandas as a ``DataFrame``. See :ref:`model_selection_changes` for
+  more information. :issue:`6697` by `Raghav RV`_.
 
-   - Generalization of :func:`model_selection.cross_val_predict`.
-     One can pass method names such as `predict_proba` to be used in the cross
-     validation framework instead of the default `predict`.
-     By :user:`Ori Ziv <zivori>` and :user:`Sears Merritt <merritts>`.
+- Generalization of :func:`model_selection.cross_val_predict`.
+  One can pass method names such as `predict_proba` to be used in the cross
+  validation framework instead of the default `predict`.
+  By :user:`Ori Ziv <zivori>` and :user:`Sears Merritt <merritts>`.
 
-   - The training scores and time taken for training followed by scoring for
-     each search candidate are now available at the ``cv_results_`` dict.
-     See :ref:`model_selection_changes` for more information.
-     :issue:`7325` by :user:`Eugene Chen <eyc88>` and `Raghav RV`_.
+- The training scores and time taken for training followed by scoring for
+  each search candidate are now available at the ``cv_results_`` dict.
+  See :ref:`model_selection_changes` for more information.
+  :issue:`7325` by :user:`Eugene Chen <eyc88>` and `Raghav RV`_.
 
 Metrics
 
-   - Added ``labels`` flag to :class:`metrics.log_loss` to explicitly provide
-     the labels when the number of classes in ``y_true`` and ``y_pred`` differ.
-     :issue:`7239` by :user:`Hong Guangguo <hongguangguo>` with help from
-     :user:`Mads Jensen <indianajensen>` and :user:`Nelson Liu <nelson-liu>`.
+- Added ``labels`` flag to :class:`metrics.log_loss` to explicitly provide
+  the labels when the number of classes in ``y_true`` and ``y_pred`` differ.
+  :issue:`7239` by :user:`Hong Guangguo <hongguangguo>` with help from
+  :user:`Mads Jensen <indianajensen>` and :user:`Nelson Liu <nelson-liu>`.
 
-   - Support sparse contingency matrices in cluster evaluation
-     (:mod:`metrics.cluster.supervised`) to scale to a large number of
-     clusters.
-     :issue:`7419` by :user:`Gregory Stupp <stuppie>` and `Joel Nothman`_.
+- Support sparse contingency matrices in cluster evaluation
+  (:mod:`metrics.cluster.supervised`) to scale to a large number of
+  clusters.
+  :issue:`7419` by :user:`Gregory Stupp <stuppie>` and `Joel Nothman`_.
 
-   - Add ``sample_weight`` parameter to :func:`metrics.matthews_corrcoef`.
-     By :user:`Jatin Shah <jatinshah>` and `Raghav RV`_.
+- Add ``sample_weight`` parameter to :func:`metrics.matthews_corrcoef`.
+  By :user:`Jatin Shah <jatinshah>` and `Raghav RV`_.
 
-   - Speed up :func:`metrics.silhouette_score` by using vectorized operations.
-     By `Manoj Kumar`_.
+- Speed up :func:`metrics.silhouette_score` by using vectorized operations.
+  By `Manoj Kumar`_.
 
-   - Add ``sample_weight`` parameter to :func:`metrics.confusion_matrix`.
-     By :user:`Bernardo Stein <DanielSidhion>`.
+- Add ``sample_weight`` parameter to :func:`metrics.confusion_matrix`.
+  By :user:`Bernardo Stein <DanielSidhion>`.
 
 Miscellaneous
 
-   - Added ``n_jobs`` parameter to :class:`feature_selection.RFECV` to compute
-     the score on the test folds in parallel. By `Manoj Kumar`_
-
-   - Codebase does not contain C/C++ cython generated files: they are
-     generated during build. Distribution packages will still contain generated
-     C/C++ files. By :user:`Arthur Mensch <arthurmensch>`.
-
-   - Reduce the memory usage for 32-bit float input arrays of
-     :func:`utils.sparse_func.mean_variance_axis` and
-     :func:`utils.sparse_func.incr_mean_variance_axis` by supporting cython
-     fused types. By :user:`YenChen Lin <yenchenlin>`.
-
-   - The :func:`ignore_warnings` now accept a category argument to ignore only
-     the warnings of a specified type. By :user:`Thierry Guillemot <tguillemot>`.
-
-   - Added parameter ``return_X_y`` and return type ``(data, target) : tuple`` option to
-     :func:`load_iris` dataset
-     :issue:`7049`,
-     :func:`load_breast_cancer` dataset
-     :issue:`7152`,
-     :func:`load_digits` dataset,
-     :func:`load_diabetes` dataset,
-     :func:`load_linnerud` dataset,
-     :func:`load_boston` dataset
-     :issue:`7154` by
-     :user:`Manvendra Singh<manu-chroma>`.
-
-   - Simplification of the ``clone`` function, deprecate support for estimators
-     that modify parameters in ``__init__``. :issue:`5540` by `Andreas Müller`_.
-
-   - When unpickling a scikit-learn estimator in a different version than the one
-     the estimator was trained with, a ``UserWarning`` is raised, see :ref:`the documentation
-     on model persistence <persistence_limitations>` for more details. (:issue:`7248`)
-     By `Andreas Müller`_.
+- Added ``n_jobs`` parameter to :class:`feature_selection.RFECV` to compute
+  the score on the test folds in parallel. By `Manoj Kumar`_
+
+- Codebase does not contain C/C++ cython generated files: they are
+  generated during build. Distribution packages will still contain generated
+  C/C++ files. By :user:`Arthur Mensch <arthurmensch>`.
+
+- Reduce the memory usage for 32-bit float input arrays of
+  :func:`utils.sparse_func.mean_variance_axis` and
+  :func:`utils.sparse_func.incr_mean_variance_axis` by supporting cython
+  fused types. By :user:`YenChen Lin <yenchenlin>`.
+
+- The :func:`ignore_warnings` now accepts a category argument to ignore only
+  the warnings of a specified type. By :user:`Thierry Guillemot <tguillemot>`.
+
+- Added parameter ``return_X_y`` and return type ``(data, target) : tuple`` option to
+  :func:`load_iris` dataset
+  :issue:`7049`,
+  :func:`load_breast_cancer` dataset
+  :issue:`7152`,
+  :func:`load_digits` dataset,
+  :func:`load_diabetes` dataset,
+  :func:`load_linnerud` dataset,
+  :func:`load_boston` dataset
+  :issue:`7154` by
+  :user:`Manvendra Singh<manu-chroma>`.
+
+- Simplification of the ``clone`` function, deprecate support for estimators
+  that modify parameters in ``__init__``. :issue:`5540` by `Andreas Müller`_.
+
+- When unpickling a scikit-learn estimator in a different version than the one
+  the estimator was trained with, a ``UserWarning`` is raised, see :ref:`the documentation
+  on model persistence <persistence_limitations>` for more details. (:issue:`7248`)
+  By `Andreas Müller`_.
 
 Bug fixes
 .........
 
 Trees and ensembles
 
-    - Random forest, extra trees, decision trees and gradient boosting
-      won't accept anymore ``min_samples_split=1`` as at least 2 samples
-      are required to split a decision tree node. By `Arnaud Joly`_
+- Random forest, extra trees, decision trees and gradient boosting
+  no longer accept ``min_samples_split=1`` as at least 2 samples
+  are required to split a decision tree node. By `Arnaud Joly`_
 
-    - :class:`ensemble.VotingClassifier` now raises ``NotFittedError`` if ``predict``,
-      ``transform`` or ``predict_proba`` are called on the non-fitted estimator.
-      by `Sebastian Raschka`_.
+- :class:`ensemble.VotingClassifier` now raises ``NotFittedError`` if ``predict``,
+  ``transform`` or ``predict_proba`` are called on the non-fitted estimator.
+  By `Sebastian Raschka`_.
 
-    - Fix bug where :class:`ensemble.AdaBoostClassifier` and
-      :class:`ensemble.AdaBoostRegressor` would perform poorly if the
-      ``random_state`` was fixed
-      (:issue:`7411`). By `Joel Nothman`_.
+- Fix bug where :class:`ensemble.AdaBoostClassifier` and
+  :class:`ensemble.AdaBoostRegressor` would perform poorly if the
+  ``random_state`` was fixed
+  (:issue:`7411`). By `Joel Nothman`_.
 
-    - Fix bug in ensembles with randomization where the ensemble would not
-      set ``random_state`` on base estimators in a pipeline or similar nesting.
-      (:issue:`7411`). Note, results for :class:`ensemble.BaggingClassifier`
-      :class:`ensemble.BaggingRegressor`, :class:`ensemble.AdaBoostClassifier`
-      and :class:`ensemble.AdaBoostRegressor` will now differ from previous
-      versions. By `Joel Nothman`_.
+- Fix bug in ensembles with randomization where the ensemble would not
+  set ``random_state`` on base estimators in a pipeline or similar nesting.
+  (:issue:`7411`). Note, results for :class:`ensemble.BaggingClassifier`,
+  :class:`ensemble.BaggingRegressor`, :class:`ensemble.AdaBoostClassifier`
+  and :class:`ensemble.AdaBoostRegressor` will now differ from previous
+  versions. By `Joel Nothman`_.
 
 Linear, kernelized and related models
 
-    - Fixed incorrect gradient computation for ``loss='squared_epsilon_insensitive'`` in
-      :class:`linear_model.SGDClassifier` and :class:`linear_model.SGDRegressor`
-      (:issue:`6764`). By :user:`Wenhua Yang <geekoala>`.
+- Fixed incorrect gradient computation for ``loss='squared_epsilon_insensitive'`` in
+  :class:`linear_model.SGDClassifier` and :class:`linear_model.SGDRegressor`
+  (:issue:`6764`). By :user:`Wenhua Yang <geekoala>`.
 
-    - Fix bug in :class:`linear_model.LogisticRegressionCV` where
-      ``solver='liblinear'`` did not accept ``class_weights='balanced``.
-      (:issue:`6817`). By `Tom Dupre la Tour`_.
+- Fix bug in :class:`linear_model.LogisticRegressionCV` where
+  ``solver='liblinear'`` did not accept ``class_weight='balanced'``.
+  (:issue:`6817`). By `Tom Dupre la Tour`_.
 
-    - Fix bug in :class:`neighbors.RadiusNeighborsClassifier` where an error
-      occurred when there were outliers being labelled and a weight function
-      specified (:issue:`6902`).  By
-      `LeonieBorne <https://github.com/LeonieBorne>`_.
+- Fix bug in :class:`neighbors.RadiusNeighborsClassifier` where an error
+  occurred when there were outliers being labelled and a weight function
+  specified (:issue:`6902`).  By
+  `LeonieBorne <https://github.com/LeonieBorne>`_.
 
-    - Fix :class:`linear_model.ElasticNet` sparse decision function to match
-      output with dense in the multioutput case.
+- Fix :class:`linear_model.ElasticNet` sparse decision function to match
+  output with dense in the multioutput case.
 
 Decomposition, manifold learning and clustering
 
-    - :class:`decomposition.RandomizedPCA` default number of `iterated_power` is 4 instead of 3.
-      :issue:`5141` by :user:`Giorgio Patrini <giorgiop>`.
+- :class:`decomposition.RandomizedPCA` default number of `iterated_power` is 4 instead of 3.
+  :issue:`5141` by :user:`Giorgio Patrini <giorgiop>`.
 
-    - :func:`utils.extmath.randomized_svd` performs 4 power iterations by default, instead or 0.
-      In practice this is enough for obtaining a good approximation of the
-      true eigenvalues/vectors in the presence of noise. When `n_components` is
-      small (``< .1 * min(X.shape)``) `n_iter` is set to 7, unless the user specifies
-      a higher number. This improves precision with few components.
-      :issue:`5299` by :user:`Giorgio Patrini<giorgiop>`.
+- :func:`utils.extmath.randomized_svd` performs 4 power iterations by default, instead of 0.
+  In practice this is enough for obtaining a good approximation of the
+  true eigenvalues/vectors in the presence of noise. When `n_components` is
+  small (``< .1 * min(X.shape)``) `n_iter` is set to 7, unless the user specifies
+  a higher number. This improves precision with few components.
+  :issue:`5299` by :user:`Giorgio Patrini<giorgiop>`.
 
-    - Whiten/non-whiten inconsistency between components of :class:`decomposition.PCA`
-      and :class:`decomposition.RandomizedPCA` (now factored into PCA, see the
-      New features) is fixed. `components_` are stored with no whitening.
-      :issue:`5299` by :user:`Giorgio Patrini <giorgiop>`.
+- Whiten/non-whiten inconsistency between components of :class:`decomposition.PCA`
+  and :class:`decomposition.RandomizedPCA` (now factored into PCA, see the
+  New features) is fixed. `components_` are stored with no whitening.
+  :issue:`5299` by :user:`Giorgio Patrini <giorgiop>`.
 
-    - Fixed bug in :func:`manifold.spectral_embedding` where diagonal of unnormalized
-      Laplacian matrix was incorrectly set to 1. :issue:`4995` by :user:`Peter Fischer <yanlend>`.
+- Fixed bug in :func:`manifold.spectral_embedding` where diagonal of unnormalized
+  Laplacian matrix was incorrectly set to 1. :issue:`4995` by :user:`Peter Fischer <yanlend>`.
 
-    - Fixed incorrect initialization of :func:`utils.arpack.eigsh` on all
-      occurrences. Affects :class:`cluster.bicluster.SpectralBiclustering`,
-      :class:`decomposition.KernelPCA`, :class:`manifold.LocallyLinearEmbedding`,
-      and :class:`manifold.SpectralEmbedding` (:issue:`5012`). By
-      :user:`Peter Fischer <yanlend>`.
+- Fixed incorrect initialization of :func:`utils.arpack.eigsh` on all
+  occurrences. Affects :class:`cluster.bicluster.SpectralBiclustering`,
+  :class:`decomposition.KernelPCA`, :class:`manifold.LocallyLinearEmbedding`,
+  and :class:`manifold.SpectralEmbedding` (:issue:`5012`). By
+  :user:`Peter Fischer <yanlend>`.
 
-    - Attribute ``explained_variance_ratio_`` calculated with the SVD solver
-      of :class:`discriminant_analysis.LinearDiscriminantAnalysis` now returns
-      correct results. By :user:`JPFrancoia <JPFrancoia>`
+- Attribute ``explained_variance_ratio_`` calculated with the SVD solver
+  of :class:`discriminant_analysis.LinearDiscriminantAnalysis` now returns
+  correct results. By :user:`JPFrancoia <JPFrancoia>`
 
 Preprocessing and feature selection
 
-    - :func:`preprocessing.data._transform_selected` now always passes a copy
-      of ``X`` to transform function when ``copy=True`` (:issue:`7194`). By `Caio
-      Oliveira <https://github.com/caioaao>`_.
+- :func:`preprocessing.data._transform_selected` now always passes a copy
+  of ``X`` to transform function when ``copy=True`` (:issue:`7194`). By `Caio
+  Oliveira <https://github.com/caioaao>`_.
 
 Model evaluation and meta-estimators
 
-    - :class:`model_selection.StratifiedKFold` now raises error if all n_labels
-      for individual classes is less than n_folds.
-      :issue:`6182` by :user:`Devashish Deshpande <dsquareindia>`.
+- :class:`model_selection.StratifiedKFold` now raises an error if all ``n_labels``
+  for individual classes are less than ``n_folds``.
+  :issue:`6182` by :user:`Devashish Deshpande <dsquareindia>`.
 
-    - Fixed bug in :class:`model_selection.StratifiedShuffleSplit`
-      where train and test sample could overlap in some edge cases,
-      see :issue:`6121` for
-      more details. By `Loic Esteve`_.
+- Fixed bug in :class:`model_selection.StratifiedShuffleSplit`
+  where train and test sample could overlap in some edge cases,
+  see :issue:`6121` for
+  more details. By `Loic Esteve`_.
 
-    - Fix in :class:`sklearn.model_selection.StratifiedShuffleSplit` to
-      return splits of size ``train_size`` and ``test_size`` in all cases
-      (:issue:`6472`). By `Andreas Müller`_.
+- Fix in :class:`sklearn.model_selection.StratifiedShuffleSplit` to
+  return splits of size ``train_size`` and ``test_size`` in all cases
+  (:issue:`6472`). By `Andreas Müller`_.
 
-    - Cross-validation of :class:`OneVsOneClassifier` and
-      :class:`OneVsRestClassifier` now works with precomputed kernels.
-      :issue:`7350` by :user:`Russell Smith <rsmith54>`.
+- Cross-validation of :class:`OneVsOneClassifier` and
+  :class:`OneVsRestClassifier` now works with precomputed kernels.
+  :issue:`7350` by :user:`Russell Smith <rsmith54>`.
 
-    - Fix incomplete ``predict_proba`` method delegation from
-      :class:`model_selection.GridSearchCV` to
-      :class:`linear_model.SGDClassifier` (:issue:`7159`)
-      by `Yichuan Liu <https://github.com/yl565>`_.
+- Fix incomplete ``predict_proba`` method delegation from
+  :class:`model_selection.GridSearchCV` to
+  :class:`linear_model.SGDClassifier` (:issue:`7159`)
+  by `Yichuan Liu <https://github.com/yl565>`_.
 
 Metrics
 
-    - Fix bug in :func:`metrics.silhouette_score` in which clusters of
-      size 1 were incorrectly scored. They should get a score of 0.
-      By `Joel Nothman`_.
+- Fix bug in :func:`metrics.silhouette_score` in which clusters of
+  size 1 were incorrectly scored. They should get a score of 0.
+  By `Joel Nothman`_.
 
-    - Fix bug in :func:`metrics.silhouette_samples` so that it now works with
-      arbitrary labels, not just those ranging from 0 to n_clusters - 1.
+- Fix bug in :func:`metrics.silhouette_samples` so that it now works with
+  arbitrary labels, not just those ranging from 0 to n_clusters - 1.
 
-    - Fix bug where expected and adjusted mutual information were incorrect if
-      cluster contingency cells exceeded ``2**16``. By `Joel Nothman`_.
+- Fix bug where expected and adjusted mutual information were incorrect if
+  cluster contingency cells exceeded ``2**16``. By `Joel Nothman`_.
 
-    - :func:`metrics.pairwise.pairwise_distances` now converts arrays to
-      boolean arrays when required in ``scipy.spatial.distance``.
-      :issue:`5460` by `Tom Dupre la Tour`_.
+- :func:`metrics.pairwise.pairwise_distances` now converts arrays to
+  boolean arrays when required in ``scipy.spatial.distance``.
+  :issue:`5460` by `Tom Dupre la Tour`_.
 
-    - Fix sparse input support in :func:`metrics.silhouette_score` as well as
-      example examples/text/document_clustering.py. By :user:`YenChen Lin <yenchenlin>`.
+- Fix sparse input support in :func:`metrics.silhouette_score` as well as
+  example examples/text/document_clustering.py. By :user:`YenChen Lin <yenchenlin>`.
 
-    - :func:`metrics.roc_curve` and :func:`metrics.precision_recall_curve` no
-      longer round ``y_score`` values when creating ROC curves; this was causing
-      problems for users with very small differences in scores (:issue:`7353`).
+- :func:`metrics.roc_curve` and :func:`metrics.precision_recall_curve` no
+  longer round ``y_score`` values when creating ROC curves; this was causing
+  problems for users with very small differences in scores (:issue:`7353`).
 
 Miscellaneous
 
-    - :func:`model_selection.tests._search._check_param_grid` now works correctly with all types
-      that extends/implements `Sequence` (except string), including range (Python 3.x) and xrange
-      (Python 2.x). :issue:`7323` by Viacheslav Kovalevskyi.
+- :func:`model_selection.tests._search._check_param_grid` now works correctly with all types
+  that extend/implement `Sequence` (except string), including range (Python 3.x) and xrange
+  (Python 2.x). :issue:`7323` by Viacheslav Kovalevskyi.
 
-    - :func:`utils.extmath.randomized_range_finder` is more numerically stable when many
-      power iterations are requested, since it applies LU normalization by default.
-      If ``n_iter<2`` numerical issues are unlikely, thus no normalization is applied.
-      Other normalization options are available: ``'none', 'LU'`` and ``'QR'``.
-      :issue:`5141` by :user:`Giorgio Patrini <giorgiop>`.
+- :func:`utils.extmath.randomized_range_finder` is more numerically stable when many
+  power iterations are requested, since it applies LU normalization by default.
+  If ``n_iter<2`` numerical issues are unlikely, thus no normalization is applied.
+  Other normalization options are available: ``'none', 'LU'`` and ``'QR'``.
+  :issue:`5141` by :user:`Giorgio Patrini <giorgiop>`.
 
-    - Fix a bug where some formats of ``scipy.sparse`` matrix, and estimators
-      with them as parameters, could not be passed to :func:`base.clone`.
-      By `Loic Esteve`_.
+- Fix a bug where some formats of ``scipy.sparse`` matrix, and estimators
+  with them as parameters, could not be passed to :func:`base.clone`.
+  By `Loic Esteve`_.
 
-    - :func:`datasets.load_svmlight_file` now is able to read long int QID values.
-      :issue:`7101` by :user:`Ibraim Ganiev <olologin>`.
+- :func:`datasets.load_svmlight_file` now is able to read long int QID values.
+  :issue:`7101` by :user:`Ibraim Ganiev <olologin>`.
 
 
 API changes summary
@@ -1522,74 +1642,74 @@ API changes summary
 
 Linear, kernelized and related models
 
-   - ``residual_metric`` has been deprecated in :class:`linear_model.RANSACRegressor`.
-     Use ``loss`` instead. By `Manoj Kumar`_.
+- ``residual_metric`` has been deprecated in :class:`linear_model.RANSACRegressor`.
+  Use ``loss`` instead. By `Manoj Kumar`_.
 
-   - Access to public attributes ``.X_`` and ``.y_`` has been deprecated in
-     :class:`isotonic.IsotonicRegression`. By :user:`Jonathan Arfa <jarfa>`.
+- Access to public attributes ``.X_`` and ``.y_`` has been deprecated in
+  :class:`isotonic.IsotonicRegression`. By :user:`Jonathan Arfa <jarfa>`.
 
 Decomposition, manifold learning and clustering
 
-   - The old :class:`mixture.DPGMM` is deprecated in favor of the new
-     :class:`mixture.BayesianGaussianMixture` (with the parameter
-     ``weight_concentration_prior_type='dirichlet_process'``).
-     The new class solves the computational
-     problems of the old class and computes the Gaussian mixture with a
-     Dirichlet process prior faster than before.
-     :issue:`7295` by :user:`Wei Xue <xuewei4d>` and :user:`Thierry Guillemot <tguillemot>`.
-
-   - The old :class:`mixture.VBGMM` is deprecated in favor of the new
-     :class:`mixture.BayesianGaussianMixture` (with the parameter
-     ``weight_concentration_prior_type='dirichlet_distribution'``).
-     The new class solves the computational
-     problems of the old class and computes the Variational Bayesian Gaussian
-     mixture faster than before.
-     :issue:`6651` by :user:`Wei Xue <xuewei4d>` and :user:`Thierry Guillemot <tguillemot>`.
-
-   - The old :class:`mixture.GMM` is deprecated in favor of the new
-     :class:`mixture.GaussianMixture`. The new class computes the Gaussian mixture
-     faster than before and some of computational problems have been solved.
-     :issue:`6666` by :user:`Wei Xue <xuewei4d>` and :user:`Thierry Guillemot <tguillemot>`.
+- The old :class:`mixture.DPGMM` is deprecated in favor of the new
+  :class:`mixture.BayesianGaussianMixture` (with the parameter
+  ``weight_concentration_prior_type='dirichlet_process'``).
+  The new class solves the computational
+  problems of the old class and computes the Gaussian mixture with a
+  Dirichlet process prior faster than before.
+  :issue:`7295` by :user:`Wei Xue <xuewei4d>` and :user:`Thierry Guillemot <tguillemot>`.
+
+- The old :class:`mixture.VBGMM` is deprecated in favor of the new
+  :class:`mixture.BayesianGaussianMixture` (with the parameter
+  ``weight_concentration_prior_type='dirichlet_distribution'``).
+  The new class solves the computational
+  problems of the old class and computes the Variational Bayesian Gaussian
+  mixture faster than before.
+  :issue:`6651` by :user:`Wei Xue <xuewei4d>` and :user:`Thierry Guillemot <tguillemot>`.
+
+- The old :class:`mixture.GMM` is deprecated in favor of the new
+  :class:`mixture.GaussianMixture`. The new class computes the Gaussian mixture
+  faster than before and some computational problems have been solved.
+  :issue:`6666` by :user:`Wei Xue <xuewei4d>` and :user:`Thierry Guillemot <tguillemot>`.
 
 Model evaluation and meta-estimators
 
-   - The :mod:`sklearn.cross_validation`, :mod:`sklearn.grid_search` and
-     :mod:`sklearn.learning_curve` have been deprecated and the classes and
-     functions have been reorganized into the :mod:`sklearn.model_selection`
-     module. Ref :ref:`model_selection_changes` for more information.
-     :issue:`4294` by `Raghav RV`_.
-
-   - The ``grid_scores_`` attribute of :class:`model_selection.GridSearchCV`
-     and :class:`model_selection.RandomizedSearchCV` is deprecated in favor of
-     the attribute ``cv_results_``.
-     Ref :ref:`model_selection_changes` for more information.
-     :issue:`6697` by `Raghav RV`_.
-
-   - The parameters ``n_iter`` or ``n_folds`` in old CV splitters are replaced
-     by the new parameter ``n_splits`` since it can provide a consistent
-     and unambiguous interface to represent the number of train-test splits.
-     :issue:`7187` by :user:`YenChen Lin <yenchenlin>`.
-
-   - ``classes`` parameter was renamed to ``labels`` in
-     :func:`metrics.hamming_loss`. :issue:`7260` by :user:`Sebastián Vanrell <srvanrell>`.
-
-   - The splitter classes ``LabelKFold``, ``LabelShuffleSplit``,
-     ``LeaveOneLabelOut`` and ``LeavePLabelsOut`` are renamed to
-     :class:`model_selection.GroupKFold`,
-     :class:`model_selection.GroupShuffleSplit`,
-     :class:`model_selection.LeaveOneGroupOut`
-     and :class:`model_selection.LeavePGroupsOut` respectively.
-     Also the parameter ``labels`` in the :func:`split` method of the newly
-     renamed splitters :class:`model_selection.LeaveOneGroupOut` and
-     :class:`model_selection.LeavePGroupsOut` is renamed to
-     ``groups``. Additionally in :class:`model_selection.LeavePGroupsOut`,
-     the parameter ``n_labels`` is renamed to ``n_groups``.
-     :issue:`6660` by `Raghav RV`_.
-
-   - Error and loss names for ``scoring`` parameters are now prefixed by
-     ``'neg_'``, such as ``neg_mean_squared_error``. The unprefixed versions
-     are deprecated and will be removed in version 0.20.
-     :issue:`7261` by :user:`Tim Head <betatim>`.
+- The :mod:`sklearn.cross_validation`, :mod:`sklearn.grid_search` and
+  :mod:`sklearn.learning_curve` have been deprecated and the classes and
+  functions have been reorganized into the :mod:`sklearn.model_selection`
+  module. Ref :ref:`model_selection_changes` for more information.
+  :issue:`4294` by `Raghav RV`_.
+
+- The ``grid_scores_`` attribute of :class:`model_selection.GridSearchCV`
+  and :class:`model_selection.RandomizedSearchCV` is deprecated in favor of
+  the attribute ``cv_results_``.
+  Ref :ref:`model_selection_changes` for more information.
+  :issue:`6697` by `Raghav RV`_.
+
+- The parameters ``n_iter`` or ``n_folds`` in old CV splitters are replaced
+  by the new parameter ``n_splits`` since it can provide a consistent
+  and unambiguous interface to represent the number of train-test splits.
+  :issue:`7187` by :user:`YenChen Lin <yenchenlin>`.
+
+- ``classes`` parameter was renamed to ``labels`` in
+  :func:`metrics.hamming_loss`. :issue:`7260` by :user:`Sebastián Vanrell <srvanrell>`.
+
+- The splitter classes ``LabelKFold``, ``LabelShuffleSplit``,
+  ``LeaveOneLabelOut`` and ``LeavePLabelsOut`` are renamed to
+  :class:`model_selection.GroupKFold`,
+  :class:`model_selection.GroupShuffleSplit`,
+  :class:`model_selection.LeaveOneGroupOut`
+  and :class:`model_selection.LeavePGroupsOut` respectively.
+  Also the parameter ``labels`` in the :func:`split` method of the newly
+  renamed splitters :class:`model_selection.LeaveOneGroupOut` and
+  :class:`model_selection.LeavePGroupsOut` is renamed to
+  ``groups``. Additionally in :class:`model_selection.LeavePGroupsOut`,
+  the parameter ``n_labels`` is renamed to ``n_groups``.
+  :issue:`6660` by `Raghav RV`_.
+
+- Error and loss names for ``scoring`` parameters are now prefixed by
+  ``'neg_'``, such as ``neg_mean_squared_error``. The unprefixed versions
+  are deprecated and will be removed in version 0.20.
+  :issue:`7261` by :user:`Tim Head <betatim>`.
 
 Code Contributors
 -----------------
@@ -1662,29 +1782,29 @@ Bug fixes
 .........
 
 
-    - Upgrade vendored joblib to version 0.9.4 that fixes an important bug in
-      ``joblib.Parallel`` that can silently yield to wrong results when working
-      on datasets larger than 1MB:
-      https://github.com/joblib/joblib/blob/0.9.4/CHANGES.rst
+- Upgrade vendored joblib to version 0.9.4 that fixes an important bug in
+  ``joblib.Parallel`` that can silently yield wrong results when working
+  on datasets larger than 1MB:
+  https://github.com/joblib/joblib/blob/0.9.4/CHANGES.rst
 
-    - Fixed reading of Bunch pickles generated with scikit-learn
-      version <= 0.16. This can affect users who have already
-      downloaded a dataset with scikit-learn 0.16 and are loading it
-      with scikit-learn 0.17. See :issue:`6196` for
-      how this affected :func:`datasets.fetch_20newsgroups`. By `Loic
-      Esteve`_.
+- Fixed reading of Bunch pickles generated with scikit-learn
+  version <= 0.16. This can affect users who have already
+  downloaded a dataset with scikit-learn 0.16 and are loading it
+  with scikit-learn 0.17. See :issue:`6196` for
+  how this affected :func:`datasets.fetch_20newsgroups`. By `Loic
+  Esteve`_.
 
-    - Fixed a bug that prevented using ROC AUC score to perform grid search on
-      several CPU / cores on large arrays. See :issue:`6147`
-      By `Olivier Grisel`_.
+- Fixed a bug that prevented using ROC AUC score to perform grid search on
+  several CPU / cores on large arrays. See :issue:`6147`
+  By `Olivier Grisel`_.
 
-    - Fixed a bug that prevented to properly set the ``presort`` parameter
-      in :class:`ensemble.GradientBoostingRegressor`. See :issue:`5857`
-      By Andrew McCulloh.
+- Fixed a bug that prevented the ``presort`` parameter from being properly set
+  in :class:`ensemble.GradientBoostingRegressor`. See :issue:`5857`
+  By Andrew McCulloh.
 
-    - Fixed a joblib error when evaluating the perplexity of a
-      :class:`decomposition.LatentDirichletAllocation` model. See :issue:`6258`
-      By Chyi-Kwei Yau.
+- Fixed a joblib error when evaluating the perplexity of a
+  :class:`decomposition.LatentDirichletAllocation` model. See :issue:`6258`
+  By Chyi-Kwei Yau.
 
 
 .. _changes_0_17:
@@ -1700,425 +1820,425 @@ Changelog
 New features
 ............
 
-   - All the Scaler classes but :class:`preprocessing.RobustScaler` can be fitted online by
-     calling `partial_fit`. By :user:`Giorgio Patrini <giorgiop>`.
-
-   - The new class :class:`ensemble.VotingClassifier` implements a
-     "majority rule" / "soft voting" ensemble classifier to combine
-     estimators for classification. By `Sebastian Raschka`_.
-
-   - The new class :class:`preprocessing.RobustScaler` provides an
-     alternative to :class:`preprocessing.StandardScaler` for feature-wise
-     centering and range normalization that is robust to outliers.
-     By :user:`Thomas Unterthiner <untom>`.
-
-   - The new class :class:`preprocessing.MaxAbsScaler` provides an
-     alternative to :class:`preprocessing.MinMaxScaler` for feature-wise
-     range normalization when the data is already centered or sparse.
-     By :user:`Thomas Unterthiner <untom>`.
-
-   - The new class :class:`preprocessing.FunctionTransformer` turns a Python
-     function into a ``Pipeline``-compatible transformer object.
-     By Joe Jevnik.
-
-   - The new classes :class:`cross_validation.LabelKFold` and
-     :class:`cross_validation.LabelShuffleSplit` generate train-test folds,
-     respectively similar to :class:`cross_validation.KFold` and
-     :class:`cross_validation.ShuffleSplit`, except that the folds are
-     conditioned on a label array. By `Brian McFee`_, :user:`Jean
-     Kossaifi <JeanKossaifi>` and `Gilles Louppe`_.
-
-   - :class:`decomposition.LatentDirichletAllocation` implements the Latent
-     Dirichlet Allocation topic model with online  variational
-     inference. By :user:`Chyi-Kwei Yau <chyikwei>`, with code based on an implementation
-     by Matt Hoffman. (:issue:`3659`)
-
-   - The new solver ``sag`` implements a Stochastic Average Gradient descent
-     and is available in both :class:`linear_model.LogisticRegression` and
-     :class:`linear_model.Ridge`. This solver is very efficient for large
-     datasets. By :user:`Danny Sullivan <dsullivan7>` and `Tom Dupre la Tour`_.
-     (:issue:`4738`)
-
-   - The new solver ``cd`` implements a Coordinate Descent in
-     :class:`decomposition.NMF`. Previous solver based on Projected Gradient is
-     still available setting new parameter ``solver`` to ``pg``, but is
-     deprecated and will be removed in 0.19, along with
-     :class:`decomposition.ProjectedGradientNMF` and parameters ``sparseness``,
-     ``eta``, ``beta`` and ``nls_max_iter``. New parameters ``alpha`` and
-     ``l1_ratio`` control L1 and L2 regularization, and ``shuffle`` adds a
-     shuffling step in the ``cd`` solver.
-     By `Tom Dupre la Tour`_ and `Mathieu Blondel`_.
+- All the Scaler classes but :class:`preprocessing.RobustScaler` can be fitted online by
+  calling `partial_fit`. By :user:`Giorgio Patrini <giorgiop>`.
+
+- The new class :class:`ensemble.VotingClassifier` implements a
+  "majority rule" / "soft voting" ensemble classifier to combine
+  estimators for classification. By `Sebastian Raschka`_.
+
+- The new class :class:`preprocessing.RobustScaler` provides an
+  alternative to :class:`preprocessing.StandardScaler` for feature-wise
+  centering and range normalization that is robust to outliers.
+  By :user:`Thomas Unterthiner <untom>`.
+
+- The new class :class:`preprocessing.MaxAbsScaler` provides an
+  alternative to :class:`preprocessing.MinMaxScaler` for feature-wise
+  range normalization when the data is already centered or sparse.
+  By :user:`Thomas Unterthiner <untom>`.
+
+- The new class :class:`preprocessing.FunctionTransformer` turns a Python
+  function into a ``Pipeline``-compatible transformer object.
+  By Joe Jevnik.
+
+- The new classes :class:`cross_validation.LabelKFold` and
+  :class:`cross_validation.LabelShuffleSplit` generate train-test folds,
+  respectively similar to :class:`cross_validation.KFold` and
+  :class:`cross_validation.ShuffleSplit`, except that the folds are
+  conditioned on a label array. By `Brian McFee`_, :user:`Jean
+  Kossaifi <JeanKossaifi>` and `Gilles Louppe`_.
+
+- :class:`decomposition.LatentDirichletAllocation` implements the Latent
+  Dirichlet Allocation topic model with online variational
+  inference. By :user:`Chyi-Kwei Yau <chyikwei>`, with code based on an implementation
+  by Matt Hoffman. (:issue:`3659`)
+
+- The new solver ``sag`` implements a Stochastic Average Gradient descent
+  and is available in both :class:`linear_model.LogisticRegression` and
+  :class:`linear_model.Ridge`. This solver is very efficient for large
+  datasets. By :user:`Danny Sullivan <dsullivan7>` and `Tom Dupre la Tour`_.
+  (:issue:`4738`)
+
+- The new solver ``cd`` implements a Coordinate Descent in
+  :class:`decomposition.NMF`. Previous solver based on Projected Gradient is
+  still available setting new parameter ``solver`` to ``pg``, but is
+  deprecated and will be removed in 0.19, along with
+  :class:`decomposition.ProjectedGradientNMF` and parameters ``sparseness``,
+  ``eta``, ``beta`` and ``nls_max_iter``. New parameters ``alpha`` and
+  ``l1_ratio`` control L1 and L2 regularization, and ``shuffle`` adds a
+  shuffling step in the ``cd`` solver.
+  By `Tom Dupre la Tour`_ and `Mathieu Blondel`_.
 
 Enhancements
 ............
-   - :class:`manifold.TSNE` now supports approximate optimization via the
-     Barnes-Hut method, leading to much faster fitting. By Christopher Erick Moody.
-     (:issue:`4025`)
+- :class:`manifold.TSNE` now supports approximate optimization via the
+  Barnes-Hut method, leading to much faster fitting. By Christopher Erick Moody.
+  (:issue:`4025`)
 
-   - :class:`cluster.mean_shift_.MeanShift` now supports parallel execution,
-     as implemented in the ``mean_shift`` function. By :user:`Martino
-     Sorbaro <martinosorb>`.
+- :class:`cluster.mean_shift_.MeanShift` now supports parallel execution,
+  as implemented in the ``mean_shift`` function. By :user:`Martino
+  Sorbaro <martinosorb>`.
 
-   - :class:`naive_bayes.GaussianNB` now supports fitting with ``sample_weight``.
-     By `Jan Hendrik Metzen`_.
+- :class:`naive_bayes.GaussianNB` now supports fitting with ``sample_weight``.
+  By `Jan Hendrik Metzen`_.
 
-   - :class:`dummy.DummyClassifier` now supports a prior fitting strategy.
-     By `Arnaud Joly`_.
+- :class:`dummy.DummyClassifier` now supports a prior fitting strategy.
+  By `Arnaud Joly`_.
 
-   - Added a ``fit_predict`` method for :class:`mixture.GMM` and subclasses.
-     By :user:`Cory Lorenz <clorenz7>`.
+- Added a ``fit_predict`` method for :class:`mixture.GMM` and subclasses.
+  By :user:`Cory Lorenz <clorenz7>`.
 
-   - Added the :func:`metrics.label_ranking_loss` metric.
-     By `Arnaud Joly`_.
+- Added the :func:`metrics.label_ranking_loss` metric.
+  By `Arnaud Joly`_.
 
-   - Added the :func:`metrics.cohen_kappa_score` metric.
+- Added the :func:`metrics.cohen_kappa_score` metric.
 
-   - Added a ``warm_start`` constructor parameter to the bagging ensemble
-     models to increase the size of the ensemble. By :user:`Tim Head <betatim>`.
+- Added a ``warm_start`` constructor parameter to the bagging ensemble
+  models to increase the size of the ensemble. By :user:`Tim Head <betatim>`.
 
-   - Added option to use multi-output regression metrics without averaging.
-     By Konstantin Shmelkov and :user:`Michael Eickenberg<eickenberg>`.
+- Added option to use multi-output regression metrics without averaging.
+  By Konstantin Shmelkov and :user:`Michael Eickenberg<eickenberg>`.
 
-   - Added ``stratify`` option to :func:`cross_validation.train_test_split`
-     for stratified splitting. By Miroslav Batchkarov.
+- Added ``stratify`` option to :func:`cross_validation.train_test_split`
+  for stratified splitting. By Miroslav Batchkarov.
 
-   - The :func:`tree.export_graphviz` function now supports aesthetic
-     improvements for :class:`tree.DecisionTreeClassifier` and
-     :class:`tree.DecisionTreeRegressor`, including options for coloring nodes
-     by their majority class or impurity, showing variable names, and using
-     node proportions instead of raw sample counts. By `Trevor Stephens`_.
+- The :func:`tree.export_graphviz` function now supports aesthetic
+  improvements for :class:`tree.DecisionTreeClassifier` and
+  :class:`tree.DecisionTreeRegressor`, including options for coloring nodes
+  by their majority class or impurity, showing variable names, and using
+  node proportions instead of raw sample counts. By `Trevor Stephens`_.
 
-   - Improved speed of ``newton-cg`` solver in
-     :class:`linear_model.LogisticRegression`, by avoiding loss computation.
-     By `Mathieu Blondel`_ and `Tom Dupre la Tour`_.
+- Improved speed of ``newton-cg`` solver in
+  :class:`linear_model.LogisticRegression`, by avoiding loss computation.
+  By `Mathieu Blondel`_ and `Tom Dupre la Tour`_.
 
-   - The ``class_weight="auto"`` heuristic in classifiers supporting
-     ``class_weight`` was deprecated and replaced by the ``class_weight="balanced"``
-     option, which has a simpler formula and interpretation.
-     By `Hanna Wallach`_ and `Andreas Müller`_.
+- The ``class_weight="auto"`` heuristic in classifiers supporting
+  ``class_weight`` was deprecated and replaced by the ``class_weight="balanced"``
+  option, which has a simpler formula and interpretation.
+  By `Hanna Wallach`_ and `Andreas Müller`_.
 
-   - Add ``class_weight`` parameter to automatically weight samples by class
-     frequency for :class:`linear_model.PassiveAgressiveClassifier`. By
-     `Trevor Stephens`_.
+- Add ``class_weight`` parameter to automatically weight samples by class
+  frequency for :class:`linear_model.PassiveAggressiveClassifier`. By
+  `Trevor Stephens`_.
 
-   - Added backlinks from the API reference pages to the user guide. By
-     `Andreas Müller`_.
+- Added backlinks from the API reference pages to the user guide. By
+  `Andreas Müller`_.
 
-   - The ``labels`` parameter to :func:`sklearn.metrics.f1_score`,
-     :func:`sklearn.metrics.fbeta_score`,
-     :func:`sklearn.metrics.recall_score` and
-     :func:`sklearn.metrics.precision_score` has been extended.
-     It is now possible to ignore one or more labels, such as where
-     a multiclass problem has a majority class to ignore. By `Joel Nothman`_.
+- The ``labels`` parameter to :func:`sklearn.metrics.f1_score`,
+  :func:`sklearn.metrics.fbeta_score`,
+  :func:`sklearn.metrics.recall_score` and
+  :func:`sklearn.metrics.precision_score` has been extended.
+  It is now possible to ignore one or more labels, such as where
+  a multiclass problem has a majority class to ignore. By `Joel Nothman`_.
 
-   - Add ``sample_weight`` support to :class:`linear_model.RidgeClassifier`.
-     By `Trevor Stephens`_.
+- Add ``sample_weight`` support to :class:`linear_model.RidgeClassifier`.
+  By `Trevor Stephens`_.
 
-   - Provide an option for sparse output from
-     :func:`sklearn.metrics.pairwise.cosine_similarity`. By
-     :user:`Jaidev Deshpande <jaidevd>`.
+- Provide an option for sparse output from
+  :func:`sklearn.metrics.pairwise.cosine_similarity`. By
+  :user:`Jaidev Deshpande <jaidevd>`.
 
-   - Add :func:`minmax_scale` to provide a function interface for
-     :class:`MinMaxScaler`. By :user:`Thomas Unterthiner <untom>`.
+- Add :func:`minmax_scale` to provide a function interface for
+  :class:`MinMaxScaler`. By :user:`Thomas Unterthiner <untom>`.
 
-   - ``dump_svmlight_file`` now handles multi-label datasets.
-     By Chih-Wei Chang.
+- ``dump_svmlight_file`` now handles multi-label datasets.
+  By Chih-Wei Chang.
 
-   - RCV1 dataset loader (:func:`sklearn.datasets.fetch_rcv1`).
-     By `Tom Dupre la Tour`_.
+- RCV1 dataset loader (:func:`sklearn.datasets.fetch_rcv1`).
+  By `Tom Dupre la Tour`_.
 
-   - The "Wisconsin Breast Cancer" classical two-class classification dataset
-     is now included in scikit-learn, available with
-     :func:`sklearn.dataset.load_breast_cancer`.
+- The "Wisconsin Breast Cancer" classical two-class classification dataset
+  is now included in scikit-learn, available with
+  :func:`sklearn.datasets.load_breast_cancer`.
 
-   - Upgraded to joblib 0.9.3 to benefit from the new automatic batching of
-     short tasks. This makes it possible for scikit-learn to benefit from
-     parallelism when many very short tasks are executed in parallel, for
-     instance by the :class:`grid_search.GridSearchCV` meta-estimator
-     with ``n_jobs > 1`` used with a large grid of parameters on a small
-     dataset. By `Vlad Niculae`_, `Olivier Grisel`_ and `Loic Esteve`_.
+- Upgraded to joblib 0.9.3 to benefit from the new automatic batching of
+  short tasks. This makes it possible for scikit-learn to benefit from
+  parallelism when many very short tasks are executed in parallel, for
+  instance by the :class:`grid_search.GridSearchCV` meta-estimator
+  with ``n_jobs > 1`` used with a large grid of parameters on a small
+  dataset. By `Vlad Niculae`_, `Olivier Grisel`_ and `Loic Esteve`_.
 
-   - For more details about changes in joblib 0.9.3 see the release notes:
-     https://github.com/joblib/joblib/blob/master/CHANGES.rst#release-093
+- For more details about changes in joblib 0.9.3 see the release notes:
+  https://github.com/joblib/joblib/blob/master/CHANGES.rst#release-093
 
-   - Improved speed (3 times per iteration) of
-     :class:`decomposition.DictLearning` with coordinate descent method
-     from :class:`linear_model.Lasso`. By :user:`Arthur Mensch <arthurmensch>`.
+- Improved speed (3 times per iteration) of
+  :class:`decomposition.DictLearning` with coordinate descent method
+  from :class:`linear_model.Lasso`. By :user:`Arthur Mensch <arthurmensch>`.
 
-   - Parallel processing (threaded) for queries of nearest neighbors
-     (using the ball-tree) by Nikolay Mayorov.
+- Parallel processing (threaded) for queries of nearest neighbors
+  (using the ball-tree) by Nikolay Mayorov.
 
-   - Allow :func:`datasets.make_multilabel_classification` to output
-     a sparse ``y``. By Kashif Rasul.
+- Allow :func:`datasets.make_multilabel_classification` to output
+  a sparse ``y``. By Kashif Rasul.
 
-   - :class:`cluster.DBSCAN` now accepts a sparse matrix of precomputed
-     distances, allowing memory-efficient distance precomputation. By
-     `Joel Nothman`_.
+- :class:`cluster.DBSCAN` now accepts a sparse matrix of precomputed
+  distances, allowing memory-efficient distance precomputation. By
+  `Joel Nothman`_.
 
-   - :class:`tree.DecisionTreeClassifier` now exposes an ``apply`` method
-     for retrieving the leaf indices samples are predicted as. By
-     :user:`Daniel Galvez <galv>` and `Gilles Louppe`_.
+- :class:`tree.DecisionTreeClassifier` now exposes an ``apply`` method
+  for retrieving the leaf indices samples are predicted as. By
+  :user:`Daniel Galvez <galv>` and `Gilles Louppe`_.
 
-   - Speed up decision tree regressors, random forest regressors, extra trees
-     regressors and gradient boosting estimators by computing a proxy
-     of the impurity improvement during the tree growth. The proxy quantity is
-     such that the split that maximizes this value also maximizes the impurity
-     improvement. By `Arnaud Joly`_, :user:`Jacob Schreiber <jmschrei>`
-     and `Gilles Louppe`_.
+- Speed up decision tree regressors, random forest regressors, extra trees
+  regressors and gradient boosting estimators by computing a proxy
+  of the impurity improvement during the tree growth. The proxy quantity is
+  such that the split that maximizes this value also maximizes the impurity
+  improvement. By `Arnaud Joly`_, :user:`Jacob Schreiber <jmschrei>`
+  and `Gilles Louppe`_.
 
-   - Speed up tree based methods by reducing the number of computations needed
-     when computing the impurity measure taking into account linear
-     relationship of the computed statistics. The effect is particularly
-     visible with extra trees and on datasets with categorical or sparse
-     features. By `Arnaud Joly`_.
+- Speed up tree based methods by reducing the number of computations needed
+  when computing the impurity measure taking into account linear
+  relationship of the computed statistics. The effect is particularly
+  visible with extra trees and on datasets with categorical or sparse
+  features. By `Arnaud Joly`_.
 
-   - :class:`ensemble.GradientBoostingRegressor` and
-     :class:`ensemble.GradientBoostingClassifier` now expose an ``apply``
-     method for retrieving the leaf indices each sample ends up in under
-     each try. By :user:`Jacob Schreiber <jmschrei>`.
+- :class:`ensemble.GradientBoostingRegressor` and
+  :class:`ensemble.GradientBoostingClassifier` now expose an ``apply``
+  method for retrieving the leaf indices each sample ends up in under
+  each tree. By :user:`Jacob Schreiber <jmschrei>`.
 
-   - Add ``sample_weight`` support to :class:`linear_model.LinearRegression`.
-     By Sonny Hu. (:issue:`#4881`)
+- Add ``sample_weight`` support to :class:`linear_model.LinearRegression`.
+  By Sonny Hu. (:issue:`4881`)
 
-   - Add ``n_iter_without_progress`` to :class:`manifold.TSNE` to control
-     the stopping criterion. By Santi Villalba. (:issue:`5186`)
+- Add ``n_iter_without_progress`` to :class:`manifold.TSNE` to control
+  the stopping criterion. By Santi Villalba. (:issue:`5186`)
 
-   - Added optional parameter ``random_state`` in :class:`linear_model.Ridge`
-     , to set the seed of the pseudo random generator used in ``sag`` solver. By `Tom Dupre la Tour`_.
+- Added optional parameter ``random_state`` in :class:`linear_model.Ridge`,
+  to set the seed of the pseudo random generator used in the ``sag`` solver. By `Tom Dupre la Tour`_.
 
-   - Added optional parameter ``warm_start`` in
-     :class:`linear_model.LogisticRegression`. If set to True, the solvers
-     ``lbfgs``, ``newton-cg`` and ``sag`` will be initialized with the
-     coefficients computed in the previous fit. By `Tom Dupre la Tour`_.
+- Added optional parameter ``warm_start`` in
+  :class:`linear_model.LogisticRegression`. If set to True, the solvers
+  ``lbfgs``, ``newton-cg`` and ``sag`` will be initialized with the
+  coefficients computed in the previous fit. By `Tom Dupre la Tour`_.
 
-   - Added ``sample_weight`` support to :class:`linear_model.LogisticRegression` for
-     the ``lbfgs``, ``newton-cg``, and ``sag`` solvers. By `Valentin Stolbunov`_.
-     Support added to the ``liblinear`` solver. By `Manoj Kumar`_.
+- Added ``sample_weight`` support to :class:`linear_model.LogisticRegression` for
+  the ``lbfgs``, ``newton-cg``, and ``sag`` solvers. By `Valentin Stolbunov`_.
+  Support added to the ``liblinear`` solver. By `Manoj Kumar`_.
 
-   - Added optional parameter ``presort`` to :class:`ensemble.GradientBoostingRegressor`
-     and :class:`ensemble.GradientBoostingClassifier`, keeping default behavior
-     the same. This allows gradient boosters to turn off presorting when building
-     deep trees or using sparse data. By :user:`Jacob Schreiber <jmschrei>`.
+- Added optional parameter ``presort`` to :class:`ensemble.GradientBoostingRegressor`
+  and :class:`ensemble.GradientBoostingClassifier`, keeping default behavior
+  the same. This allows gradient boosters to turn off presorting when building
+  deep trees or using sparse data. By :user:`Jacob Schreiber <jmschrei>`.
 
-   - Altered :func:`metrics.roc_curve` to drop unnecessary thresholds by
-     default. By :user:`Graham Clenaghan <gclenaghan>`.
+- Altered :func:`metrics.roc_curve` to drop unnecessary thresholds by
+  default. By :user:`Graham Clenaghan <gclenaghan>`.
 
-   - Added :class:`feature_selection.SelectFromModel` meta-transformer which can
-     be used along with estimators that have `coef_` or `feature_importances_`
-     attribute to select important features of the input data. By
-     :user:`Maheshakya Wijewardena <maheshakya>`, `Joel Nothman`_ and `Manoj Kumar`_.
+- Added :class:`feature_selection.SelectFromModel` meta-transformer which can
+  be used along with estimators that have `coef_` or `feature_importances_`
+  attribute to select important features of the input data. By
+  :user:`Maheshakya Wijewardena <maheshakya>`, `Joel Nothman`_ and `Manoj Kumar`_.
 
-   - Added :func:`metrics.pairwise.laplacian_kernel`.  By `Clyde Fare <https://github.com/Clyde-fare>`_.
+- Added :func:`metrics.pairwise.laplacian_kernel`.  By `Clyde Fare <https://github.com/Clyde-fare>`_.
 
-   - :class:`covariance.GraphLasso` allows separate control of the convergence criterion
-     for the Elastic-Net subproblem via  the ``enet_tol`` parameter.
+- :class:`covariance.GraphLasso` allows separate control of the convergence criterion
+  for the Elastic-Net subproblem via  the ``enet_tol`` parameter.
 
-   - Improved verbosity in :class:`decomposition.DictionaryLearning`.
+- Improved verbosity in :class:`decomposition.DictionaryLearning`.
 
-   - :class:`ensemble.RandomForestClassifier` and
-     :class:`ensemble.RandomForestRegressor` no longer explicitly store the
-     samples used in bagging, resulting in a much reduced memory footprint for
-     storing random forest models.
+- :class:`ensemble.RandomForestClassifier` and
+  :class:`ensemble.RandomForestRegressor` no longer explicitly store the
+  samples used in bagging, resulting in a much reduced memory footprint for
+  storing random forest models.
 
-   - Added ``positive`` option to :class:`linear_model.Lars` and
-     :func:`linear_model.lars_path` to force coefficients to be positive.
-     (:issue:`5131`)
+- Added ``positive`` option to :class:`linear_model.Lars` and
+  :func:`linear_model.lars_path` to force coefficients to be positive.
+  (:issue:`5131`)
 
-   - Added the ``X_norm_squared`` parameter to :func:`metrics.pairwise.euclidean_distances`
-     to provide precomputed squared norms for ``X``.
+- Added the ``X_norm_squared`` parameter to :func:`metrics.pairwise.euclidean_distances`
+  to provide precomputed squared norms for ``X``.
 
-   - Added the ``fit_predict`` method to :class:`pipeline.Pipeline`.
+- Added the ``fit_predict`` method to :class:`pipeline.Pipeline`.
 
-   - Added the :func:`preprocessing.min_max_scale` function.
+- Added the :func:`preprocessing.min_max_scale` function.
 
 Bug fixes
 .........
 
-    - Fixed non-determinism in :class:`dummy.DummyClassifier` with sparse
-      multi-label output. By `Andreas Müller`_.
+- Fixed non-determinism in :class:`dummy.DummyClassifier` with sparse
+  multi-label output. By `Andreas Müller`_.
 
-    - Fixed the output shape of :class:`linear_model.RANSACRegressor` to
-      ``(n_samples, )``. By `Andreas Müller`_.
+- Fixed the output shape of :class:`linear_model.RANSACRegressor` to
+  ``(n_samples, )``. By `Andreas Müller`_.
 
-    - Fixed bug in :class:`decomposition.DictLearning` when ``n_jobs < 0``. By
-      `Andreas Müller`_.
+- Fixed bug in :class:`decomposition.DictionaryLearning` when ``n_jobs < 0``. By
+  `Andreas Müller`_.
 
-    - Fixed bug where :class:`grid_search.RandomizedSearchCV` could consume a
-      lot of memory for large discrete grids. By `Joel Nothman`_.
+- Fixed bug where :class:`grid_search.RandomizedSearchCV` could consume a
+  lot of memory for large discrete grids. By `Joel Nothman`_.
 
-    - Fixed bug in :class:`linear_model.LogisticRegressionCV` where `penalty` was ignored
-      in the final fit. By `Manoj Kumar`_.
+- Fixed bug in :class:`linear_model.LogisticRegressionCV` where `penalty` was ignored
+  in the final fit. By `Manoj Kumar`_.
 
-    - Fixed bug in :class:`ensemble.forest.ForestClassifier` while computing
-      oob_score and X is a sparse.csc_matrix. By :user:`Ankur Ankan <ankurankan>`.
+- Fixed bug in :class:`ensemble.forest.ForestClassifier` while computing
+  oob_score and X is a sparse.csc_matrix. By :user:`Ankur Ankan <ankurankan>`.
 
-    - All regressors now consistently handle and warn when given ``y`` that is of
-      shape ``(n_samples, 1)``. By `Andreas Müller`_ and Henry Lin.
-      (:issue:`5431`)
+- All regressors now consistently handle and warn when given ``y`` that is of
+  shape ``(n_samples, 1)``. By `Andreas Müller`_ and Henry Lin.
+  (:issue:`5431`)
 
-    - Fix in :class:`cluster.KMeans` cluster reassignment for sparse input by
-      `Lars Buitinck`_.
+- Fix in :class:`cluster.KMeans` cluster reassignment for sparse input by
+  `Lars Buitinck`_.
 
-    - Fixed a bug in :class:`lda.LDA` that could cause asymmetric covariance
-      matrices when using shrinkage. By `Martin Billinger`_.
+- Fixed a bug in :class:`lda.LDA` that could cause asymmetric covariance
+  matrices when using shrinkage. By `Martin Billinger`_.
 
-    - Fixed :func:`cross_validation.cross_val_predict` for estimators with
-      sparse predictions. By Buddha Prakash.
+- Fixed :func:`cross_validation.cross_val_predict` for estimators with
+  sparse predictions. By Buddha Prakash.
 
-    - Fixed the ``predict_proba`` method of :class:`linear_model.LogisticRegression`
-      to use soft-max instead of one-vs-rest normalization. By `Manoj Kumar`_.
-      (:issue:`5182`)
+- Fixed the ``predict_proba`` method of :class:`linear_model.LogisticRegression`
+  to use soft-max instead of one-vs-rest normalization. By `Manoj Kumar`_.
+  (:issue:`5182`)
 
-    - Fixed the :func:`partial_fit` method of :class:`linear_model.SGDClassifier`
-      when called with ``average=True``. By :user:`Andrew Lamb <andylamb>`.
-      (:issue:`5282`)
+- Fixed the :func:`partial_fit` method of :class:`linear_model.SGDClassifier`
+  when called with ``average=True``. By :user:`Andrew Lamb <andylamb>`.
+  (:issue:`5282`)
 
-    - Dataset fetchers use different filenames under Python 2 and Python 3 to
-      avoid pickling compatibility issues. By `Olivier Grisel`_.
-      (:issue:`5355`)
+- Dataset fetchers use different filenames under Python 2 and Python 3 to
+  avoid pickling compatibility issues. By `Olivier Grisel`_.
+  (:issue:`5355`)
 
-    - Fixed a bug in :class:`naive_bayes.GaussianNB` which caused classification
-      results to depend on scale. By `Jake Vanderplas`_.
+- Fixed a bug in :class:`naive_bayes.GaussianNB` which caused classification
+  results to depend on scale. By `Jake Vanderplas`_.
 
-    - Fixed temporarily :class:`linear_model.Ridge`, which was incorrect
-      when fitting the intercept in the case of sparse data. The fix
-      automatically changes the solver to 'sag' in this case.
-      :issue:`5360` by `Tom Dupre la Tour`_.
+- Fixed temporarily :class:`linear_model.Ridge`, which was incorrect
+  when fitting the intercept in the case of sparse data. The fix
+  automatically changes the solver to 'sag' in this case.
+  :issue:`5360` by `Tom Dupre la Tour`_.
 
-    - Fixed a performance bug in :class:`decomposition.RandomizedPCA` on data
-      with a large number of features and fewer samples. (:issue:`4478`)
-      By `Andreas Müller`_, `Loic Esteve`_ and :user:`Giorgio Patrini <giorgiop>`.
+- Fixed a performance bug in :class:`decomposition.RandomizedPCA` on data
+  with a large number of features and fewer samples. (:issue:`4478`)
+  By `Andreas Müller`_, `Loic Esteve`_ and :user:`Giorgio Patrini <giorgiop>`.
 
-    - Fixed bug in :class:`cross_decomposition.PLS` that yielded unstable and
-      platform dependent output, and failed on `fit_transform`.
-      By :user:`Arthur Mensch <arthurmensch>`.
+- Fixed bug in :class:`cross_decomposition.PLS` that yielded unstable and
+  platform dependent output, and failed on `fit_transform`.
+  By :user:`Arthur Mensch <arthurmensch>`.
 
-    - Fixes to the ``Bunch`` class used to store datasets.
+- Fixes to the ``Bunch`` class used to store datasets.
 
-    - Fixed :func:`ensemble.plot_partial_dependence` ignoring the
-      ``percentiles`` parameter.
+- Fixed :func:`ensemble.plot_partial_dependence` ignoring the
+  ``percentiles`` parameter.
 
-    - Providing a ``set`` as vocabulary in ``CountVectorizer`` no longer
-      leads to inconsistent results when pickling.
+- Providing a ``set`` as vocabulary in ``CountVectorizer`` no longer
+  leads to inconsistent results when pickling.
 
-    - Fixed the conditions on when a precomputed Gram matrix needs to
-      be recomputed in :class:`linear_model.LinearRegression`,
-      :class:`linear_model.OrthogonalMatchingPursuit`,
-      :class:`linear_model.Lasso` and :class:`linear_model.ElasticNet`.
+- Fixed the conditions on when a precomputed Gram matrix needs to
+  be recomputed in :class:`linear_model.LinearRegression`,
+  :class:`linear_model.OrthogonalMatchingPursuit`,
+  :class:`linear_model.Lasso` and :class:`linear_model.ElasticNet`.
 
-    - Fixed inconsistent memory layout in the coordinate descent solver
-      that affected :class:`linear_model.DictionaryLearning` and
-      :class:`covariance.GraphLasso`. (:issue:`5337`)
-      By `Olivier Grisel`_.
+- Fixed inconsistent memory layout in the coordinate descent solver
+  that affected :class:`decomposition.DictionaryLearning` and
+  :class:`covariance.GraphLasso`. (:issue:`5337`)
+  By `Olivier Grisel`_.
 
-    - :class:`manifold.LocallyLinearEmbedding` no longer ignores the ``reg``
-      parameter.
+- :class:`manifold.LocallyLinearEmbedding` no longer ignores the ``reg``
+  parameter.
 
-    - Nearest Neighbor estimators with custom distance metrics can now be pickled.
-      (:issue:`4362`)
+- Nearest Neighbor estimators with custom distance metrics can now be pickled.
+  (:issue:`4362`)
 
-    - Fixed a bug in :class:`pipeline.FeatureUnion` where ``transformer_weights``
-      were not properly handled when performing grid-searches.
+- Fixed a bug in :class:`pipeline.FeatureUnion` where ``transformer_weights``
+  were not properly handled when performing grid-searches.
 
-    - Fixed a bug in :class:`linear_model.LogisticRegression` and
-      :class:`linear_model.LogisticRegressionCV` when using
-      ``class_weight='balanced'```or ``class_weight='auto'``.
-      By `Tom Dupre la Tour`_.
+- Fixed a bug in :class:`linear_model.LogisticRegression` and
+  :class:`linear_model.LogisticRegressionCV` when using
+  ``class_weight='balanced'`` or ``class_weight='auto'``.
+  By `Tom Dupre la Tour`_.
 
-    - Fixed bug :issue:`5495` when
-      doing OVR(SVC(decision_function_shape="ovr")). Fixed by
-      :user:`Elvis Dohmatob <dohmatob>`.
+- Fixed bug :issue:`5495` when
+  doing OVR(SVC(decision_function_shape="ovr")). Fixed by
+  :user:`Elvis Dohmatob <dohmatob>`.
 
 
 API changes summary
 -------------------
-    - Attribute `data_min`, `data_max` and `data_range` in
-      :class:`preprocessing.MinMaxScaler` are deprecated and won't be available
-      from 0.19. Instead, the class now exposes `data_min_`, `data_max_`
-      and `data_range_`. By :user:`Giorgio Patrini <giorgiop>`.
+- Attributes `data_min`, `data_max` and `data_range` in
+  :class:`preprocessing.MinMaxScaler` are deprecated and won't be available
+  from 0.19. Instead, the class now exposes `data_min_`, `data_max_`
+  and `data_range_`. By :user:`Giorgio Patrini <giorgiop>`.
 
-    - All Scaler classes now have an `scale_` attribute, the feature-wise
-      rescaling applied by their `transform` methods. The old attribute `std_`
-      in :class:`preprocessing.StandardScaler` is deprecated and superseded
-      by `scale_`; it won't be available in 0.19. By :user:`Giorgio Patrini <giorgiop>`.
+- All Scaler classes now have a `scale_` attribute, the feature-wise
+  rescaling applied by their `transform` methods. The old attribute `std_`
+  in :class:`preprocessing.StandardScaler` is deprecated and superseded
+  by `scale_`; it won't be available in 0.19. By :user:`Giorgio Patrini <giorgiop>`.
 
-    - :class:`svm.SVC`` and :class:`svm.NuSVC` now have an ``decision_function_shape``
-      parameter to make their decision function of shape ``(n_samples, n_classes)``
-      by setting ``decision_function_shape='ovr'``. This will be the default behavior
-      starting in 0.19. By `Andreas Müller`_.
+- :class:`svm.SVC` and :class:`svm.NuSVC` now have a ``decision_function_shape``
+  parameter to make their decision function of shape ``(n_samples, n_classes)``
+  by setting ``decision_function_shape='ovr'``. This will be the default behavior
+  starting in 0.19. By `Andreas Müller`_.
 
-    - Passing 1D data arrays as input to estimators is now deprecated as it
-      caused confusion in how the array elements should be interpreted
-      as features or as samples. All data arrays are now expected
-      to be explicitly shaped ``(n_samples, n_features)``.
-      By :user:`Vighnesh Birodkar <vighneshbirodkar>`.
+- Passing 1D data arrays as input to estimators is now deprecated as it
+  caused confusion in how the array elements should be interpreted
+  as features or as samples. All data arrays are now expected
+  to be explicitly shaped ``(n_samples, n_features)``.
+  By :user:`Vighnesh Birodkar <vighneshbirodkar>`.
 
-    - :class:`lda.LDA` and :class:`qda.QDA` have been moved to
-      :class:`discriminant_analysis.LinearDiscriminantAnalysis` and
-      :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`.
+- :class:`lda.LDA` and :class:`qda.QDA` have been moved to
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis` and
+  :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`.
 
-    - The ``store_covariance`` and ``tol`` parameters have been moved from
-      the fit method to the constructor in
-      :class:`discriminant_analysis.LinearDiscriminantAnalysis` and the
-      ``store_covariances`` and ``tol`` parameters have been moved from the
-      fit method to the constructor in
-      :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`.
+- The ``store_covariance`` and ``tol`` parameters have been moved from
+  the fit method to the constructor in
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis` and the
+  ``store_covariances`` and ``tol`` parameters have been moved from the
+  fit method to the constructor in
+  :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`.
 
-    - Models inheriting from ``_LearntSelectorMixin`` will no longer support the
-      transform methods. (i.e,  RandomForests, GradientBoosting, LogisticRegression,
-      DecisionTrees, SVMs and SGD related models). Wrap these models around the
-      metatransfomer :class:`feature_selection.SelectFromModel` to remove
-      features (according to `coefs_` or `feature_importances_`)
-      which are below a certain threshold value instead.
+- Models inheriting from ``_LearntSelectorMixin`` will no longer support the
+  transform methods (i.e., RandomForests, GradientBoosting, LogisticRegression,
+  DecisionTrees, SVMs and SGD related models). Wrap these models with the
+  meta-transformer :class:`feature_selection.SelectFromModel` to remove
+  features (according to `coef_` or `feature_importances_`)
+  which are below a certain threshold value instead.
 
-    - :class:`cluster.KMeans` re-runs cluster-assignments in case of non-convergence,
-      to ensure consistency of ``predict(X)`` and ``labels_``. By
-      :user:`Vighnesh Birodkar <vighneshbirodkar>`.
+- :class:`cluster.KMeans` re-runs cluster-assignments in case of non-convergence,
+  to ensure consistency of ``predict(X)`` and ``labels_``. By
+  :user:`Vighnesh Birodkar <vighneshbirodkar>`.
 
-    - Classifier and Regressor models are now tagged as such using the
-      ``_estimator_type`` attribute.
+- Classifier and Regressor models are now tagged as such using the
+  ``_estimator_type`` attribute.
 
-    - Cross-validation iterators always provide indices into training and test set,
-      not boolean masks.
+- Cross-validation iterators always provide indices into training and test set,
+  not boolean masks.
 
-    - The ``decision_function`` on all regressors was deprecated and will be
-      removed in 0.19.  Use ``predict`` instead.
+- The ``decision_function`` on all regressors was deprecated and will be
+  removed in 0.19.  Use ``predict`` instead.
 
-    - :func:`datasets.load_lfw_pairs` is deprecated and will be removed in 0.19.
-      Use :func:`datasets.fetch_lfw_pairs` instead.
+- :func:`datasets.load_lfw_pairs` is deprecated and will be removed in 0.19.
+  Use :func:`datasets.fetch_lfw_pairs` instead.
 
-    - The deprecated ``hmm`` module was removed.
+- The deprecated ``hmm`` module was removed.
 
-    - The deprecated ``Bootstrap`` cross-validation iterator was removed.
+- The deprecated ``Bootstrap`` cross-validation iterator was removed.
 
-    - The deprecated ``Ward`` and ``WardAgglomerative`` classes have been removed.
-      Use :class:`clustering.AgglomerativeClustering` instead.
+- The deprecated ``Ward`` and ``WardAgglomerative`` classes have been removed.
+  Use :class:`cluster.AgglomerativeClustering` instead.
 
-    - :func:`cross_validation.check_cv` is now a public function.
+- :func:`cross_validation.check_cv` is now a public function.
 
-    - The property ``residues_`` of :class:`linear_model.LinearRegression` is deprecated
-      and will be removed in 0.19.
+- The property ``residues_`` of :class:`linear_model.LinearRegression` is deprecated
+  and will be removed in 0.19.
 
-    - The deprecated ``n_jobs`` parameter of :class:`linear_model.LinearRegression` has been moved
-      to the constructor.
+- The deprecated ``n_jobs`` parameter of :class:`linear_model.LinearRegression` has been moved
+  to the constructor.
 
-    - Removed deprecated ``class_weight`` parameter from :class:`linear_model.SGDClassifier`'s ``fit``
-      method. Use the construction parameter instead.
+- Removed deprecated ``class_weight`` parameter from :class:`linear_model.SGDClassifier`'s ``fit``
+  method. Use the construction parameter instead.
 
-    - The deprecated support for the sequence of sequences (or list of lists) multilabel
-      format was removed. To convert to and from the supported binary
-      indicator matrix format, use
-      :class:`MultiLabelBinarizer <preprocessing.MultiLabelBinarizer>`.
+- The deprecated support for the sequence of sequences (or list of lists) multilabel
+  format was removed. To convert to and from the supported binary
+  indicator matrix format, use
+  :class:`MultiLabelBinarizer <preprocessing.MultiLabelBinarizer>`.
 
-    - The behavior of calling the ``inverse_transform`` method of ``Pipeline.pipeline`` will
-      change in 0.19. It will no longer reshape one-dimensional input to two-dimensional input.
+- The behavior of calling the ``inverse_transform`` method of ``pipeline.Pipeline`` will
+  change in 0.19. It will no longer reshape one-dimensional input to two-dimensional input.
 
-    - The deprecated attributes ``indicator_matrix_``, ``multilabel_`` and ``classes_`` of
-      :class:`preprocessing.LabelBinarizer` were removed.
+- The deprecated attributes ``indicator_matrix_``, ``multilabel_`` and ``classes_`` of
+  :class:`preprocessing.LabelBinarizer` were removed.
 
-    - Using ``gamma=0`` in :class:`svm.SVC` and :class:`svm.SVR` to automatically set the
-      gamma to ``1. / n_features`` is deprecated and will be removed in 0.19.
-      Use ``gamma="auto"`` instead.
+- Using ``gamma=0`` in :class:`svm.SVC` and :class:`svm.SVR` to automatically set the
+  gamma to ``1. / n_features`` is deprecated and will be removed in 0.19.
+  Use ``gamma="auto"`` instead.
 
 Code Contributors
 -----------------
@@ -2168,26 +2288,26 @@ Changelog
 Bug fixes
 .........
 
-   - Allow input data larger than ``block_size`` in
-     :class:`covariance.LedoitWolf` by `Andreas Müller`_.
+- Allow input data larger than ``block_size`` in
+  :class:`covariance.LedoitWolf` by `Andreas Müller`_.
 
-   - Fix a bug in :class:`isotonic.IsotonicRegression` deduplication that
-     caused unstable result in :class:`calibration.CalibratedClassifierCV` by
-     `Jan Hendrik Metzen`_.
+- Fix a bug in :class:`isotonic.IsotonicRegression` deduplication that
+  caused unstable result in :class:`calibration.CalibratedClassifierCV` by
+  `Jan Hendrik Metzen`_.
 
-   - Fix sorting of labels in func:`preprocessing.label_binarize` by Michael Heilman.
+- Fix sorting of labels in :func:`preprocessing.label_binarize` by Michael Heilman.
 
-   - Fix several stability and convergence issues in
-     :class:`cross_decomposition.CCA` and
-     :class:`cross_decomposition.PLSCanonical` by `Andreas Müller`_
+- Fix several stability and convergence issues in
+  :class:`cross_decomposition.CCA` and
+  :class:`cross_decomposition.PLSCanonical` by `Andreas Müller`_
 
-   - Fix a bug in :class:`cluster.KMeans` when ``precompute_distances=False``
-     on fortran-ordered data.
+- Fix a bug in :class:`cluster.KMeans` when ``precompute_distances=False``
+  on fortran-ordered data.
 
-   - Fix a speed regression in :class:`ensemble.RandomForestClassifier`'s ``predict``
-     and ``predict_proba`` by `Andreas Müller`_.
+- Fix a speed regression in :class:`ensemble.RandomForestClassifier`'s ``predict``
+  and ``predict_proba`` by `Andreas Müller`_.
 
-   - Fix a regression where ``utils.shuffle`` converted lists and dataframes to arrays, by `Olivier Grisel`_
+- Fix a regression where ``utils.shuffle`` converted lists and dataframes to arrays, by `Olivier Grisel`_
 
 .. _changes_0_16:
 
@@ -2199,25 +2319,25 @@ Version 0.16
 Highlights
 -----------
 
-   - Speed improvements (notably in :class:`cluster.DBSCAN`), reduced memory
-     requirements, bug-fixes and better default settings.
+- Speed improvements (notably in :class:`cluster.DBSCAN`), reduced memory
+  requirements, bug-fixes and better default settings.
 
-   - Multinomial Logistic regression and a path algorithm in
-     :class:`linear_model.LogisticRegressionCV`.
+- Multinomial Logistic regression and a path algorithm in
+  :class:`linear_model.LogisticRegressionCV`.
 
-   - Out-of core learning of PCA via :class:`decomposition.IncrementalPCA`.
+- Out-of-core learning of PCA via :class:`decomposition.IncrementalPCA`.
 
-   - Probability callibration of classifiers using
-     :class:`calibration.CalibratedClassifierCV`.
+- Probability calibration of classifiers using
+  :class:`calibration.CalibratedClassifierCV`.
 
-   - :class:`cluster.Birch` clustering method for large-scale datasets.
+- :class:`cluster.Birch` clustering method for large-scale datasets.
 
-   - Scalable approximate nearest neighbors search with Locality-sensitive
-     hashing forests in :class:`neighbors.LSHForest`.
+- Scalable approximate nearest neighbors search with Locality-sensitive
+  hashing forests in :class:`neighbors.LSHForest`.
 
-   - Improved error messages and better validation when using malformed input data.
+- Improved error messages and better validation when using malformed input data.
 
-   - More robust integration with pandas dataframes.
+- More robust integration with pandas dataframes.
 
 Changelog
 ---------
@@ -2225,438 +2345,438 @@ Changelog
 New features
 ............
 
-   - The new :class:`neighbors.LSHForest` implements locality-sensitive hashing
-     for approximate nearest neighbors search. By :user:`Maheshakya Wijewardena<maheshakya>`.
+- The new :class:`neighbors.LSHForest` implements locality-sensitive hashing
+  for approximate nearest neighbors search. By :user:`Maheshakya Wijewardena<maheshakya>`.
 
-   - Added :class:`svm.LinearSVR`. This class uses the liblinear implementation
-     of Support Vector Regression which is much faster for large
-     sample sizes than :class:`svm.SVR` with linear kernel. By
-     `Fabian Pedregosa`_ and Qiang Luo.
+- Added :class:`svm.LinearSVR`. This class uses the liblinear implementation
+  of Support Vector Regression which is much faster for large
+  sample sizes than :class:`svm.SVR` with linear kernel. By
+  `Fabian Pedregosa`_ and Qiang Luo.
 
-   - Incremental fit for :class:`GaussianNB <naive_bayes.GaussianNB>`.
+- Incremental fit for :class:`GaussianNB <naive_bayes.GaussianNB>`.
 
-   - Added ``sample_weight`` support to :class:`dummy.DummyClassifier` and
-     :class:`dummy.DummyRegressor`. By `Arnaud Joly`_.
+- Added ``sample_weight`` support to :class:`dummy.DummyClassifier` and
+  :class:`dummy.DummyRegressor`. By `Arnaud Joly`_.
 
-   - Added the :func:`metrics.label_ranking_average_precision_score` metrics.
-     By `Arnaud Joly`_.
+- Added the :func:`metrics.label_ranking_average_precision_score` metric.
+  By `Arnaud Joly`_.
 
-   - Add the :func:`metrics.coverage_error` metrics. By `Arnaud Joly`_.
+- Add the :func:`metrics.coverage_error` metric. By `Arnaud Joly`_.
 
-   - Added :class:`linear_model.LogisticRegressionCV`. By
-     `Manoj Kumar`_, `Fabian Pedregosa`_, `Gael Varoquaux`_
-     and `Alexandre Gramfort`_.
+- Added :class:`linear_model.LogisticRegressionCV`. By
+  `Manoj Kumar`_, `Fabian Pedregosa`_, `Gael Varoquaux`_
+  and `Alexandre Gramfort`_.
 
-   - Added ``warm_start`` constructor parameter to make it possible for any
-     trained forest model to grow additional trees incrementally. By
-     :user:`Laurent Direr<ldirer>`.
+- Added ``warm_start`` constructor parameter to make it possible for any
+  trained forest model to grow additional trees incrementally. By
+  :user:`Laurent Direr<ldirer>`.
 
-   - Added ``sample_weight`` support to :class:`ensemble.GradientBoostingClassifier` and
-     :class:`ensemble.GradientBoostingRegressor`. By `Peter Prettenhofer`_.
+- Added ``sample_weight`` support to :class:`ensemble.GradientBoostingClassifier` and
+  :class:`ensemble.GradientBoostingRegressor`. By `Peter Prettenhofer`_.
 
-   - Added :class:`decomposition.IncrementalPCA`, an implementation of the PCA
-     algorithm that supports out-of-core learning with a ``partial_fit``
-     method. By `Kyle Kastner`_.
+- Added :class:`decomposition.IncrementalPCA`, an implementation of the PCA
+  algorithm that supports out-of-core learning with a ``partial_fit``
+  method. By `Kyle Kastner`_.
 
-   - Averaged SGD for :class:`SGDClassifier <linear_model.SGDClassifier>`
-     and :class:`SGDRegressor <linear_model.SGDRegressor>` By
-     :user:`Danny Sullivan <dsullivan7>`.
+- Averaged SGD for :class:`SGDClassifier <linear_model.SGDClassifier>`
+  and :class:`SGDRegressor <linear_model.SGDRegressor>`. By
+  :user:`Danny Sullivan <dsullivan7>`.
 
-   - Added :func:`cross_val_predict <cross_validation.cross_val_predict>`
-     function which computes cross-validated estimates. By `Luis Pedro Coelho`_
+- Added :func:`cross_val_predict <cross_validation.cross_val_predict>`
+  function which computes cross-validated estimates. By `Luis Pedro Coelho`_
 
-   - Added :class:`linear_model.TheilSenRegressor`, a robust
-     generalized-median-based estimator. By :user:`Florian Wilhelm <FlorianWilhelm>`.
+- Added :class:`linear_model.TheilSenRegressor`, a robust
+  generalized-median-based estimator. By :user:`Florian Wilhelm <FlorianWilhelm>`.
 
-   - Added :func:`metrics.median_absolute_error`, a robust metric.
-     By `Gael Varoquaux`_ and :user:`Florian Wilhelm <FlorianWilhelm>`.
+- Added :func:`metrics.median_absolute_error`, a robust metric.
+  By `Gael Varoquaux`_ and :user:`Florian Wilhelm <FlorianWilhelm>`.
 
-   - Add :class:`cluster.Birch`, an online clustering algorithm. By
-     `Manoj Kumar`_, `Alexandre Gramfort`_ and `Joel Nothman`_.
+- Add :class:`cluster.Birch`, an online clustering algorithm. By
+  `Manoj Kumar`_, `Alexandre Gramfort`_ and `Joel Nothman`_.
 
-   - Added shrinkage support to :class:`discriminant_analysis.LinearDiscriminantAnalysis`
-     using two new solvers. By :user:`Clemens Brunner <cle1109>` and `Martin Billinger`_.
+- Added shrinkage support to :class:`discriminant_analysis.LinearDiscriminantAnalysis`
+  using two new solvers. By :user:`Clemens Brunner <cle1109>` and `Martin Billinger`_.
 
-   - Added :class:`kernel_ridge.KernelRidge`, an implementation of
-     kernelized ridge regression.
-     By `Mathieu Blondel`_ and `Jan Hendrik Metzen`_.
+- Added :class:`kernel_ridge.KernelRidge`, an implementation of
+  kernelized ridge regression.
+  By `Mathieu Blondel`_ and `Jan Hendrik Metzen`_.
 
-   - All solvers in :class:`linear_model.Ridge` now support `sample_weight`.
-     By `Mathieu Blondel`_.
+- All solvers in :class:`linear_model.Ridge` now support `sample_weight`.
+  By `Mathieu Blondel`_.
 
-   - Added :class:`cross_validation.PredefinedSplit` cross-validation
-     for fixed user-provided cross-validation folds.
-     By :user:`Thomas Unterthiner <untom>`.
+- Added :class:`cross_validation.PredefinedSplit` cross-validation
+  for fixed user-provided cross-validation folds.
+  By :user:`Thomas Unterthiner <untom>`.
 
-   - Added :class:`calibration.CalibratedClassifierCV`, an approach for
-     calibrating the predicted probabilities of a classifier.
-     By `Alexandre Gramfort`_, `Jan Hendrik Metzen`_, `Mathieu Blondel`_
-     and :user:`Balazs Kegl <kegl>`.
+- Added :class:`calibration.CalibratedClassifierCV`, an approach for
+  calibrating the predicted probabilities of a classifier.
+  By `Alexandre Gramfort`_, `Jan Hendrik Metzen`_, `Mathieu Blondel`_
+  and :user:`Balazs Kegl <kegl>`.
 
 
 Enhancements
 ............
 
-   - Add option ``return_distance`` in :func:`hierarchical.ward_tree`
-     to return distances between nodes for both structured and unstructured
-     versions of the algorithm. By `Matteo Visconti di Oleggio Castello`_.
-     The same option was added in :func:`hierarchical.linkage_tree`.
-     By `Manoj Kumar`_
+- Add option ``return_distance`` in :func:`hierarchical.ward_tree`
+  to return distances between nodes for both structured and unstructured
+  versions of the algorithm. By `Matteo Visconti di Oleggio Castello`_.
+  The same option was added in :func:`hierarchical.linkage_tree`.
+  By `Manoj Kumar`_
 
-   - Add support for sample weights in scorer objects.  Metrics with sample
-     weight support will automatically benefit from it. By `Noel Dawe`_ and
-     `Vlad Niculae`_.
+- Add support for sample weights in scorer objects.  Metrics with sample
+  weight support will automatically benefit from it. By `Noel Dawe`_ and
+  `Vlad Niculae`_.
 
-   - Added ``newton-cg`` and `lbfgs` solver support in
-     :class:`linear_model.LogisticRegression`. By `Manoj Kumar`_.
+- Added ``newton-cg`` and ``lbfgs`` solver support in
+  :class:`linear_model.LogisticRegression`. By `Manoj Kumar`_.
 
-   - Add ``selection="random"`` parameter to implement stochastic coordinate
-     descent for :class:`linear_model.Lasso`, :class:`linear_model.ElasticNet`
-     and related. By `Manoj Kumar`_.
+- Add ``selection="random"`` parameter to implement stochastic coordinate
+  descent for :class:`linear_model.Lasso`, :class:`linear_model.ElasticNet`
+  and related. By `Manoj Kumar`_.
 
-   - Add ``sample_weight`` parameter to
-     :func:`metrics.jaccard_similarity_score` and :func:`metrics.log_loss`.
-     By :user:`Jatin Shah <jatinshah>`.
+- Add ``sample_weight`` parameter to
+  :func:`metrics.jaccard_similarity_score` and :func:`metrics.log_loss`.
+  By :user:`Jatin Shah <jatinshah>`.
 
-   - Support sparse multilabel indicator representation in
-     :class:`preprocessing.LabelBinarizer` and
-     :class:`multiclass.OneVsRestClassifier` (by :user:`Hamzeh Alsalhi <hamsal>` with thanks
-     to Rohit Sivaprasad), as well as evaluation metrics (by
-     `Joel Nothman`_).
+- Support sparse multilabel indicator representation in
+  :class:`preprocessing.LabelBinarizer` and
+  :class:`multiclass.OneVsRestClassifier` (by :user:`Hamzeh Alsalhi <hamsal>` with thanks
+  to Rohit Sivaprasad), as well as evaluation metrics (by
+  `Joel Nothman`_).
 
-   - Add ``sample_weight`` parameter to `metrics.jaccard_similarity_score`.
-     By `Jatin Shah`.
+- Add ``sample_weight`` parameter to `metrics.jaccard_similarity_score`.
+  By :user:`Jatin Shah <jatinshah>`.
 
-   - Add support for multiclass in `metrics.hinge_loss`. Added ``labels=None``
-     as optional parameter. By `Saurabh Jha`.
+- Add support for multiclass in `metrics.hinge_loss`. Added ``labels=None``
+  as optional parameter. By `Saurabh Jha`.
 
-   - Add ``sample_weight`` parameter to `metrics.hinge_loss`.
-     By `Saurabh Jha`.
+- Add ``sample_weight`` parameter to `metrics.hinge_loss`.
+  By `Saurabh Jha`.
 
-   - Add ``multi_class="multinomial"`` option in
-     :class:`linear_model.LogisticRegression` to implement a Logistic
-     Regression solver that minimizes the cross-entropy or multinomial loss
-     instead of the default One-vs-Rest setting. Supports `lbfgs` and
-     `newton-cg` solvers. By `Lars Buitinck`_ and `Manoj Kumar`_. Solver option
-     `newton-cg` by Simon Wu.
+- Add ``multi_class="multinomial"`` option in
+  :class:`linear_model.LogisticRegression` to implement a Logistic
+  Regression solver that minimizes the cross-entropy or multinomial loss
+  instead of the default One-vs-Rest setting. Supports `lbfgs` and
+  `newton-cg` solvers. By `Lars Buitinck`_ and `Manoj Kumar`_. Solver option
+  `newton-cg` by Simon Wu.
 
-   - ``DictVectorizer`` can now perform ``fit_transform`` on an iterable in a
-     single pass, when giving the option ``sort=False``. By :user:`Dan
-     Blanchard <dan-blanchard>`.
+- ``DictVectorizer`` can now perform ``fit_transform`` on an iterable in a
+  single pass, when giving the option ``sort=False``. By :user:`Dan
+  Blanchard <dan-blanchard>`.
 
-   - :class:`GridSearchCV` and :class:`RandomizedSearchCV` can now be
-     configured to work with estimators that may fail and raise errors on
-     individual folds. This option is controlled by the `error_score`
-     parameter. This does not affect errors raised on re-fit. By
-     :user:`Michal Romaniuk <romaniukm>`.
+- :class:`GridSearchCV` and :class:`RandomizedSearchCV` can now be
+  configured to work with estimators that may fail and raise errors on
+  individual folds. This option is controlled by the `error_score`
+  parameter. This does not affect errors raised on re-fit. By
+  :user:`Michal Romaniuk <romaniukm>`.
 
-   - Add ``digits`` parameter to `metrics.classification_report` to allow
-     report to show different precision of floating point numbers. By
-     :user:`Ian Gilmore <agileminor>`.
+- Add ``digits`` parameter to `metrics.classification_report` to allow
+  report to show different precision of floating point numbers. By
+  :user:`Ian Gilmore <agileminor>`.
 
-   - Add a quantile prediction strategy to the :class:`dummy.DummyRegressor`.
-     By :user:`Aaron Staple <staple>`.
+- Add a quantile prediction strategy to the :class:`dummy.DummyRegressor`.
+  By :user:`Aaron Staple <staple>`.
 
-   - Add ``handle_unknown`` option to :class:`preprocessing.OneHotEncoder` to
-     handle unknown categorical features more gracefully during transform.
-     By `Manoj Kumar`_.
+- Add ``handle_unknown`` option to :class:`preprocessing.OneHotEncoder` to
+  handle unknown categorical features more gracefully during transform.
+  By `Manoj Kumar`_.
 
-   - Added support for sparse input data to decision trees and their ensembles.
-     By `Fares Hedyati`_ and `Arnaud Joly`_.
+- Added support for sparse input data to decision trees and their ensembles.
+  By `Fares Hedyati`_ and `Arnaud Joly`_.
 
-   - Optimized :class:`cluster.AffinityPropagation` by reducing the number of
-     memory allocations of large temporary data-structures. By `Antony Lee`_.
+- Optimized :class:`cluster.AffinityPropagation` by reducing the number of
+  memory allocations of large temporary data-structures. By `Antony Lee`_.
 
-   - Parellization of the computation of feature importances in random forest.
-     By `Olivier Grisel`_ and `Arnaud Joly`_.
+- Parallelization of the computation of feature importances in random forest.
+  By `Olivier Grisel`_ and `Arnaud Joly`_.
 
-   - Add ``n_iter_`` attribute to estimators that accept a ``max_iter`` attribute
-     in their constructor. By `Manoj Kumar`_.
+- Add ``n_iter_`` attribute to estimators that accept a ``max_iter`` attribute
+  in their constructor. By `Manoj Kumar`_.
 
-   - Added decision function for :class:`multiclass.OneVsOneClassifier`
-     By `Raghav RV`_ and :user:`Kyle Beauchamp <kyleabeauchamp>`.
+- Added decision function for :class:`multiclass.OneVsOneClassifier`.
+  By `Raghav RV`_ and :user:`Kyle Beauchamp <kyleabeauchamp>`.
 
-   - :func:`neighbors.kneighbors_graph` and :func:`radius_neighbors_graph`
-     support non-Euclidean metrics. By `Manoj Kumar`_
+- :func:`neighbors.kneighbors_graph` and :func:`radius_neighbors_graph`
+  support non-Euclidean metrics. By `Manoj Kumar`_
 
-   - Parameter ``connectivity`` in :class:`cluster.AgglomerativeClustering`
-     and family now accept callables that return a connectivity matrix.
-     By `Manoj Kumar`_.
+- Parameter ``connectivity`` in :class:`cluster.AgglomerativeClustering`
+  and family now accept callables that return a connectivity matrix.
+  By `Manoj Kumar`_.
 
-   - Sparse support for :func:`paired_distances`. By `Joel Nothman`_.
+- Sparse support for :func:`paired_distances`. By `Joel Nothman`_.
 
-   - :class:`cluster.DBSCAN` now supports sparse input and sample weights and
-     has been optimized: the inner loop has been rewritten in Cython and
-     radius neighbors queries are now computed in batch. By `Joel Nothman`_
-     and `Lars Buitinck`_.
+- :class:`cluster.DBSCAN` now supports sparse input and sample weights and
+  has been optimized: the inner loop has been rewritten in Cython and
+  radius neighbors queries are now computed in batch. By `Joel Nothman`_
+  and `Lars Buitinck`_.
 
-   - Add ``class_weight`` parameter to automatically weight samples by class
-     frequency for :class:`ensemble.RandomForestClassifier`,
-     :class:`tree.DecisionTreeClassifier`, :class:`ensemble.ExtraTreesClassifier`
-     and :class:`tree.ExtraTreeClassifier`. By `Trevor Stephens`_.
+- Add ``class_weight`` parameter to automatically weight samples by class
+  frequency for :class:`ensemble.RandomForestClassifier`,
+  :class:`tree.DecisionTreeClassifier`, :class:`ensemble.ExtraTreesClassifier`
+  and :class:`tree.ExtraTreeClassifier`. By `Trevor Stephens`_.
 
-   - :class:`grid_search.RandomizedSearchCV` now does sampling without
-     replacement if all parameters are given as lists. By `Andreas Müller`_.
+- :class:`grid_search.RandomizedSearchCV` now does sampling without
+  replacement if all parameters are given as lists. By `Andreas Müller`_.
 
-   - Parallelized calculation of :func:`pairwise_distances` is now supported
-     for scipy metrics and custom callables. By `Joel Nothman`_.
+- Parallelized calculation of :func:`pairwise_distances` is now supported
+  for scipy metrics and custom callables. By `Joel Nothman`_.
 
-   - Allow the fitting and scoring of all clustering algorithms in
-     :class:`pipeline.Pipeline`. By `Andreas Müller`_.
+- Allow the fitting and scoring of all clustering algorithms in
+  :class:`pipeline.Pipeline`. By `Andreas Müller`_.
 
-   - More robust seeding and improved error messages in :class:`cluster.MeanShift`
-     by `Andreas Müller`_.
+- More robust seeding and improved error messages in :class:`cluster.MeanShift`
+  by `Andreas Müller`_.
 
-   - Make the stopping criterion for :class:`mixture.GMM`,
-     :class:`mixture.DPGMM` and :class:`mixture.VBGMM` less dependent on the
-     number of samples by thresholding the average log-likelihood change
-     instead of its sum over all samples. By `Hervé Bredin`_.
+- Make the stopping criterion for :class:`mixture.GMM`,
+  :class:`mixture.DPGMM` and :class:`mixture.VBGMM` less dependent on the
+  number of samples by thresholding the average log-likelihood change
+  instead of its sum over all samples. By `Hervé Bredin`_.
 
-   - The outcome of :func:`manifold.spectral_embedding` was made deterministic
-     by flipping the sign of eigenvectors. By :user:`Hasil Sharma <Hasil-Sharma>`.
+- The outcome of :func:`manifold.spectral_embedding` was made deterministic
+  by flipping the sign of eigenvectors. By :user:`Hasil Sharma <Hasil-Sharma>`.
 
-   - Significant performance and memory usage improvements in
-     :class:`preprocessing.PolynomialFeatures`. By `Eric Martin`_.
+- Significant performance and memory usage improvements in
+  :class:`preprocessing.PolynomialFeatures`. By `Eric Martin`_.
 
-   - Numerical stability improvements for :class:`preprocessing.StandardScaler`
-     and :func:`preprocessing.scale`. By `Nicolas Goix`_
+- Numerical stability improvements for :class:`preprocessing.StandardScaler`
+  and :func:`preprocessing.scale`. By `Nicolas Goix`_
 
-   - :class:`svm.SVC` fitted on sparse input now implements ``decision_function``.
-     By `Rob Zinkov`_ and `Andreas Müller`_.
+- :class:`svm.SVC` fitted on sparse input now implements ``decision_function``.
+  By `Rob Zinkov`_ and `Andreas Müller`_.
 
-   - :func:`cross_validation.train_test_split` now preserves the input type,
-     instead of converting to numpy arrays.
+- :func:`cross_validation.train_test_split` now preserves the input type,
+  instead of converting to numpy arrays.
 
 
 Documentation improvements
 ..........................
 
-   - Added example of using :class:`FeatureUnion` for heterogeneous input.
-     By :user:`Matt Terry <mrterry>`
+- Added example of using :class:`FeatureUnion` for heterogeneous input.
+  By :user:`Matt Terry <mrterry>`
 
-   - Documentation on scorers was improved, to highlight the handling of loss
-     functions. By :user:`Matt Pico <MattpSoftware>`.
+- Documentation on scorers was improved, to highlight the handling of loss
+  functions. By :user:`Matt Pico <MattpSoftware>`.
 
-   - A discrepancy between liblinear output and scikit-learn's wrappers
-     is now noted. By `Manoj Kumar`_.
+- A discrepancy between liblinear output and scikit-learn's wrappers
+  is now noted. By `Manoj Kumar`_.
 
-   - Improved documentation generation: examples referring to a class or
-     function are now shown in a gallery on the class/function's API reference
-     page. By `Joel Nothman`_.
+- Improved documentation generation: examples referring to a class or
+  function are now shown in a gallery on the class/function's API reference
+  page. By `Joel Nothman`_.
 
-   - More explicit documentation of sample generators and of data
-     transformation. By `Joel Nothman`_.
+- More explicit documentation of sample generators and of data
+  transformation. By `Joel Nothman`_.
 
-   - :class:`sklearn.neighbors.BallTree` and :class:`sklearn.neighbors.KDTree`
-     used to point to empty pages stating that they are aliases of BinaryTree.
-     This has been fixed to show the correct class docs. By `Manoj Kumar`_.
+- :class:`sklearn.neighbors.BallTree` and :class:`sklearn.neighbors.KDTree`
+  used to point to empty pages stating that they are aliases of BinaryTree.
+  This has been fixed to show the correct class docs. By `Manoj Kumar`_.
 
-   - Added silhouette plots for analysis of KMeans clustering using
-     :func:`metrics.silhouette_samples` and :func:`metrics.silhouette_score`.
-     See :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py`
+- Added silhouette plots for analysis of KMeans clustering using
+  :func:`metrics.silhouette_samples` and :func:`metrics.silhouette_score`.
+  See :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_silhouette_analysis.py`
 
 Bug fixes
 .........
-    - Metaestimators now support ducktyping for the presence of ``decision_function``,
-      ``predict_proba`` and other methods. This fixes behavior of
-      :class:`grid_search.GridSearchCV`,
-      :class:`grid_search.RandomizedSearchCV`, :class:`pipeline.Pipeline`,
-      :class:`feature_selection.RFE`, :class:`feature_selection.RFECV` when nested.
-      By `Joel Nothman`_
-
-    - The ``scoring`` attribute of grid-search and cross-validation methods is no longer
-      ignored when a :class:`grid_search.GridSearchCV` is given as a base estimator or
-      the base estimator doesn't have predict.
-
-    - The function :func:`hierarchical.ward_tree` now returns the children in
-      the same order for both the structured and unstructured versions. By
-      `Matteo Visconti di Oleggio Castello`_.
-
-    - :class:`feature_selection.RFECV` now correctly handles cases when
-      ``step`` is not equal to 1. By :user:`Nikolay Mayorov <nmayorov>`
-
-    - The :class:`decomposition.PCA` now undoes whitening in its
-      ``inverse_transform``. Also, its ``components_`` now always have unit
-      length. By :user:`Michael Eickenberg <eickenberg>`.
-
-    - Fix incomplete download of the dataset when
-      :func:`datasets.download_20newsgroups` is called. By `Manoj Kumar`_.
-
-    - Various fixes to the Gaussian processes subpackage by Vincent Dubourg
-      and Jan Hendrik Metzen.
-
-    - Calling ``partial_fit`` with ``class_weight=='auto'`` throws an
-      appropriate error message and suggests a work around.
-      By :user:`Danny Sullivan <dsullivan7>`.
-
-    - :class:`RBFSampler <kernel_approximation.RBFSampler>` with ``gamma=g``
-      formerly approximated :func:`rbf_kernel <metrics.pairwise.rbf_kernel>`
-      with ``gamma=g/2.``; the definition of ``gamma`` is now consistent,
-      which may substantially change your results if you use a fixed value.
-      (If you cross-validated over ``gamma``, it probably doesn't matter
-      too much.) By :user:`Dougal Sutherland <dougalsutherland>`.
-
-    - Pipeline object delegate the ``classes_`` attribute to the underlying
-      estimator. It allows, for instance, to make bagging of a pipeline object.
-      By `Arnaud Joly`_
-
-    - :class:`neighbors.NearestCentroid` now uses the median as the centroid
-      when metric is set to ``manhattan``. It was using the mean before.
-      By `Manoj Kumar`_
-
-    - Fix numerical stability issues in :class:`linear_model.SGDClassifier`
-      and :class:`linear_model.SGDRegressor` by clipping large gradients and
-      ensuring that weight decay rescaling is always positive (for large
-      l2 regularization and large learning rate values).
-      By `Olivier Grisel`_
-
-    - When `compute_full_tree` is set to "auto", the full tree is
-      built when n_clusters is high and is early stopped when n_clusters is
-      low, while the behavior should be vice-versa in
-      :class:`cluster.AgglomerativeClustering` (and friends).
-      This has been fixed By `Manoj Kumar`_
-
-    - Fix lazy centering of data in :func:`linear_model.enet_path` and
-      :func:`linear_model.lasso_path`. It was centered around one. It has
-      been changed to be centered around the origin. By `Manoj Kumar`_
-
-    - Fix handling of precomputed affinity matrices in
-      :class:`cluster.AgglomerativeClustering` when using connectivity
-      constraints. By :user:`Cathy Deng <cathydeng>`
-
-    - Correct ``partial_fit`` handling of ``class_prior`` for
-      :class:`sklearn.naive_bayes.MultinomialNB` and
-      :class:`sklearn.naive_bayes.BernoulliNB`. By `Trevor Stephens`_.
-
-    - Fixed a crash in :func:`metrics.precision_recall_fscore_support`
-      when using unsorted ``labels`` in the multi-label setting.
-      By `Andreas Müller`_.
-
-    - Avoid skipping the first nearest neighbor in the methods ``radius_neighbors``,
-      ``kneighbors``, ``kneighbors_graph`` and ``radius_neighbors_graph`` in
-      :class:`sklearn.neighbors.NearestNeighbors` and family, when the query
-      data is not the same as fit data. By `Manoj Kumar`_.
-
-    - Fix log-density calculation in the :class:`mixture.GMM` with
-      tied covariance. By `Will Dawson`_
-
-    - Fixed a scaling error in :class:`feature_selection.SelectFdr`
-      where a factor ``n_features`` was missing. By `Andrew Tulloch`_
-
-    - Fix zero division in :class:`neighbors.KNeighborsRegressor` and related
-      classes when using distance weighting and having identical data points.
-      By `Garret-R <https://github.com/Garrett-R>`_.
-
-    - Fixed round off errors with non positive-definite covariance matrices
-      in GMM. By :user:`Alexis Mignon <AlexisMignon>`.
-
-    - Fixed a error in the computation of conditional probabilities in
-      :class:`naive_bayes.BernoulliNB`. By `Hanna Wallach`_.
-
-    - Make the method ``radius_neighbors`` of
-      :class:`neighbors.NearestNeighbors` return the samples lying on the
-      boundary for ``algorithm='brute'``. By `Yan Yi`_.
-
-    - Flip sign of ``dual_coef_`` of :class:`svm.SVC`
-      to make it consistent with the documentation and
-      ``decision_function``. By Artem Sobolev.
+- Metaestimators now support ducktyping for the presence of ``decision_function``,
+  ``predict_proba`` and other methods. This fixes behavior of
+  :class:`grid_search.GridSearchCV`,
+  :class:`grid_search.RandomizedSearchCV`, :class:`pipeline.Pipeline`,
+  :class:`feature_selection.RFE`, :class:`feature_selection.RFECV` when nested.
+  By `Joel Nothman`_
+
+- The ``scoring`` attribute of grid-search and cross-validation methods is no longer
+  ignored when a :class:`grid_search.GridSearchCV` is given as a base estimator or
+  the base estimator doesn't have predict.
+
+- The function :func:`hierarchical.ward_tree` now returns the children in
+  the same order for both the structured and unstructured versions. By
+  `Matteo Visconti di Oleggio Castello`_.
+
+- :class:`feature_selection.RFECV` now correctly handles cases when
+  ``step`` is not equal to 1. By :user:`Nikolay Mayorov <nmayorov>`
+
+- The :class:`decomposition.PCA` now undoes whitening in its
+  ``inverse_transform``. Also, its ``components_`` now always have unit
+  length. By :user:`Michael Eickenberg <eickenberg>`.
+
+- Fix incomplete download of the dataset when
+  :func:`datasets.download_20newsgroups` is called. By `Manoj Kumar`_.
+
+- Various fixes to the Gaussian processes subpackage by Vincent Dubourg
+  and Jan Hendrik Metzen.
+
+- Calling ``partial_fit`` with ``class_weight=='auto'`` throws an
+  appropriate error message and suggests a work around.
+  By :user:`Danny Sullivan <dsullivan7>`.
+
+- :class:`RBFSampler <kernel_approximation.RBFSampler>` with ``gamma=g``
+  formerly approximated :func:`rbf_kernel <metrics.pairwise.rbf_kernel>`
+  with ``gamma=g/2.``; the definition of ``gamma`` is now consistent,
+  which may substantially change your results if you use a fixed value.
+  (If you cross-validated over ``gamma``, it probably doesn't matter
+  too much.) By :user:`Dougal Sutherland <dougalsutherland>`.
+
+- Pipeline objects now delegate the ``classes_`` attribute to the underlying
+  estimator. This makes it possible, for instance, to bag a pipeline object.
+  By `Arnaud Joly`_
+
+- :class:`neighbors.NearestCentroid` now uses the median as the centroid
+  when metric is set to ``manhattan``. It was using the mean before.
+  By `Manoj Kumar`_
+
+- Fix numerical stability issues in :class:`linear_model.SGDClassifier`
+  and :class:`linear_model.SGDRegressor` by clipping large gradients and
+  ensuring that weight decay rescaling is always positive (for large
+  l2 regularization and large learning rate values).
+  By `Olivier Grisel`_
+
+- When ``compute_full_tree`` was set to "auto", the full tree was
+  built when n_clusters was high and construction stopped early when
+  n_clusters was low, while the behavior should be the reverse in
+  :class:`cluster.AgglomerativeClustering` (and friends).
+  This has been fixed. By `Manoj Kumar`_
+
+- Fix lazy centering of data in :func:`linear_model.enet_path` and
+  :func:`linear_model.lasso_path`. It was centered around one. It has
+  been changed to be centered around the origin. By `Manoj Kumar`_
+
+- Fix handling of precomputed affinity matrices in
+  :class:`cluster.AgglomerativeClustering` when using connectivity
+  constraints. By :user:`Cathy Deng <cathydeng>`
+
+- Correct ``partial_fit`` handling of ``class_prior`` for
+  :class:`sklearn.naive_bayes.MultinomialNB` and
+  :class:`sklearn.naive_bayes.BernoulliNB`. By `Trevor Stephens`_.
+
+- Fixed a crash in :func:`metrics.precision_recall_fscore_support`
+  when using unsorted ``labels`` in the multi-label setting.
+  By `Andreas Müller`_.
+
+- Avoid skipping the first nearest neighbor in the methods ``radius_neighbors``,
+  ``kneighbors``, ``kneighbors_graph`` and ``radius_neighbors_graph`` in
+  :class:`sklearn.neighbors.NearestNeighbors` and family, when the query
+  data is not the same as fit data. By `Manoj Kumar`_.
+
+- Fix log-density calculation in the :class:`mixture.GMM` with
+  tied covariance. By `Will Dawson`_
+
+- Fixed a scaling error in :class:`feature_selection.SelectFdr`
+  where a factor ``n_features`` was missing. By `Andrew Tulloch`_
+
+- Fix zero division in :class:`neighbors.KNeighborsRegressor` and related
+  classes when using distance weighting and having identical data points.
+  By `Garret-R <https://github.com/Garrett-R>`_.
+
+- Fixed round off errors with non positive-definite covariance matrices
+  in GMM. By :user:`Alexis Mignon <AlexisMignon>`.
+
+- Fixed an error in the computation of conditional probabilities in
+  :class:`naive_bayes.BernoulliNB`. By `Hanna Wallach`_.
+
+- Make the method ``radius_neighbors`` of
+  :class:`neighbors.NearestNeighbors` return the samples lying on the
+  boundary for ``algorithm='brute'``. By `Yan Yi`_.
+
+- Flip sign of ``dual_coef_`` of :class:`svm.SVC`
+  to make it consistent with the documentation and
+  ``decision_function``. By Artem Sobolev.
 
-    - Fixed handling of ties in :class:`isotonic.IsotonicRegression`.
-      We now use the weighted average of targets (secondary method). By
-      `Andreas Müller`_ and `Michael Bommarito <http://bommaritollc.com/>`_.
+- Fixed handling of ties in :class:`isotonic.IsotonicRegression`.
+  We now use the weighted average of targets (secondary method). By
+  `Andreas Müller`_ and `Michael Bommarito <http://bommaritollc.com/>`_.
 
 API changes summary
 -------------------
 
-    - :class:`GridSearchCV <grid_search.GridSearchCV>` and
-      :func:`cross_val_score <cross_validation.cross_val_score>` and other
-      meta-estimators don't convert pandas DataFrames into arrays any more,
-      allowing DataFrame specific operations in custom estimators.
+- :class:`GridSearchCV <grid_search.GridSearchCV>` and
+  :func:`cross_val_score <cross_validation.cross_val_score>` and other
+  meta-estimators don't convert pandas DataFrames into arrays any more,
+  allowing DataFrame specific operations in custom estimators.
 
-    - :func:`multiclass.fit_ovr`, :func:`multiclass.predict_ovr`,
-      :func:`predict_proba_ovr`,
-      :func:`multiclass.fit_ovo`, :func:`multiclass.predict_ovo`,
-      :func:`multiclass.fit_ecoc` and :func:`multiclass.predict_ecoc`
-      are deprecated. Use the underlying estimators instead.
+- :func:`multiclass.fit_ovr`, :func:`multiclass.predict_ovr`,
+  :func:`predict_proba_ovr`,
+  :func:`multiclass.fit_ovo`, :func:`multiclass.predict_ovo`,
+  :func:`multiclass.fit_ecoc` and :func:`multiclass.predict_ecoc`
+  are deprecated. Use the underlying estimators instead.
 
-    - Nearest neighbors estimators used to take arbitrary keyword arguments
-      and pass these to their distance metric. This will no longer be supported
-      in scikit-learn 0.18; use the ``metric_params`` argument instead.
+- Nearest neighbors estimators used to take arbitrary keyword arguments
+  and pass these to their distance metric. This will no longer be supported
+  in scikit-learn 0.18; use the ``metric_params`` argument instead.
 
-    - `n_jobs` parameter of the fit method shifted to the constructor of the
+- `n_jobs` parameter of the fit method shifted to the constructor of the
        LinearRegression class.
 
-    - The ``predict_proba`` method of :class:`multiclass.OneVsRestClassifier`
-      now returns two probabilities per sample in the multiclass case; this
-      is consistent with other estimators and with the method's documentation,
-      but previous versions accidentally returned only the positive
-      probability. Fixed by Will Lamond and `Lars Buitinck`_.
-
-    - Change default value of precompute in :class:`ElasticNet` and :class:`Lasso`
-      to False. Setting precompute to "auto" was found to be slower when
-      n_samples > n_features since the computation of the Gram matrix is
-      computationally expensive and outweighs the benefit of fitting the Gram
-      for just one alpha.
-      ``precompute="auto"`` is now deprecated and will be removed in 0.18
-      By `Manoj Kumar`_.
-
-    - Expose ``positive`` option in :func:`linear_model.enet_path` and
-      :func:`linear_model.enet_path` which constrains coefficients to be
-      positive. By `Manoj Kumar`_.
-
-    - Users should now supply an explicit ``average`` parameter to
-      :func:`sklearn.metrics.f1_score`, :func:`sklearn.metrics.fbeta_score`,
-      :func:`sklearn.metrics.recall_score` and
-      :func:`sklearn.metrics.precision_score` when performing multiclass
-      or multilabel (i.e. not binary) classification. By `Joel Nothman`_.
-
-    - `scoring` parameter for cross validation now accepts `'f1_micro'`,
-      `'f1_macro'` or `'f1_weighted'`. `'f1'` is now for binary classification
-      only. Similar changes apply to `'precision'` and `'recall'`.
-      By `Joel Nothman`_.
-
-    - The ``fit_intercept``, ``normalize`` and ``return_models`` parameters in
-      :func:`linear_model.enet_path` and :func:`linear_model.lasso_path` have
-      been removed. They were deprecated since 0.14
-
-    - From now onwards, all estimators will uniformly raise ``NotFittedError``
-      (:class:`utils.validation.NotFittedError`), when any of the ``predict``
-      like methods are called before the model is fit. By `Raghav RV`_.
-
-    - Input data validation was refactored for more consistent input
-      validation. The ``check_arrays`` function was replaced by ``check_array``
-      and ``check_X_y``. By `Andreas Müller`_.
-
-    - Allow ``X=None`` in the methods ``radius_neighbors``, ``kneighbors``,
-      ``kneighbors_graph`` and ``radius_neighbors_graph`` in
-      :class:`sklearn.neighbors.NearestNeighbors` and family. If set to None,
-      then for every sample this avoids setting the sample itself as the
-      first nearest neighbor. By `Manoj Kumar`_.
-
-    - Add parameter ``include_self`` in :func:`neighbors.kneighbors_graph`
-      and :func:`neighbors.radius_neighbors_graph` which has to be explicitly
-      set by the user. If set to True, then the sample itself is considered
-      as the first nearest neighbor.
-
-    - `thresh` parameter is deprecated in favor of new `tol` parameter in
-      :class:`GMM`, :class:`DPGMM` and :class:`VBGMM`. See `Enhancements`
-      section for details. By `Hervé Bredin`_.
-
-    - Estimators will treat input with dtype object as numeric when possible.
-      By `Andreas Müller`_
-
-    - Estimators now raise `ValueError` consistently when fitted on empty
-      data (less than 1 sample or less than 1 feature for 2D input).
-      By `Olivier Grisel`_.
-
-
-    - The ``shuffle`` option of :class:`.linear_model.SGDClassifier`,
-      :class:`linear_model.SGDRegressor`, :class:`linear_model.Perceptron`,
-      :class:`linear_model.PassiveAgressiveClassifier` and
-      :class:`linear_model.PassiveAgressiveRegressor` now defaults to ``True``.
-
-    - :class:`cluster.DBSCAN` now uses a deterministic initialization. The
-      `random_state` parameter is deprecated. By :user:`Erich Schubert <kno10>`.
+- The ``predict_proba`` method of :class:`multiclass.OneVsRestClassifier`
+  now returns two probabilities per sample in the multiclass case; this
+  is consistent with other estimators and with the method's documentation,
+  but previous versions accidentally returned only the positive
+  probability. Fixed by Will Lamond and `Lars Buitinck`_.
+
+- Change default value of precompute in :class:`ElasticNet` and :class:`Lasso`
+  to False. Setting precompute to "auto" was found to be slower when
+  n_samples > n_features since the computation of the Gram matrix is
+  computationally expensive and outweighs the benefit of fitting the Gram
+  for just one alpha.
+  ``precompute="auto"`` is now deprecated and will be removed in 0.18.
+  By `Manoj Kumar`_.
+
+- Expose ``positive`` option in :func:`linear_model.enet_path` and
+  :func:`linear_model.lasso_path` which constrains coefficients to be
+  positive. By `Manoj Kumar`_.
+
+- Users should now supply an explicit ``average`` parameter to
+  :func:`sklearn.metrics.f1_score`, :func:`sklearn.metrics.fbeta_score`,
+  :func:`sklearn.metrics.recall_score` and
+  :func:`sklearn.metrics.precision_score` when performing multiclass
+  or multilabel (i.e. not binary) classification. By `Joel Nothman`_.
+
+- `scoring` parameter for cross validation now accepts `'f1_micro'`,
+  `'f1_macro'` or `'f1_weighted'`. `'f1'` is now for binary classification
+  only. Similar changes apply to `'precision'` and `'recall'`.
+  By `Joel Nothman`_.
+
+- The ``fit_intercept``, ``normalize`` and ``return_models`` parameters in
+  :func:`linear_model.enet_path` and :func:`linear_model.lasso_path` have
+  been removed. They were deprecated since 0.14.
+
+- From now onwards, all estimators will uniformly raise ``NotFittedError``
+  (:class:`utils.validation.NotFittedError`), when any of the ``predict``
+  like methods are called before the model is fit. By `Raghav RV`_.
+
+- Input data validation was refactored for more consistent input
+  validation. The ``check_arrays`` function was replaced by ``check_array``
+  and ``check_X_y``. By `Andreas Müller`_.
+
+- Allow ``X=None`` in the methods ``radius_neighbors``, ``kneighbors``,
+  ``kneighbors_graph`` and ``radius_neighbors_graph`` in
+  :class:`sklearn.neighbors.NearestNeighbors` and family. If set to None,
+  then for every sample this avoids setting the sample itself as the
+  first nearest neighbor. By `Manoj Kumar`_.
+
+- Add parameter ``include_self`` in :func:`neighbors.kneighbors_graph`
+  and :func:`neighbors.radius_neighbors_graph` which has to be explicitly
+  set by the user. If set to True, then the sample itself is considered
+  as the first nearest neighbor.
+
+- `thresh` parameter is deprecated in favor of new `tol` parameter in
+  :class:`GMM`, :class:`DPGMM` and :class:`VBGMM`. See `Enhancements`
+  section for details. By `Hervé Bredin`_.
+
+- Estimators will treat input with dtype object as numeric when possible.
+  By `Andreas Müller`_.
+
+- Estimators now raise `ValueError` consistently when fitted on empty
+  data (less than 1 sample or less than 1 feature for 2D input).
+  By `Olivier Grisel`_.
+
+
+- The ``shuffle`` option of :class:`.linear_model.SGDClassifier`,
+  :class:`linear_model.SGDRegressor`, :class:`linear_model.Perceptron`,
+  :class:`linear_model.PassiveAggressiveClassifier` and
+  :class:`linear_model.PassiveAggressiveRegressor` now defaults to ``True``.
+
+- :class:`cluster.DBSCAN` now uses a deterministic initialization. The
+  `random_state` parameter is deprecated. By :user:`Erich Schubert <kno10>`.
 
 Code Contributors
 -----------------
@@ -2702,41 +2822,41 @@ Version 0.15.2
 Bug fixes
 ---------
 
-  - Fixed handling of the ``p`` parameter of the Minkowski distance that was
-    previously ignored in nearest neighbors models. By :user:`Nikolay
-    Mayorov <nmayorov>`.
+- Fixed handling of the ``p`` parameter of the Minkowski distance that was
+  previously ignored in nearest neighbors models. By :user:`Nikolay
+  Mayorov <nmayorov>`.
 
-  - Fixed duplicated alphas in :class:`linear_model.LassoLars` with early
-    stopping on 32 bit Python. By `Olivier Grisel`_ and `Fabian Pedregosa`_.
+- Fixed duplicated alphas in :class:`linear_model.LassoLars` with early
+  stopping on 32 bit Python. By `Olivier Grisel`_ and `Fabian Pedregosa`_.
 
-  - Fixed the build under Windows when scikit-learn is built with MSVC while
-    NumPy is built with MinGW. By `Olivier Grisel`_ and :user:`Federico
-    Vaggi <FedericoV>`.
+- Fixed the build under Windows when scikit-learn is built with MSVC while
+  NumPy is built with MinGW. By `Olivier Grisel`_ and :user:`Federico
+  Vaggi <FedericoV>`.
 
-  - Fixed an array index overflow bug in the coordinate descent solver. By
-    `Gael Varoquaux`_.
+- Fixed an array index overflow bug in the coordinate descent solver. By
+  `Gael Varoquaux`_.
 
-  - Better handling of numpy 1.9 deprecation warnings. By `Gael Varoquaux`_.
+- Better handling of numpy 1.9 deprecation warnings. By `Gael Varoquaux`_.
 
-  - Removed unnecessary data copy in :class:`cluster.KMeans`.
-    By `Gael Varoquaux`_.
+- Removed unnecessary data copy in :class:`cluster.KMeans`.
+  By `Gael Varoquaux`_.
 
-  - Explicitly close open files to avoid ``ResourceWarnings`` under Python 3.
-    By Calvin Giles.
+- Explicitly close open files to avoid ``ResourceWarnings`` under Python 3.
+  By Calvin Giles.
 
-  - The ``transform`` of :class:`discriminant_analysis.LinearDiscriminantAnalysis`
-    now projects the input on the most discriminant directions. By Martin Billinger.
+- The ``transform`` of :class:`discriminant_analysis.LinearDiscriminantAnalysis`
+  now projects the input on the most discriminant directions. By Martin Billinger.
 
-  - Fixed potential overflow in ``_tree.safe_realloc`` by `Lars Buitinck`_.
+- Fixed potential overflow in ``_tree.safe_realloc`` by `Lars Buitinck`_.
 
-  - Performance optimization in :class:`isotonic.IsotonicRegression`.
-    By Robert Bradshaw.
+- Performance optimization in :class:`isotonic.IsotonicRegression`.
+  By Robert Bradshaw.
 
-  - ``nose`` is non-longer a runtime dependency to import ``sklearn``, only for
-    running the tests. By `Joel Nothman`_.
+- ``nose`` is no longer a runtime dependency to import ``sklearn``, only for
+  running the tests. By `Joel Nothman`_.
 
-  - Many documentation and website fixes by `Joel Nothman`_, `Lars Buitinck`_
-    :user:`Matt Pico <MattpSoftware>`, and others.
+- Many documentation and website fixes by `Joel Nothman`_, `Lars Buitinck`_,
+  :user:`Matt Pico <MattpSoftware>`, and others.
 
 .. _changes_0_15_1:
 
@@ -2748,35 +2868,35 @@ Version 0.15.1
 Bug fixes
 ---------
 
-   - Made :func:`cross_validation.cross_val_score` use
-     :class:`cross_validation.KFold` instead of
-     :class:`cross_validation.StratifiedKFold` on multi-output classification
-     problems. By :user:`Nikolay Mayorov <nmayorov>`.
+- Made :func:`cross_validation.cross_val_score` use
+  :class:`cross_validation.KFold` instead of
+  :class:`cross_validation.StratifiedKFold` on multi-output classification
+  problems. By :user:`Nikolay Mayorov <nmayorov>`.
 
-   - Support unseen labels :class:`preprocessing.LabelBinarizer` to restore
-     the default behavior of 0.14.1 for backward compatibility. By
-     :user:`Hamzeh Alsalhi <hamsal>`.
+- Support unseen labels in :class:`preprocessing.LabelBinarizer` to restore
+  the default behavior of 0.14.1 for backward compatibility. By
+  :user:`Hamzeh Alsalhi <hamsal>`.
 
-   - Fixed the :class:`cluster.KMeans` stopping criterion that prevented early
-     convergence detection. By Edward Raff and `Gael Varoquaux`_.
+- Fixed the :class:`cluster.KMeans` stopping criterion that prevented early
+  convergence detection. By Edward Raff and `Gael Varoquaux`_.
 
-   - Fixed the behavior of :class:`multiclass.OneVsOneClassifier`.
-     in case of ties at the per-class vote level by computing the correct
-     per-class sum of prediction scores. By `Andreas Müller`_.
+- Fixed the behavior of :class:`multiclass.OneVsOneClassifier`
+  in case of ties at the per-class vote level by computing the correct
+  per-class sum of prediction scores. By `Andreas Müller`_.
 
-   - Made :func:`cross_validation.cross_val_score` and
-     :class:`grid_search.GridSearchCV` accept Python lists as input data.
-     This is especially useful for cross-validation and model selection of
-     text processing pipelines. By `Andreas Müller`_.
+- Made :func:`cross_validation.cross_val_score` and
+  :class:`grid_search.GridSearchCV` accept Python lists as input data.
+  This is especially useful for cross-validation and model selection of
+  text processing pipelines. By `Andreas Müller`_.
 
-   - Fixed data input checks of most estimators to accept input data that
-     implements the NumPy ``__array__`` protocol. This is the case for
-     for ``pandas.Series`` and ``pandas.DataFrame`` in recent versions of
-     pandas. By `Gael Varoquaux`_.
+- Fixed data input checks of most estimators to accept input data that
+  implements the NumPy ``__array__`` protocol. This is the case
+  for ``pandas.Series`` and ``pandas.DataFrame`` in recent versions of
+  pandas. By `Gael Varoquaux`_.
 
-   - Fixed a regression for :class:`linear_model.SGDClassifier` with
-     ``class_weight="auto"`` on data with non-contiguous labels. By
-     `Olivier Grisel`_.
+- Fixed a regression for :class:`linear_model.SGDClassifier` with
+  ``class_weight="auto"`` on data with non-contiguous labels. By
+  `Olivier Grisel`_.
 
 
 .. _changes_0_15:
@@ -2789,22 +2909,22 @@ Version 0.15
 Highlights
 -----------
 
-   - Many speed and memory improvements all across the code
+- Many speed and memory improvements all across the code
 
-   - Huge speed and memory improvements to random forests (and extra
-     trees) that also benefit better from parallel computing.
+- Huge speed and memory improvements to random forests (and extra
+  trees) that also benefit better from parallel computing.
 
-   - Incremental fit to :class:`BernoulliRBM <neural_network.BernoulliRBM>`
+- Incremental fit to :class:`BernoulliRBM <neural_network.BernoulliRBM>`
 
-   - Added :class:`cluster.AgglomerativeClustering` for hierarchical
-     agglomerative clustering with average linkage, complete linkage and
-     ward strategies.
+- Added :class:`cluster.AgglomerativeClustering` for hierarchical
+  agglomerative clustering with average linkage, complete linkage and
+  ward strategies.
 
-   - Added :class:`linear_model.RANSACRegressor` for robust regression
-     models.
+- Added :class:`linear_model.RANSACRegressor` for robust regression
+  models.
 
-   - Added dimensionality reduction with :class:`manifold.TSNE` which can be
-     used to visualize high-dimensional data.
+- Added dimensionality reduction with :class:`manifold.TSNE` which can be
+  used to visualize high-dimensional data.
 
 
 Changelog
@@ -2813,334 +2933,334 @@ Changelog
 New features
 ............
 
-   - Added :class:`ensemble.BaggingClassifier` and
-     :class:`ensemble.BaggingRegressor` meta-estimators for ensembling
-     any kind of base estimator. See the :ref:`Bagging <bagging>` section of
-     the user guide for details and examples. By `Gilles Louppe`_.
+- Added :class:`ensemble.BaggingClassifier` and
+  :class:`ensemble.BaggingRegressor` meta-estimators for ensembling
+  any kind of base estimator. See the :ref:`Bagging <bagging>` section of
+  the user guide for details and examples. By `Gilles Louppe`_.
 
-   - New unsupervised feature selection algorithm
-     :class:`feature_selection.VarianceThreshold`, by `Lars Buitinck`_.
+- New unsupervised feature selection algorithm
+  :class:`feature_selection.VarianceThreshold`, by `Lars Buitinck`_.
 
-   - Added :class:`linear_model.RANSACRegressor` meta-estimator for the robust
-     fitting of regression models. By :user:`Johannes Schönberger <ahojnnes>`.
+- Added :class:`linear_model.RANSACRegressor` meta-estimator for the robust
+  fitting of regression models. By :user:`Johannes Schönberger <ahojnnes>`.
 
-   - Added :class:`cluster.AgglomerativeClustering` for hierarchical
-     agglomerative clustering with average linkage, complete linkage and
-     ward strategies, by  `Nelle Varoquaux`_ and `Gael Varoquaux`_.
+- Added :class:`cluster.AgglomerativeClustering` for hierarchical
+  agglomerative clustering with average linkage, complete linkage and
+  ward strategies, by  `Nelle Varoquaux`_ and `Gael Varoquaux`_.
 
-   - Shorthand constructors :func:`pipeline.make_pipeline` and
-     :func:`pipeline.make_union` were added by `Lars Buitinck`_.
+- Shorthand constructors :func:`pipeline.make_pipeline` and
+  :func:`pipeline.make_union` were added by `Lars Buitinck`_.
 
-   - Shuffle option for :class:`cross_validation.StratifiedKFold`.
-     By :user:`Jeffrey Blackburne <jblackburne>`.
+- Shuffle option for :class:`cross_validation.StratifiedKFold`.
+  By :user:`Jeffrey Blackburne <jblackburne>`.
 
-   - Incremental learning (``partial_fit``) for Gaussian Naive Bayes by
-     Imran Haque.
+- Incremental learning (``partial_fit``) for Gaussian Naive Bayes by
+  Imran Haque.
 
-   - Added ``partial_fit`` to :class:`BernoulliRBM
-     <neural_network.BernoulliRBM>`
-     By :user:`Danny Sullivan <dsullivan7>`.
+- Added ``partial_fit`` to :class:`BernoulliRBM
+  <neural_network.BernoulliRBM>`
+  By :user:`Danny Sullivan <dsullivan7>`.
 
-   - Added :func:`learning_curve <learning_curve.learning_curve>` utility to
-     chart performance with respect to training size. See
-     :ref:`sphx_glr_auto_examples_model_selection_plot_learning_curve.py`. By Alexander Fabisch.
+- Added :func:`learning_curve <learning_curve.learning_curve>` utility to
+  chart performance with respect to training size. See
+  :ref:`sphx_glr_auto_examples_model_selection_plot_learning_curve.py`. By Alexander Fabisch.
 
-   - Add positive option in :class:`LassoCV <linear_model.LassoCV>` and
-     :class:`ElasticNetCV <linear_model.ElasticNetCV>`.
-     By Brian Wignall and `Alexandre Gramfort`_.
+- Add positive option in :class:`LassoCV <linear_model.LassoCV>` and
+  :class:`ElasticNetCV <linear_model.ElasticNetCV>`.
+  By Brian Wignall and `Alexandre Gramfort`_.
 
-   - Added :class:`linear_model.MultiTaskElasticNetCV` and
-     :class:`linear_model.MultiTaskLassoCV`. By `Manoj Kumar`_.
+- Added :class:`linear_model.MultiTaskElasticNetCV` and
+  :class:`linear_model.MultiTaskLassoCV`. By `Manoj Kumar`_.
 
-   - Added :class:`manifold.TSNE`. By Alexander Fabisch.
+- Added :class:`manifold.TSNE`. By Alexander Fabisch.
 
 Enhancements
 ............
 
-   - Add sparse input support to :class:`ensemble.AdaBoostClassifier` and
-     :class:`ensemble.AdaBoostRegressor` meta-estimators.
-     By :user:`Hamzeh Alsalhi <hamsal>`.
+- Add sparse input support to :class:`ensemble.AdaBoostClassifier` and
+  :class:`ensemble.AdaBoostRegressor` meta-estimators.
+  By :user:`Hamzeh Alsalhi <hamsal>`.
 
-   - Memory improvements of decision trees, by `Arnaud Joly`_.
+- Memory improvements of decision trees, by `Arnaud Joly`_.
 
-   - Decision trees can now be built in best-first manner by using ``max_leaf_nodes``
-     as the stopping criteria. Refactored the tree code to use either a
-     stack or a priority queue for tree building.
-     By `Peter Prettenhofer`_ and `Gilles Louppe`_.
+- Decision trees can now be built in best-first manner by using ``max_leaf_nodes``
+  as the stopping criteria. Refactored the tree code to use either a
+  stack or a priority queue for tree building.
+  By `Peter Prettenhofer`_ and `Gilles Louppe`_.
 
-   - Decision trees can now be fitted on fortran- and c-style arrays, and
-     non-continuous arrays without the need to make a copy.
-     If the input array has a different dtype than ``np.float32``, a fortran-
-     style copy will be made since fortran-style memory layout has speed
-     advantages. By `Peter Prettenhofer`_ and `Gilles Louppe`_.
+- Decision trees can now be fitted on fortran- and c-style arrays, and
+  non-contiguous arrays without the need to make a copy.
+  If the input array has a different dtype than ``np.float32``, a fortran-
+  style copy will be made since fortran-style memory layout has speed
+  advantages. By `Peter Prettenhofer`_ and `Gilles Louppe`_.
 
-   - Speed improvement of regression trees by optimizing the
-     the computation of the mean square error criterion. This lead
-     to speed improvement of the tree, forest and gradient boosting tree
-     modules. By `Arnaud Joly`_
+- Speed improvement of regression trees by optimizing
+  the computation of the mean square error criterion. This led
+  to speed improvement of the tree, forest and gradient boosting tree
+  modules. By `Arnaud Joly`_.
 
-   - The ``img_to_graph`` and ``grid_tograph`` functions in
-     :mod:`sklearn.feature_extraction.image` now return ``np.ndarray``
-     instead of ``np.matrix`` when ``return_as=np.ndarray``.  See the
-     Notes section for more information on compatibility.
-
-   - Changed the internal storage of decision trees to use a struct array.
-     This fixed some small bugs, while improving code and providing a small
-     speed gain. By `Joel Nothman`_.
-
-   - Reduce memory usage and overhead when fitting and predicting with forests
-     of randomized trees in parallel with ``n_jobs != 1`` by leveraging new
-     threading backend of joblib 0.8 and releasing the GIL in the tree fitting
-     Cython code.  By `Olivier Grisel`_ and `Gilles Louppe`_.
-
-   - Speed improvement of the :mod:`sklearn.ensemble.gradient_boosting` module.
-     By `Gilles Louppe`_ and `Peter Prettenhofer`_.
-
-   - Various enhancements to the  :mod:`sklearn.ensemble.gradient_boosting`
-     module: a ``warm_start`` argument to fit additional trees,
-     a ``max_leaf_nodes`` argument to fit GBM style trees,
-     a ``monitor`` fit argument to inspect the estimator during training, and
-     refactoring of the verbose code. By `Peter Prettenhofer`_.
-
-   - Faster :class:`sklearn.ensemble.ExtraTrees` by caching feature values.
-     By `Arnaud Joly`_.
-
-   - Faster depth-based tree building algorithm such as decision tree,
-     random forest, extra trees or gradient tree boosting (with depth based
-     growing strategy) by avoiding trying to split on found constant features
-     in the sample subset. By `Arnaud Joly`_.
-
-   - Add ``min_weight_fraction_leaf`` pre-pruning parameter to tree-based
-     methods: the minimum weighted fraction of the input samples required to be
-     at a leaf node. By `Noel Dawe`_.
-
-   - Added :func:`metrics.pairwise_distances_argmin_min`, by Philippe Gervais.
-
-   - Added predict method to :class:`cluster.AffinityPropagation` and
-     :class:`cluster.MeanShift`, by `Mathieu Blondel`_.
-
-   - Vector and matrix multiplications have been optimised throughout the
-     library by `Denis Engemann`_, and `Alexandre Gramfort`_.
-     In particular, they should take less memory with older NumPy versions
-     (prior to 1.7.2).
-
-   - Precision-recall and ROC examples now use train_test_split, and have more
-     explanation of why these metrics are useful. By `Kyle Kastner`_
-
-   - The training algorithm for :class:`decomposition.NMF` is faster for
-     sparse matrices and has much lower memory complexity, meaning it will
-     scale up gracefully to large datasets. By `Lars Buitinck`_.
-
-   - Added svd_method option with default value to "randomized" to
-     :class:`decomposition.FactorAnalysis` to save memory and
-     significantly speedup computation by `Denis Engemann`_, and
-     `Alexandre Gramfort`_.
-
-   - Changed :class:`cross_validation.StratifiedKFold` to try and
-     preserve as much of the original ordering of samples as possible so as
-     not to hide overfitting on datasets with a non-negligible level of
-     samples dependency.
-     By `Daniel Nouri`_ and `Olivier Grisel`_.
-
-   - Add multi-output support to :class:`gaussian_process.GaussianProcess`
-     by John Novak.
-
-   - Support for precomputed distance matrices in nearest neighbor estimators
-     by `Robert Layton`_ and `Joel Nothman`_.
-
-   - Norm computations optimized for NumPy 1.6 and later versions by
-     `Lars Buitinck`_. In particular, the k-means algorithm no longer
-     needs a temporary data structure the size of its input.
-
-   - :class:`dummy.DummyClassifier` can now be used to predict a constant
-     output value. By `Manoj Kumar`_.
-
-   - :class:`dummy.DummyRegressor` has now a strategy parameter which allows
-     to predict the mean, the median of the training set or a constant
-     output value. By :user:`Maheshakya Wijewardena <maheshakya>`.
-
-   - Multi-label classification output in multilabel indicator format
-     is now supported by :func:`metrics.roc_auc_score` and
-     :func:`metrics.average_precision_score` by `Arnaud Joly`_.
-
-   - Significant performance improvements (more than 100x speedup for
-     large problems) in :class:`isotonic.IsotonicRegression` by
-     `Andrew Tulloch`_.
-
-   - Speed and memory usage improvements to the SGD algorithm for linear
-     models: it now uses threads, not separate processes, when ``n_jobs>1``.
-     By `Lars Buitinck`_.
-
-   - Grid search and cross validation allow NaNs in the input arrays so that
-     preprocessors such as :class:`preprocessing.Imputer
-     <preprocessing.Imputer>` can be trained within the cross validation loop,
-     avoiding potentially skewed results.
-
-   - Ridge regression can now deal with sample weights in feature space
-     (only sample space until then). By :user:`Michael Eickenberg <eickenberg>`.
-     Both solutions are provided by the Cholesky solver.
-
-   - Several classification and regression metrics now support weighted
-     samples with the new ``sample_weight`` argument:
-     :func:`metrics.accuracy_score`,
-     :func:`metrics.zero_one_loss`,
-     :func:`metrics.precision_score`,
-     :func:`metrics.average_precision_score`,
-     :func:`metrics.f1_score`,
-     :func:`metrics.fbeta_score`,
-     :func:`metrics.recall_score`,
-     :func:`metrics.roc_auc_score`,
-     :func:`metrics.explained_variance_score`,
-     :func:`metrics.mean_squared_error`,
-     :func:`metrics.mean_absolute_error`,
-     :func:`metrics.r2_score`.
-     By `Noel Dawe`_.
-
-   - Speed up of the sample generator
-     :func:`datasets.make_multilabel_classification`. By `Joel Nothman`_.
+- The ``img_to_graph`` and ``grid_to_graph`` functions in
+  :mod:`sklearn.feature_extraction.image` now return ``np.ndarray``
+  instead of ``np.matrix`` when ``return_as=np.ndarray``.  See the
+  Notes section for more information on compatibility.
+
+- Changed the internal storage of decision trees to use a struct array.
+  This fixed some small bugs, while improving code and providing a small
+  speed gain. By `Joel Nothman`_.
+
+- Reduce memory usage and overhead when fitting and predicting with forests
+  of randomized trees in parallel with ``n_jobs != 1`` by leveraging new
+  threading backend of joblib 0.8 and releasing the GIL in the tree fitting
+  Cython code.  By `Olivier Grisel`_ and `Gilles Louppe`_.
+
+- Speed improvement of the :mod:`sklearn.ensemble.gradient_boosting` module.
+  By `Gilles Louppe`_ and `Peter Prettenhofer`_.
+
+- Various enhancements to the  :mod:`sklearn.ensemble.gradient_boosting`
+  module: a ``warm_start`` argument to fit additional trees,
+  a ``max_leaf_nodes`` argument to fit GBM style trees,
+  a ``monitor`` fit argument to inspect the estimator during training, and
+  refactoring of the verbose code. By `Peter Prettenhofer`_.
+
+- Faster :class:`sklearn.ensemble.ExtraTrees` by caching feature values.
+  By `Arnaud Joly`_.
+
+- Faster depth-based tree building algorithm such as decision tree,
+  random forest, extra trees or gradient tree boosting (with depth based
+  growing strategy) by avoiding trying to split on found constant features
+  in the sample subset. By `Arnaud Joly`_.
+
+- Add ``min_weight_fraction_leaf`` pre-pruning parameter to tree-based
+  methods: the minimum weighted fraction of the input samples required to be
+  at a leaf node. By `Noel Dawe`_.
+
+- Added :func:`metrics.pairwise_distances_argmin_min`, by Philippe Gervais.
+
+- Added predict method to :class:`cluster.AffinityPropagation` and
+  :class:`cluster.MeanShift`, by `Mathieu Blondel`_.
+
+- Vector and matrix multiplications have been optimised throughout the
+  library by `Denis Engemann`_, and `Alexandre Gramfort`_.
+  In particular, they should take less memory with older NumPy versions
+  (prior to 1.7.2).
+
+- Precision-recall and ROC examples now use train_test_split, and have more
+  explanation of why these metrics are useful. By `Kyle Kastner`_.
+
+- The training algorithm for :class:`decomposition.NMF` is faster for
+  sparse matrices and has much lower memory complexity, meaning it will
+  scale up gracefully to large datasets. By `Lars Buitinck`_.
+
+- Added ``svd_method`` option, defaulting to "randomized", to
+  :class:`decomposition.FactorAnalysis` to save memory and
+  significantly speed up computation by `Denis Engemann`_, and
+  `Alexandre Gramfort`_.
+
+- Changed :class:`cross_validation.StratifiedKFold` to try and
+  preserve as much of the original ordering of samples as possible so as
+  not to hide overfitting on datasets with a non-negligible level of
+  samples dependency.
+  By `Daniel Nouri`_ and `Olivier Grisel`_.
+
+- Add multi-output support to :class:`gaussian_process.GaussianProcess`
+  by John Novak.
+
+- Support for precomputed distance matrices in nearest neighbor estimators
+  by `Robert Layton`_ and `Joel Nothman`_.
+
+- Norm computations optimized for NumPy 1.6 and later versions by
+  `Lars Buitinck`_. In particular, the k-means algorithm no longer
+  needs a temporary data structure the size of its input.
+
+- :class:`dummy.DummyClassifier` can now be used to predict a constant
+  output value. By `Manoj Kumar`_.
+
+- :class:`dummy.DummyRegressor` now has a ``strategy`` parameter which allows
+  predicting the mean, the median of the training set or a constant
+  output value. By :user:`Maheshakya Wijewardena <maheshakya>`.
+
+- Multi-label classification output in multilabel indicator format
+  is now supported by :func:`metrics.roc_auc_score` and
+  :func:`metrics.average_precision_score` by `Arnaud Joly`_.
+
+- Significant performance improvements (more than 100x speedup for
+  large problems) in :class:`isotonic.IsotonicRegression` by
+  `Andrew Tulloch`_.
+
+- Speed and memory usage improvements to the SGD algorithm for linear
+  models: it now uses threads, not separate processes, when ``n_jobs>1``.
+  By `Lars Buitinck`_.
+
+- Grid search and cross validation allow NaNs in the input arrays so that
+  preprocessors such as :class:`preprocessing.Imputer
+  <preprocessing.Imputer>` can be trained within the cross validation loop,
+  avoiding potentially skewed results.
+
+- Ridge regression can now deal with sample weights in feature space
+  (only sample space until then). By :user:`Michael Eickenberg <eickenberg>`.
+  Both solutions are provided by the Cholesky solver.
+
+- Several classification and regression metrics now support weighted
+  samples with the new ``sample_weight`` argument:
+  :func:`metrics.accuracy_score`,
+  :func:`metrics.zero_one_loss`,
+  :func:`metrics.precision_score`,
+  :func:`metrics.average_precision_score`,
+  :func:`metrics.f1_score`,
+  :func:`metrics.fbeta_score`,
+  :func:`metrics.recall_score`,
+  :func:`metrics.roc_auc_score`,
+  :func:`metrics.explained_variance_score`,
+  :func:`metrics.mean_squared_error`,
+  :func:`metrics.mean_absolute_error`,
+  :func:`metrics.r2_score`.
+  By `Noel Dawe`_.
+
+- Speed up of the sample generator
+  :func:`datasets.make_multilabel_classification`. By `Joel Nothman`_.
 
 Documentation improvements
 ...........................
 
-   - The :ref:`Working With Text Data <text_data_tutorial>` tutorial
-     has now been worked in to the main documentation's tutorial section.
-     Includes exercises and skeletons for tutorial presentation.
-     Original tutorial created by several authors including
-     `Olivier Grisel`_, Lars Buitinck and many others.
-     Tutorial integration into the scikit-learn documentation
-     by `Jaques Grobler`_
-
-   - Added :ref:`Computational Performance <computational_performance>`
-     documentation. Discussion and examples of prediction latency / throughput
-     and different factors that have influence over speed. Additional tips for
-     building faster models and choosing a relevant compromise between speed
-     and predictive power.
-     By :user:`Eustache Diemert <oddskool>`.
+- The :ref:`Working With Text Data <text_data_tutorial>` tutorial
+  has now been worked into the main documentation's tutorial section.
+  Includes exercises and skeletons for tutorial presentation.
+  Original tutorial created by several authors including
+  `Olivier Grisel`_, Lars Buitinck and many others.
+  Tutorial integration into the scikit-learn documentation
+  by `Jaques Grobler`_
+
+- Added :ref:`Computational Performance <computational_performance>`
+  documentation. Discussion and examples of prediction latency / throughput
+  and different factors that have influence over speed. Additional tips for
+  building faster models and choosing a relevant compromise between speed
+  and predictive power.
+  By :user:`Eustache Diemert <oddskool>`.
 
 Bug fixes
 .........
 
-   - Fixed bug in :class:`decomposition.MiniBatchDictionaryLearning` :
-     ``partial_fit`` was not working properly.
+- Fixed bug in :class:`decomposition.MiniBatchDictionaryLearning` :
+  ``partial_fit`` was not working properly.
 
-   - Fixed bug in :class:`linear_model.stochastic_gradient` :
-     ``l1_ratio`` was used as ``(1.0 - l1_ratio)`` .
+- Fixed bug in :class:`linear_model.stochastic_gradient` :
+  ``l1_ratio`` was used as ``(1.0 - l1_ratio)`` .
 
-   - Fixed bug in :class:`multiclass.OneVsOneClassifier` with string
-     labels
+- Fixed bug in :class:`multiclass.OneVsOneClassifier` with string
+  labels
 
-   - Fixed a bug in :class:`LassoCV <linear_model.LassoCV>` and
-     :class:`ElasticNetCV <linear_model.ElasticNetCV>`: they would not
-     pre-compute the Gram matrix with ``precompute=True`` or
-     ``precompute="auto"`` and ``n_samples > n_features``. By `Manoj Kumar`_.
+- Fixed a bug in :class:`LassoCV <linear_model.LassoCV>` and
+  :class:`ElasticNetCV <linear_model.ElasticNetCV>`: they would not
+  pre-compute the Gram matrix with ``precompute=True`` or
+  ``precompute="auto"`` and ``n_samples > n_features``. By `Manoj Kumar`_.
 
-   - Fixed incorrect estimation of the degrees of freedom in
-     :func:`feature_selection.f_regression` when variates are not centered.
-     By :user:`Virgile Fritsch <VirgileFritsch>`.
+- Fixed incorrect estimation of the degrees of freedom in
+  :func:`feature_selection.f_regression` when variates are not centered.
+  By :user:`Virgile Fritsch <VirgileFritsch>`.
 
-   - Fixed a race condition in parallel processing with
-     ``pre_dispatch != "all"`` (for instance, in ``cross_val_score``).
-     By `Olivier Grisel`_.
+- Fixed a race condition in parallel processing with
+  ``pre_dispatch != "all"`` (for instance, in ``cross_val_score``).
+  By `Olivier Grisel`_.
 
-   - Raise error in :class:`cluster.FeatureAgglomeration` and
-     :class:`cluster.WardAgglomeration` when no samples are given,
-     rather than returning meaningless clustering.
+- Raise error in :class:`cluster.FeatureAgglomeration` and
+  :class:`cluster.WardAgglomeration` when no samples are given,
+  rather than returning meaningless clustering.
 
-   - Fixed bug in :class:`gradient_boosting.GradientBoostingRegressor` with
-     ``loss='huber'``: ``gamma`` might have not been initialized.
+- Fixed bug in :class:`gradient_boosting.GradientBoostingRegressor` with
+  ``loss='huber'``: ``gamma`` might not have been initialized.
 
-   - Fixed feature importances as computed with a forest of randomized trees
-     when fit with ``sample_weight != None`` and/or with ``bootstrap=True``.
-     By `Gilles Louppe`_.
+- Fixed feature importances as computed with a forest of randomized trees
+  when fit with ``sample_weight != None`` and/or with ``bootstrap=True``.
+  By `Gilles Louppe`_.
 
 API changes summary
 -------------------
 
-   - :mod:`sklearn.hmm` is deprecated. Its removal is planned
-     for the 0.17 release.
-
-   - Use of :class:`covariance.EllipticEnvelop` has now been removed after
-     deprecation.
-     Please use :class:`covariance.EllipticEnvelope` instead.
-
-   - :class:`cluster.Ward` is deprecated. Use
-     :class:`cluster.AgglomerativeClustering` instead.
-
-   - :class:`cluster.WardClustering` is deprecated. Use
-   - :class:`cluster.AgglomerativeClustering` instead.
-
-   - :class:`cross_validation.Bootstrap` is deprecated.
-     :class:`cross_validation.KFold` or
-     :class:`cross_validation.ShuffleSplit` are recommended instead.
-
-   - Direct support for the sequence of sequences (or list of lists) multilabel
-     format is deprecated. To convert to and from the supported binary
-     indicator matrix format, use
-     :class:`MultiLabelBinarizer <preprocessing.MultiLabelBinarizer>`.
-     By `Joel Nothman`_.
-
-   - Add score method to :class:`PCA <decomposition.PCA>` following the model of
-     probabilistic PCA and deprecate
-     :class:`ProbabilisticPCA <decomposition.ProbabilisticPCA>` model whose
-     score implementation is not correct. The computation now also exploits the
-     matrix inversion lemma for faster computation. By `Alexandre Gramfort`_.
-
-   - The score method of :class:`FactorAnalysis <decomposition.FactorAnalysis>`
-     now returns the average log-likelihood of the samples. Use score_samples
-     to get log-likelihood of each sample. By `Alexandre Gramfort`_.
-
-   - Generating boolean masks (the setting ``indices=False``)
-     from cross-validation generators is deprecated.
-     Support for masks will be removed in 0.17.
-     The generators have produced arrays of indices by default since 0.10.
-     By `Joel Nothman`_.
-
-   - 1-d arrays containing strings with ``dtype=object`` (as used in Pandas)
-     are now considered valid classification targets. This fixes a regression
-     from version 0.13 in some classifiers. By `Joel Nothman`_.
-
-   - Fix wrong ``explained_variance_ratio_`` attribute in
-     :class:`RandomizedPCA <decomposition.RandomizedPCA>`.
-     By `Alexandre Gramfort`_.
-
-   - Fit alphas for each ``l1_ratio`` instead of ``mean_l1_ratio`` in
-     :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV`.
-     This changes the shape of ``alphas_`` from ``(n_alphas,)`` to
-     ``(n_l1_ratio, n_alphas)`` if the ``l1_ratio`` provided is a 1-D array like
-     object of length greater than one.
-     By `Manoj Kumar`_.
-
-   - Fix :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV`
-     when fitting intercept and input data is sparse. The automatic grid
-     of alphas was not computed correctly and the scaling with normalize
-     was wrong. By `Manoj Kumar`_.
-
-   - Fix wrong maximal number of features drawn (``max_features``) at each split
-     for decision trees, random forests and gradient tree boosting.
-     Previously, the count for the number of drawn features started only after
-     one non constant features in the split. This bug fix will affect
-     computational and generalization performance of those algorithms in the
-     presence of constant features. To get back previous generalization
-     performance, you should modify the value of ``max_features``.
-     By `Arnaud Joly`_.
-
-   - Fix wrong maximal number of features drawn (``max_features``) at each split
-     for :class:`ensemble.ExtraTreesClassifier` and
-     :class:`ensemble.ExtraTreesRegressor`. Previously, only non constant
-     features in the split was counted as drawn. Now constant features are
-     counted as drawn. Furthermore at least one feature must be non constant
-     in order to make a valid split. This bug fix will affect
-     computational and generalization performance of extra trees in the
-     presence of constant features. To get back previous generalization
-     performance, you should modify the value of ``max_features``.
-     By `Arnaud Joly`_.
-
-   - Fix :func:`utils.compute_class_weight` when ``class_weight=="auto"``.
-     Previously it was broken for input of non-integer ``dtype`` and the
-     weighted array that was returned was wrong. By `Manoj Kumar`_.
-
-   - Fix :class:`cross_validation.Bootstrap` to return ``ValueError``
-     when ``n_train + n_test > n``. By :user:`Ronald Phlypo <rphlypo>`.
+- :mod:`sklearn.hmm` is deprecated. Its removal is planned
+  for the 0.17 release.
+
+- Use of :class:`covariance.EllipticEnvelop` has now been removed after
+  deprecation.
+  Please use :class:`covariance.EllipticEnvelope` instead.
+
+- :class:`cluster.Ward` is deprecated. Use
+  :class:`cluster.AgglomerativeClustering` instead.
+
+- :class:`cluster.WardClustering` is deprecated. Use
+  :class:`cluster.AgglomerativeClustering` instead.
+
+- :class:`cross_validation.Bootstrap` is deprecated.
+  :class:`cross_validation.KFold` or
+  :class:`cross_validation.ShuffleSplit` are recommended instead.
+
+- Direct support for the sequence of sequences (or list of lists) multilabel
+  format is deprecated. To convert to and from the supported binary
+  indicator matrix format, use
+  :class:`MultiLabelBinarizer <preprocessing.MultiLabelBinarizer>`.
+  By `Joel Nothman`_.
+
+- Add score method to :class:`PCA <decomposition.PCA>` following the model of
+  probabilistic PCA and deprecate
+  :class:`ProbabilisticPCA <decomposition.ProbabilisticPCA>` model whose
+  score implementation is not correct. The computation now also exploits the
+  matrix inversion lemma for faster computation. By `Alexandre Gramfort`_.
+
+- The score method of :class:`FactorAnalysis <decomposition.FactorAnalysis>`
+  now returns the average log-likelihood of the samples. Use score_samples
+  to get log-likelihood of each sample. By `Alexandre Gramfort`_.
+
+- Generating boolean masks (the setting ``indices=False``)
+  from cross-validation generators is deprecated.
+  Support for masks will be removed in 0.17.
+  The generators have produced arrays of indices by default since 0.10.
+  By `Joel Nothman`_.
+
+- 1-d arrays containing strings with ``dtype=object`` (as used in Pandas)
+  are now considered valid classification targets. This fixes a regression
+  from version 0.13 in some classifiers. By `Joel Nothman`_.
+
+- Fix wrong ``explained_variance_ratio_`` attribute in
+  :class:`RandomizedPCA <decomposition.RandomizedPCA>`.
+  By `Alexandre Gramfort`_.
+
+- Fit alphas for each ``l1_ratio`` instead of ``mean_l1_ratio`` in
+  :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV`.
+  This changes the shape of ``alphas_`` from ``(n_alphas,)`` to
+  ``(n_l1_ratio, n_alphas)`` if the ``l1_ratio`` provided is a 1-D array like
+  object of length greater than one.
+  By `Manoj Kumar`_.
+
+- Fix :class:`linear_model.ElasticNetCV` and :class:`linear_model.LassoCV`
+  when fitting intercept and input data is sparse. The automatic grid
+  of alphas was not computed correctly and the scaling with normalize
+  was wrong. By `Manoj Kumar`_.
+
+- Fix wrong maximal number of features drawn (``max_features``) at each split
+  for decision trees, random forests and gradient tree boosting.
+  Previously, the count for the number of drawn features started only after
+  one non constant feature in the split. This bug fix will affect
+  computational and generalization performance of those algorithms in the
+  presence of constant features. To get back previous generalization
+  performance, you should modify the value of ``max_features``.
+  By `Arnaud Joly`_.
+
+- Fix wrong maximal number of features drawn (``max_features``) at each split
+  for :class:`ensemble.ExtraTreesClassifier` and
+  :class:`ensemble.ExtraTreesRegressor`. Previously, only non constant
+  features in the split were counted as drawn. Now constant features are
+  counted as drawn. Furthermore at least one feature must be non constant
+  in order to make a valid split. This bug fix will affect
+  computational and generalization performance of extra trees in the
+  presence of constant features. To get back previous generalization
+  performance, you should modify the value of ``max_features``.
+  By `Arnaud Joly`_.
+
+- Fix :func:`utils.compute_class_weight` when ``class_weight=="auto"``.
+  Previously it was broken for input of non-integer ``dtype`` and the
+  weighted array that was returned was wrong. By `Manoj Kumar`_.
+
+- Fix :class:`cross_validation.Bootstrap` to raise ``ValueError``
+  when ``n_train + n_test > n``. By :user:`Ronald Phlypo <rphlypo>`.
 
 
 People
@@ -3322,287 +3442,287 @@ Version 0.14
 Changelog
 ---------
 
-   - Missing values with sparse and dense matrices can be imputed with the
-     transformer :class:`preprocessing.Imputer` by `Nicolas Trésegnie`_.
-
-   - The core implementation of decisions trees has been rewritten from
-     scratch, allowing for faster tree induction and lower memory
-     consumption in all tree-based estimators. By `Gilles Louppe`_.
-
-   - Added :class:`ensemble.AdaBoostClassifier` and
-     :class:`ensemble.AdaBoostRegressor`, by `Noel Dawe`_  and
-     `Gilles Louppe`_. See the :ref:`AdaBoost <adaboost>` section of the user
-     guide for details and examples.
-
-   - Added :class:`grid_search.RandomizedSearchCV` and
-     :class:`grid_search.ParameterSampler` for randomized hyperparameter
-     optimization. By `Andreas Müller`_.
-
-   - Added :ref:`biclustering <biclustering>` algorithms
-     (:class:`sklearn.cluster.bicluster.SpectralCoclustering` and
-     :class:`sklearn.cluster.bicluster.SpectralBiclustering`), data
-     generation methods (:func:`sklearn.datasets.make_biclusters` and
-     :func:`sklearn.datasets.make_checkerboard`), and scoring metrics
-     (:func:`sklearn.metrics.consensus_score`). By `Kemal Eren`_.
-
-   - Added :ref:`Restricted Boltzmann Machines<rbm>`
-     (:class:`neural_network.BernoulliRBM`). By `Yann Dauphin`_.
-
-   - Python 3 support by :user:`Justin Vincent <justinvf>`, `Lars Buitinck`_,
-     :user:`Subhodeep Moitra <smoitra87>` and `Olivier Grisel`_. All tests now pass under
-     Python 3.3.
-
-   - Ability to pass one penalty (alpha value) per target in
-     :class:`linear_model.Ridge`, by @eickenberg and `Mathieu Blondel`_.
-
-   - Fixed :mod:`sklearn.linear_model.stochastic_gradient.py` L2 regularization
-     issue (minor practical significance).
-     By :user:`Norbert Crombach <norbert>` and `Mathieu Blondel`_ .
-
-   - Added an interactive version of `Andreas Müller`_'s
-     `Machine Learning Cheat Sheet (for scikit-learn)
-     <http://peekaboo-vision.blogspot.de/2013/01/machine-learning-cheat-sheet-for-scikit.html>`_
-     to the documentation. See :ref:`Choosing the right estimator <ml_map>`.
-     By `Jaques Grobler`_.
-
-   - :class:`grid_search.GridSearchCV` and
-     :func:`cross_validation.cross_val_score` now support the use of advanced
-     scoring function such as area under the ROC curve and f-beta scores.
-     See :ref:`scoring_parameter` for details. By `Andreas Müller`_
-     and `Lars Buitinck`_.
-     Passing a function from :mod:`sklearn.metrics` as ``score_func`` is
-     deprecated.
-
-   - Multi-label classification output is now supported by
-     :func:`metrics.accuracy_score`, :func:`metrics.zero_one_loss`,
-     :func:`metrics.f1_score`, :func:`metrics.fbeta_score`,
-     :func:`metrics.classification_report`,
-     :func:`metrics.precision_score` and :func:`metrics.recall_score`
-     by `Arnaud Joly`_.
-
-   - Two new metrics :func:`metrics.hamming_loss` and
-     :func:`metrics.jaccard_similarity_score`
-     are added with multi-label support by `Arnaud Joly`_.
-
-   - Speed and memory usage improvements in
-     :class:`feature_extraction.text.CountVectorizer` and
-     :class:`feature_extraction.text.TfidfVectorizer`,
-     by Jochen Wersdörfer and Roman Sinayev.
-
-   - The ``min_df`` parameter in
-     :class:`feature_extraction.text.CountVectorizer` and
-     :class:`feature_extraction.text.TfidfVectorizer`, which used to be 2,
-     has been reset to 1 to avoid unpleasant surprises (empty vocabularies)
-     for novice users who try it out on tiny document collections.
-     A value of at least 2 is still recommended for practical use.
-
-   - :class:`svm.LinearSVC`, :class:`linear_model.SGDClassifier` and
-     :class:`linear_model.SGDRegressor` now have a ``sparsify`` method that
-     converts their ``coef_`` into a sparse matrix, meaning stored models
-     trained using these estimators can be made much more compact.
-
-   - :class:`linear_model.SGDClassifier` now produces multiclass probability
-     estimates when trained under log loss or modified Huber loss.
-
-   - Hyperlinks to documentation in example code on the website by
-     :user:`Martin Luessi <mluessi>`.
-
-   - Fixed bug in :class:`preprocessing.MinMaxScaler` causing incorrect scaling
-     of the features for non-default ``feature_range`` settings. By `Andreas
-     Müller`_.
-
-   - ``max_features`` in :class:`tree.DecisionTreeClassifier`,
-     :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators
-     now supports percentage values. By `Gilles Louppe`_.
-
-   - Performance improvements in :class:`isotonic.IsotonicRegression` by
-     `Nelle Varoquaux`_.
-
-   - :func:`metrics.accuracy_score` has an option normalize to return
-     the fraction or the number of correctly classified sample
-     by `Arnaud Joly`_.
-
-   - Added :func:`metrics.log_loss` that computes log loss, aka cross-entropy
-     loss. By Jochen Wersdörfer and `Lars Buitinck`_.
+- Missing values with sparse and dense matrices can be imputed with the
+  transformer :class:`preprocessing.Imputer` by `Nicolas Trésegnie`_.
+
+- The core implementation of decision trees has been rewritten from
+  scratch, allowing for faster tree induction and lower memory
+  consumption in all tree-based estimators. By `Gilles Louppe`_.
+
+- Added :class:`ensemble.AdaBoostClassifier` and
+  :class:`ensemble.AdaBoostRegressor`, by `Noel Dawe`_  and
+  `Gilles Louppe`_. See the :ref:`AdaBoost <adaboost>` section of the user
+  guide for details and examples.
+
+- Added :class:`grid_search.RandomizedSearchCV` and
+  :class:`grid_search.ParameterSampler` for randomized hyperparameter
+  optimization. By `Andreas Müller`_.
+
+- Added :ref:`biclustering <biclustering>` algorithms
+  (:class:`sklearn.cluster.bicluster.SpectralCoclustering` and
+  :class:`sklearn.cluster.bicluster.SpectralBiclustering`), data
+  generation methods (:func:`sklearn.datasets.make_biclusters` and
+  :func:`sklearn.datasets.make_checkerboard`), and scoring metrics
+  (:func:`sklearn.metrics.consensus_score`). By `Kemal Eren`_.
+
+- Added :ref:`Restricted Boltzmann Machines<rbm>`
+  (:class:`neural_network.BernoulliRBM`). By `Yann Dauphin`_.
+
+- Python 3 support by :user:`Justin Vincent <justinvf>`, `Lars Buitinck`_,
+  :user:`Subhodeep Moitra <smoitra87>` and `Olivier Grisel`_. All tests now pass under
+  Python 3.3.
+
+- Ability to pass one penalty (alpha value) per target in
+  :class:`linear_model.Ridge`, by @eickenberg and `Mathieu Blondel`_.
+
+- Fixed :mod:`sklearn.linear_model.stochastic_gradient.py` L2 regularization
+  issue (minor practical significance).
+  By :user:`Norbert Crombach <norbert>` and `Mathieu Blondel`_ .
+
+- Added an interactive version of `Andreas Müller`_'s
+  `Machine Learning Cheat Sheet (for scikit-learn)
+  <http://peekaboo-vision.blogspot.de/2013/01/machine-learning-cheat-sheet-for-scikit.html>`_
+  to the documentation. See :ref:`Choosing the right estimator <ml_map>`.
+  By `Jaques Grobler`_.
+
+- :class:`grid_search.GridSearchCV` and
+  :func:`cross_validation.cross_val_score` now support the use of advanced
+  scoring functions such as area under the ROC curve and f-beta scores.
+  See :ref:`scoring_parameter` for details. By `Andreas Müller`_
+  and `Lars Buitinck`_.
+  Passing a function from :mod:`sklearn.metrics` as ``score_func`` is
+  deprecated.
+
+- Multi-label classification output is now supported by
+  :func:`metrics.accuracy_score`, :func:`metrics.zero_one_loss`,
+  :func:`metrics.f1_score`, :func:`metrics.fbeta_score`,
+  :func:`metrics.classification_report`,
+  :func:`metrics.precision_score` and :func:`metrics.recall_score`
+  by `Arnaud Joly`_.
+
+- Two new metrics :func:`metrics.hamming_loss` and
+  :func:`metrics.jaccard_similarity_score`
+  are added with multi-label support by `Arnaud Joly`_.
+
+- Speed and memory usage improvements in
+  :class:`feature_extraction.text.CountVectorizer` and
+  :class:`feature_extraction.text.TfidfVectorizer`,
+  by Jochen Wersdörfer and Roman Sinayev.
+
+- The ``min_df`` parameter in
+  :class:`feature_extraction.text.CountVectorizer` and
+  :class:`feature_extraction.text.TfidfVectorizer`, which used to be 2,
+  has been reset to 1 to avoid unpleasant surprises (empty vocabularies)
+  for novice users who try it out on tiny document collections.
+  A value of at least 2 is still recommended for practical use.
+
+- :class:`svm.LinearSVC`, :class:`linear_model.SGDClassifier` and
+  :class:`linear_model.SGDRegressor` now have a ``sparsify`` method that
+  converts their ``coef_`` into a sparse matrix, meaning stored models
+  trained using these estimators can be made much more compact.
+
+- :class:`linear_model.SGDClassifier` now produces multiclass probability
+  estimates when trained under log loss or modified Huber loss.
+
+- Hyperlinks to documentation in example code on the website by
+  :user:`Martin Luessi <mluessi>`.
+
+- Fixed bug in :class:`preprocessing.MinMaxScaler` causing incorrect scaling
+  of the features for non-default ``feature_range`` settings. By `Andreas
+  Müller`_.
+
+- ``max_features`` in :class:`tree.DecisionTreeClassifier`,
+  :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators
+  now supports percentage values. By `Gilles Louppe`_.
+
+- Performance improvements in :class:`isotonic.IsotonicRegression` by
+  `Nelle Varoquaux`_.
+
+- :func:`metrics.accuracy_score` has an option ``normalize`` to return
+  the fraction or the number of correctly classified samples
+  by `Arnaud Joly`_.
+
+- Added :func:`metrics.log_loss` that computes log loss, aka cross-entropy
+  loss. By Jochen Wersdörfer and `Lars Buitinck`_.
 
-   - A bug that caused :class:`ensemble.AdaBoostClassifier`'s to output
-     incorrect probabilities has been fixed.
-
-   - Feature selectors now share a mixin providing consistent ``transform``,
-     ``inverse_transform`` and ``get_support`` methods. By `Joel Nothman`_.
-
-   - A fitted :class:`grid_search.GridSearchCV` or
-     :class:`grid_search.RandomizedSearchCV` can now generally be pickled.
-     By `Joel Nothman`_.
-
-   - Refactored and vectorized implementation of :func:`metrics.roc_curve`
-     and :func:`metrics.precision_recall_curve`. By `Joel Nothman`_.
+- A bug that caused :class:`ensemble.AdaBoostClassifier` to output
+  incorrect probabilities has been fixed.
+
+- Feature selectors now share a mixin providing consistent ``transform``,
+  ``inverse_transform`` and ``get_support`` methods. By `Joel Nothman`_.
+
+- A fitted :class:`grid_search.GridSearchCV` or
+  :class:`grid_search.RandomizedSearchCV` can now generally be pickled.
+  By `Joel Nothman`_.
+
+- Refactored and vectorized implementation of :func:`metrics.roc_curve`
+  and :func:`metrics.precision_recall_curve`. By `Joel Nothman`_.
 
-   - The new estimator :class:`sklearn.decomposition.TruncatedSVD`
-     performs dimensionality reduction using SVD on sparse matrices,
-     and can be used for latent semantic analysis (LSA).
-     By `Lars Buitinck`_.
+- The new estimator :class:`sklearn.decomposition.TruncatedSVD`
+  performs dimensionality reduction using SVD on sparse matrices,
+  and can be used for latent semantic analysis (LSA).
+  By `Lars Buitinck`_.
 
-   - Added self-contained example of out-of-core learning on text data
-     :ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`.
-     By :user:`Eustache Diemert <oddskool>`.
+- Added self-contained example of out-of-core learning on text data
+  :ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`.
+  By :user:`Eustache Diemert <oddskool>`.
 
-   - The default number of components for
-     :class:`sklearn.decomposition.RandomizedPCA` is now correctly documented
-     to be ``n_features``. This was the default behavior, so programs using it
-     will continue to work as they did.
+- The default number of components for
+  :class:`sklearn.decomposition.RandomizedPCA` is now correctly documented
+  to be ``n_features``. This was the default behavior, so programs using it
+  will continue to work as they did.
 
-   - :class:`sklearn.cluster.KMeans` now fits several orders of magnitude
-     faster on sparse data (the speedup depends on the sparsity). By
-     `Lars Buitinck`_.
-
-   - Reduce memory footprint of FastICA by `Denis Engemann`_ and
-     `Alexandre Gramfort`_.
+- :class:`sklearn.cluster.KMeans` now fits several orders of magnitude
+  faster on sparse data (the speedup depends on the sparsity). By
+  `Lars Buitinck`_.
+
+- Reduce memory footprint of FastICA by `Denis Engemann`_ and
+  `Alexandre Gramfort`_.
 
-   - Verbose output in :mod:`sklearn.ensemble.gradient_boosting` now uses
-     a column format and prints progress in decreasing frequency.
-     It also shows the remaining time. By `Peter Prettenhofer`_.
+- Verbose output in :mod:`sklearn.ensemble.gradient_boosting` now uses
+  a column format and prints progress in decreasing frequency.
+  It also shows the remaining time. By `Peter Prettenhofer`_.
 
-   - :mod:`sklearn.ensemble.gradient_boosting` provides out-of-bag improvement
-     :attr:`~sklearn.ensemble.GradientBoostingRegressor.oob_improvement_`
-     rather than the OOB score for model selection. An example that shows
-     how to use OOB estimates to select the number of trees was added.
-     By `Peter Prettenhofer`_.
+- :mod:`sklearn.ensemble.gradient_boosting` provides out-of-bag improvement
+  :attr:`~sklearn.ensemble.GradientBoostingRegressor.oob_improvement_`
+  rather than the OOB score for model selection. An example that shows
+  how to use OOB estimates to select the number of trees was added.
+  By `Peter Prettenhofer`_.
 
-   - Most metrics now support string labels for multiclass classification
-     by `Arnaud Joly`_ and `Lars Buitinck`_.
+- Most metrics now support string labels for multiclass classification
+  by `Arnaud Joly`_ and `Lars Buitinck`_.
 
-   - New OrthogonalMatchingPursuitCV class by `Alexandre Gramfort`_
-     and `Vlad Niculae`_.
+- New OrthogonalMatchingPursuitCV class by `Alexandre Gramfort`_
+  and `Vlad Niculae`_.
 
-   - Fixed a bug in :class:`sklearn.covariance.GraphLassoCV`: the
-     'alphas' parameter now works as expected when given a list of
-     values. By Philippe Gervais.
+- Fixed a bug in :class:`sklearn.covariance.GraphLassoCV`: the
+  'alphas' parameter now works as expected when given a list of
+  values. By Philippe Gervais.
 
-   - Fixed an important bug in :class:`sklearn.covariance.GraphLassoCV`
-     that prevented all folds provided by a CV object to be used (only
-     the first 3 were used). When providing a CV object, execution
-     time may thus increase significantly compared to the previous
-     version (bug results are correct now). By Philippe Gervais.
+- Fixed an important bug in :class:`sklearn.covariance.GraphLassoCV`
+  that prevented all folds provided by a CV object to be used (only
+  the first 3 were used). When providing a CV object, execution
+  time may thus increase significantly compared to the previous
+  version (but results are correct now). By Philippe Gervais.
 
-   - :class:`cross_validation.cross_val_score` and the :mod:`grid_search`
-     module is now tested with multi-output data by `Arnaud Joly`_.
+- :class:`cross_validation.cross_val_score` and the :mod:`grid_search`
+  module are now tested with multi-output data by `Arnaud Joly`_.
 
-   - :func:`datasets.make_multilabel_classification` can now return
-     the output in label indicator multilabel format  by `Arnaud Joly`_.
+- :func:`datasets.make_multilabel_classification` can now return
+  the output in label indicator multilabel format  by `Arnaud Joly`_.
 
-   - K-nearest neighbors, :class:`neighbors.KNeighborsRegressor`
-     and :class:`neighbors.RadiusNeighborsRegressor`,
-     and radius neighbors, :class:`neighbors.RadiusNeighborsRegressor` and
-     :class:`neighbors.RadiusNeighborsClassifier` support multioutput data
-     by `Arnaud Joly`_.
+- K-nearest neighbors, :class:`neighbors.KNeighborsClassifier`
+  and :class:`neighbors.KNeighborsRegressor`,
+  and radius neighbors, :class:`neighbors.RadiusNeighborsRegressor` and
+  :class:`neighbors.RadiusNeighborsClassifier` support multioutput data
+  by `Arnaud Joly`_.
 
-   - Random state in LibSVM-based estimators (:class:`svm.SVC`, :class:`NuSVC`,
-     :class:`OneClassSVM`, :class:`svm.SVR`, :class:`svm.NuSVR`) can now be
-     controlled.  This is useful to ensure consistency in the probability
-     estimates for the classifiers trained with ``probability=True``. By
-     `Vlad Niculae`_.
+- Random state in LibSVM-based estimators (:class:`svm.SVC`, :class:`NuSVC`,
+  :class:`OneClassSVM`, :class:`svm.SVR`, :class:`svm.NuSVR`) can now be
+  controlled.  This is useful to ensure consistency in the probability
+  estimates for the classifiers trained with ``probability=True``. By
+  `Vlad Niculae`_.
 
-   - Out-of-core learning support for discrete naive Bayes classifiers
-     :class:`sklearn.naive_bayes.MultinomialNB` and
-     :class:`sklearn.naive_bayes.BernoulliNB` by adding the ``partial_fit``
-     method by `Olivier Grisel`_.
+- Out-of-core learning support for discrete naive Bayes classifiers
+  :class:`sklearn.naive_bayes.MultinomialNB` and
+  :class:`sklearn.naive_bayes.BernoulliNB` by adding the ``partial_fit``
+  method by `Olivier Grisel`_.
 
-   - New website design and navigation by `Gilles Louppe`_, `Nelle Varoquaux`_,
-     Vincent Michel and `Andreas Müller`_.
+- New website design and navigation by `Gilles Louppe`_, `Nelle Varoquaux`_,
+  Vincent Michel and `Andreas Müller`_.
 
-   - Improved documentation on :ref:`multi-class, multi-label and multi-output
-     classification <multiclass>` by `Yannick Schwartz`_ and `Arnaud Joly`_.
+- Improved documentation on :ref:`multi-class, multi-label and multi-output
+  classification <multiclass>` by `Yannick Schwartz`_ and `Arnaud Joly`_.
 
-   - Better input and error handling in the :mod:`metrics` module by
-     `Arnaud Joly`_ and `Joel Nothman`_.
+- Better input and error handling in the :mod:`metrics` module by
+  `Arnaud Joly`_ and `Joel Nothman`_.
 
-   - Speed optimization of the :mod:`hmm` module by :user:`Mikhail Korobov <kmike>`
+- Speed optimization of the :mod:`hmm` module by :user:`Mikhail Korobov <kmike>`
 
-   - Significant speed improvements for :class:`sklearn.cluster.DBSCAN`
-     by `cleverless <https://github.com/cleverless>`_
+- Significant speed improvements for :class:`sklearn.cluster.DBSCAN`
+  by `cleverless <https://github.com/cleverless>`_
 
 
 API changes summary
 -------------------
 
-   - The :func:`auc_score` was renamed :func:`roc_auc_score`.
+- The :func:`auc_score` was renamed :func:`roc_auc_score`.
 
-   - Testing scikit-learn with ``sklearn.test()`` is deprecated. Use
-     ``nosetests sklearn`` from the command line.
+- Testing scikit-learn with ``sklearn.test()`` is deprecated. Use
+  ``nosetests sklearn`` from the command line.
 
-   - Feature importances in :class:`tree.DecisionTreeClassifier`,
-     :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators
-     are now computed on the fly when accessing  the ``feature_importances_``
-     attribute. Setting ``compute_importances=True`` is no longer required.
-     By `Gilles Louppe`_.
+- Feature importances in :class:`tree.DecisionTreeClassifier`,
+  :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators
+  are now computed on the fly when accessing  the ``feature_importances_``
+  attribute. Setting ``compute_importances=True`` is no longer required.
+  By `Gilles Louppe`_.
 
-   - :class:`linear_model.lasso_path` and
-     :class:`linear_model.enet_path` can return its results in the same
-     format as that of :class:`linear_model.lars_path`. This is done by
-     setting the ``return_models`` parameter to ``False``. By
-     `Jaques Grobler`_ and `Alexandre Gramfort`_
+- :class:`linear_model.lasso_path` and
+  :class:`linear_model.enet_path` can return their results in the same
+  format as that of :class:`linear_model.lars_path`. This is done by
+  setting the ``return_models`` parameter to ``False``. By
+  `Jaques Grobler`_ and `Alexandre Gramfort`_
 
-   - :class:`grid_search.IterGrid` was renamed to
-     :class:`grid_search.ParameterGrid`.
+- :class:`grid_search.IterGrid` was renamed to
+  :class:`grid_search.ParameterGrid`.
 
-   - Fixed bug in :class:`KFold` causing imperfect class balance in some
-     cases. By `Alexandre Gramfort`_ and Tadej Janež.
+- Fixed bug in :class:`KFold` causing imperfect class balance in some
+  cases. By `Alexandre Gramfort`_ and Tadej Janež.
 
-   - :class:`sklearn.neighbors.BallTree` has been refactored, and a
-     :class:`sklearn.neighbors.KDTree` has been
-     added which shares the same interface.  The Ball Tree now works with
-     a wide variety of distance metrics.  Both classes have many new
-     methods, including single-tree and dual-tree queries, breadth-first
-     and depth-first searching, and more advanced queries such as
-     kernel density estimation and 2-point correlation functions.
-     By `Jake Vanderplas`_
+- :class:`sklearn.neighbors.BallTree` has been refactored, and a
+  :class:`sklearn.neighbors.KDTree` has been
+  added which shares the same interface.  The Ball Tree now works with
+  a wide variety of distance metrics.  Both classes have many new
+  methods, including single-tree and dual-tree queries, breadth-first
+  and depth-first searching, and more advanced queries such as
+  kernel density estimation and 2-point correlation functions.
+  By `Jake Vanderplas`_
 
-   - Support for scipy.spatial.cKDTree within neighbors queries has been
-     removed, and the functionality replaced with the new :class:`KDTree`
-     class.
+- Support for scipy.spatial.cKDTree within neighbors queries has been
+  removed, and the functionality replaced with the new :class:`KDTree`
+  class.
 
-   - :class:`sklearn.neighbors.KernelDensity` has been added, which performs
-     efficient kernel density estimation with a variety of kernels.
+- :class:`sklearn.neighbors.KernelDensity` has been added, which performs
+  efficient kernel density estimation with a variety of kernels.
 
-   - :class:`sklearn.decomposition.KernelPCA` now always returns output with
-     ``n_components`` components, unless the new parameter ``remove_zero_eig``
-     is set to ``True``. This new behavior is consistent with the way
-     kernel PCA was always documented; previously, the removal of components
-     with zero eigenvalues was tacitly performed on all data.
+- :class:`sklearn.decomposition.KernelPCA` now always returns output with
+  ``n_components`` components, unless the new parameter ``remove_zero_eig``
+  is set to ``True``. This new behavior is consistent with the way
+  kernel PCA was always documented; previously, the removal of components
+  with zero eigenvalues was tacitly performed on all data.
 
-   - ``gcv_mode="auto"`` no longer tries to perform SVD on a densified
-     sparse matrix in :class:`sklearn.linear_model.RidgeCV`.
+- ``gcv_mode="auto"`` no longer tries to perform SVD on a densified
+  sparse matrix in :class:`sklearn.linear_model.RidgeCV`.
 
-   - Sparse matrix support in :class:`sklearn.decomposition.RandomizedPCA`
-     is now deprecated in favor of the new ``TruncatedSVD``.
+- Sparse matrix support in :class:`sklearn.decomposition.RandomizedPCA`
+  is now deprecated in favor of the new ``TruncatedSVD``.
 
-   - :class:`cross_validation.KFold` and
-     :class:`cross_validation.StratifiedKFold` now enforce `n_folds >= 2`
-     otherwise a ``ValueError`` is raised. By `Olivier Grisel`_.
+- :class:`cross_validation.KFold` and
+  :class:`cross_validation.StratifiedKFold` now enforce ``n_folds >= 2``,
+  otherwise a ``ValueError`` is raised. By `Olivier Grisel`_.
 
-   - :func:`datasets.load_files`'s ``charset`` and ``charset_errors``
-     parameters were renamed ``encoding`` and ``decode_errors``.
+- :func:`datasets.load_files`'s ``charset`` and ``charset_errors``
+  parameters were renamed ``encoding`` and ``decode_errors``.
 
-   - Attribute ``oob_score_`` in :class:`sklearn.ensemble.GradientBoostingRegressor`
-     and :class:`sklearn.ensemble.GradientBoostingClassifier`
-     is deprecated and has been replaced by ``oob_improvement_`` .
+- Attribute ``oob_score_`` in :class:`sklearn.ensemble.GradientBoostingRegressor`
+  and :class:`sklearn.ensemble.GradientBoostingClassifier`
+  is deprecated and has been replaced by ``oob_improvement_``.
 
-   - Attributes in OrthogonalMatchingPursuit have been deprecated
-     (copy_X, Gram, ...) and precompute_gram renamed precompute
-     for consistency. See #2224.
+- Attributes in OrthogonalMatchingPursuit have been deprecated
+  (copy_X, Gram, ...) and precompute_gram renamed precompute
+  for consistency. See #2224.
 
-   - :class:`sklearn.preprocessing.StandardScaler` now converts integer input
-     to float, and raises a warning. Previously it rounded for dense integer
-     input.
+- :class:`sklearn.preprocessing.StandardScaler` now converts integer input
+  to float, and raises a warning. Previously it rounded for dense integer
+  input.
 
-   - :class:`sklearn.multiclass.OneVsRestClassifier` now has a
-     ``decision_function`` method. This will return the distance of each
-     sample from the decision boundary for each class, as long as the
-     underlying estimators implement the ``decision_function`` method.
-     By `Kyle Kastner`_.
+- :class:`sklearn.multiclass.OneVsRestClassifier` now has a
+  ``decision_function`` method. This will return the distance of each
+  sample from the decision boundary for each class, as long as the
+  underlying estimators implement the ``decision_function`` method.
+  By `Kyle Kastner`_.
 
-   - Better input validation, warning on unexpected shapes for y.
+- Better input validation, warning on unexpected shapes for y.
 
 People
 ------
@@ -3709,21 +3829,21 @@ The 0.13.1 release only fixes some bugs and does not add any new functionality.
 Changelog
 ---------
 
-    - Fixed a testing error caused by the function :func:`cross_validation.train_test_split` being
-      interpreted as a test by `Yaroslav Halchenko`_.
+- Fixed a testing error caused by the function :func:`cross_validation.train_test_split` being
+  interpreted as a test by `Yaroslav Halchenko`_.
 
-    - Fixed a bug in the reassignment of small clusters in the :class:`cluster.MiniBatchKMeans`
-      by `Gael Varoquaux`_.
+- Fixed a bug in the reassignment of small clusters in the :class:`cluster.MiniBatchKMeans`
+  by `Gael Varoquaux`_.
 
-    - Fixed default value of ``gamma`` in :class:`decomposition.KernelPCA` by `Lars Buitinck`_.
+- Fixed default value of ``gamma`` in :class:`decomposition.KernelPCA` by `Lars Buitinck`_.
 
-    - Updated joblib to ``0.7.0d`` by `Gael Varoquaux`_.
+- Updated joblib to ``0.7.0d`` by `Gael Varoquaux`_.
 
-    - Fixed scaling of the deviance in :class:`ensemble.GradientBoostingClassifier` by `Peter Prettenhofer`_.
+- Fixed scaling of the deviance in :class:`ensemble.GradientBoostingClassifier` by `Peter Prettenhofer`_.
 
-    - Better tie-breaking in :class:`multiclass.OneVsOneClassifier` by `Andreas Müller`_.
+- Better tie-breaking in :class:`multiclass.OneVsOneClassifier` by `Andreas Müller`_.
 
-    - Other small improvements to tests and documentation.
+- Other small improvements to tests and documentation.
 
 People
 ------
@@ -3755,263 +3875,263 @@ Version 0.13
 New Estimator Classes
 ---------------------
 
-   - :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor`, two
-     data-independent predictors by `Mathieu Blondel`_. Useful to sanity-check
-     your estimators. See :ref:`dummy_estimators` in the user guide.
-     Multioutput support added by `Arnaud Joly`_.
+- :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor`, two
+  data-independent predictors by `Mathieu Blondel`_. Useful to sanity-check
+  your estimators. See :ref:`dummy_estimators` in the user guide.
+  Multioutput support added by `Arnaud Joly`_.
 
-   - :class:`decomposition.FactorAnalysis`, a transformer implementing the
-     classical factor analysis, by `Christian Osendorfer`_ and `Alexandre
-     Gramfort`_. See :ref:`FA` in the user guide.
+- :class:`decomposition.FactorAnalysis`, a transformer implementing the
+  classical factor analysis, by `Christian Osendorfer`_ and `Alexandre
+  Gramfort`_. See :ref:`FA` in the user guide.
 
-   - :class:`feature_extraction.FeatureHasher`, a transformer implementing the
-     "hashing trick" for fast, low-memory feature extraction from string fields
-     by `Lars Buitinck`_ and :class:`feature_extraction.text.HashingVectorizer`
-     for text documents by `Olivier Grisel`_  See :ref:`feature_hashing` and
-     :ref:`hashing_vectorizer` for the documentation and sample usage.
+- :class:`feature_extraction.FeatureHasher`, a transformer implementing the
+  "hashing trick" for fast, low-memory feature extraction from string fields
+  by `Lars Buitinck`_ and :class:`feature_extraction.text.HashingVectorizer`
+  for text documents by `Olivier Grisel`_. See :ref:`feature_hashing` and
+  :ref:`hashing_vectorizer` for the documentation and sample usage.
 
-   - :class:`pipeline.FeatureUnion`, a transformer that concatenates
-     results of several other transformers by `Andreas Müller`_. See
-     :ref:`feature_union` in the user guide.
+- :class:`pipeline.FeatureUnion`, a transformer that concatenates
+  results of several other transformers by `Andreas Müller`_. See
+  :ref:`feature_union` in the user guide.
 
-   - :class:`random_projection.GaussianRandomProjection`,
-     :class:`random_projection.SparseRandomProjection` and the function
-     :func:`random_projection.johnson_lindenstrauss_min_dim`. The first two are
-     transformers implementing Gaussian and sparse random projection matrix
-     by `Olivier Grisel`_ and `Arnaud Joly`_.
-     See :ref:`random_projection` in the user guide.
+- :class:`random_projection.GaussianRandomProjection`,
+  :class:`random_projection.SparseRandomProjection` and the function
+  :func:`random_projection.johnson_lindenstrauss_min_dim`. The first two are
+  transformers implementing Gaussian and sparse random projection matrix
+  by `Olivier Grisel`_ and `Arnaud Joly`_.
+  See :ref:`random_projection` in the user guide.
 
-   - :class:`kernel_approximation.Nystroem`, a transformer for approximating
-     arbitrary kernels by `Andreas Müller`_. See
-     :ref:`nystroem_kernel_approx` in the user guide.
+- :class:`kernel_approximation.Nystroem`, a transformer for approximating
+  arbitrary kernels by `Andreas Müller`_. See
+  :ref:`nystroem_kernel_approx` in the user guide.
 
-   - :class:`preprocessing.OneHotEncoder`, a transformer that computes binary
-     encodings of categorical features by `Andreas Müller`_. See
-     :ref:`preprocessing_categorical_features` in the user guide.
+- :class:`preprocessing.OneHotEncoder`, a transformer that computes binary
+  encodings of categorical features by `Andreas Müller`_. See
+  :ref:`preprocessing_categorical_features` in the user guide.
 
-   - :class:`linear_model.PassiveAggressiveClassifier` and
-     :class:`linear_model.PassiveAggressiveRegressor`, predictors implementing
-     an efficient stochastic optimization for linear models by `Rob Zinkov`_ and
-     `Mathieu Blondel`_. See :ref:`passive_aggressive` in the user
-     guide.
+- :class:`linear_model.PassiveAggressiveClassifier` and
+  :class:`linear_model.PassiveAggressiveRegressor`, predictors implementing
+  an efficient stochastic optimization for linear models by `Rob Zinkov`_ and
+  `Mathieu Blondel`_. See :ref:`passive_aggressive` in the user
+  guide.
 
-   - :class:`ensemble.RandomTreesEmbedding`, a transformer for creating high-dimensional
-     sparse representations using ensembles of totally random trees by  `Andreas Müller`_.
-     See :ref:`random_trees_embedding` in the user guide.
+- :class:`ensemble.RandomTreesEmbedding`, a transformer for creating high-dimensional
+  sparse representations using ensembles of totally random trees by  `Andreas Müller`_.
+  See :ref:`random_trees_embedding` in the user guide.
 
-   - :class:`manifold.SpectralEmbedding` and function
-     :func:`manifold.spectral_embedding`, implementing the "laplacian
-     eigenmaps" transformation for non-linear dimensionality reduction by Wei
-     Li. See :ref:`spectral_embedding` in the user guide.
+- :class:`manifold.SpectralEmbedding` and function
+  :func:`manifold.spectral_embedding`, implementing the "laplacian
+  eigenmaps" transformation for non-linear dimensionality reduction by Wei
+  Li. See :ref:`spectral_embedding` in the user guide.
 
-   - :class:`isotonic.IsotonicRegression` by `Fabian Pedregosa`_, `Alexandre Gramfort`_
-     and `Nelle Varoquaux`_,
+- :class:`isotonic.IsotonicRegression` by `Fabian Pedregosa`_, `Alexandre Gramfort`_
+  and `Nelle Varoquaux`_.
 
 
 Changelog
 ---------
 
-   - :func:`metrics.zero_one_loss` (formerly ``metrics.zero_one``) now has
-     option for normalized output that reports the fraction of
-     misclassifications, rather than the raw number of misclassifications. By
-     Kyle Beauchamp.
+- :func:`metrics.zero_one_loss` (formerly ``metrics.zero_one``) now has
+  option for normalized output that reports the fraction of
+  misclassifications, rather than the raw number of misclassifications. By
+  Kyle Beauchamp.
 
-   - :class:`tree.DecisionTreeClassifier` and all derived ensemble models now
-     support sample weighting, by `Noel Dawe`_  and `Gilles Louppe`_.
+- :class:`tree.DecisionTreeClassifier` and all derived ensemble models now
+  support sample weighting, by `Noel Dawe`_  and `Gilles Louppe`_.
 
-   - Speedup improvement when using bootstrap samples in forests of randomized
-     trees, by `Peter Prettenhofer`_  and `Gilles Louppe`_.
+- Speedup improvement when using bootstrap samples in forests of randomized
+  trees, by `Peter Prettenhofer`_  and `Gilles Louppe`_.
 
-   - Partial dependence plots for :ref:`gradient_boosting` in
-     :func:`ensemble.partial_dependence.partial_dependence` by `Peter
-     Prettenhofer`_. See :ref:`sphx_glr_auto_examples_ensemble_plot_partial_dependence.py` for an
-     example.
+- Partial dependence plots for :ref:`gradient_boosting` in
+  :func:`ensemble.partial_dependence.partial_dependence` by `Peter
+  Prettenhofer`_. See :ref:`sphx_glr_auto_examples_ensemble_plot_partial_dependence.py` for an
+  example.
 
-   - The table of contents on the website has now been made expandable by
-     `Jaques Grobler`_.
+- The table of contents on the website has now been made expandable by
+  `Jaques Grobler`_.
 
-   - :class:`feature_selection.SelectPercentile` now breaks ties
-     deterministically instead of returning all equally ranked features.
+- :class:`feature_selection.SelectPercentile` now breaks ties
+  deterministically instead of returning all equally ranked features.
 
-   - :class:`feature_selection.SelectKBest` and
-     :class:`feature_selection.SelectPercentile` are more numerically stable
-     since they use scores, rather than p-values, to rank results. This means
-     that they might sometimes select different features than they did
-     previously.
+- :class:`feature_selection.SelectKBest` and
+  :class:`feature_selection.SelectPercentile` are more numerically stable
+  since they use scores, rather than p-values, to rank results. This means
+  that they might sometimes select different features than they did
+  previously.
 
-   - Ridge regression and ridge classification fitting with ``sparse_cg`` solver
-     no longer has quadratic memory complexity, by `Lars Buitinck`_ and
-     `Fabian Pedregosa`_.
+- Ridge regression and ridge classification fitting with ``sparse_cg`` solver
+  no longer has quadratic memory complexity, by `Lars Buitinck`_ and
+  `Fabian Pedregosa`_.
 
-   - Ridge regression and ridge classification now support a new fast solver
-     called ``lsqr``, by `Mathieu Blondel`_.
+- Ridge regression and ridge classification now support a new fast solver
+  called ``lsqr``, by `Mathieu Blondel`_.
 
-   - Speed up of :func:`metrics.precision_recall_curve` by Conrad Lee.
+- Speed up of :func:`metrics.precision_recall_curve` by Conrad Lee.
 
-   - Added support for reading/writing svmlight files with pairwise
-     preference attribute (qid in svmlight file format) in
-     :func:`datasets.dump_svmlight_file` and
-     :func:`datasets.load_svmlight_file` by `Fabian Pedregosa`_.
+- Added support for reading/writing svmlight files with pairwise
+  preference attribute (qid in svmlight file format) in
+  :func:`datasets.dump_svmlight_file` and
+  :func:`datasets.load_svmlight_file` by `Fabian Pedregosa`_.
 
-   - Faster and more robust :func:`metrics.confusion_matrix` and
-     :ref:`clustering_evaluation` by Wei Li.
+- Faster and more robust :func:`metrics.confusion_matrix` and
+  :ref:`clustering_evaluation` by Wei Li.
 
-   - :func:`cross_validation.cross_val_score` now works with precomputed kernels
-     and affinity matrices, by `Andreas Müller`_.
+- :func:`cross_validation.cross_val_score` now works with precomputed kernels
+  and affinity matrices, by `Andreas Müller`_.
 
-   - LARS algorithm made more numerically stable with heuristics to drop
-     regressors too correlated as well as to stop the path when
-     numerical noise becomes predominant, by `Gael Varoquaux`_.
+- LARS algorithm made more numerically stable with heuristics to drop
+  regressors too correlated as well as to stop the path when
+  numerical noise becomes predominant, by `Gael Varoquaux`_.
 
-   - Faster implementation of :func:`metrics.precision_recall_curve` by
-     Conrad Lee.
+- Faster implementation of :func:`metrics.precision_recall_curve` by
+  Conrad Lee.
 
-   - New kernel :class:`metrics.chi2_kernel` by `Andreas Müller`_, often used
-     in computer vision applications.
+- New kernel :class:`metrics.chi2_kernel` by `Andreas Müller`_, often used
+  in computer vision applications.
 
-   - Fix of longstanding bug in :class:`naive_bayes.BernoulliNB` fixed by
-     Shaun Jackman.
+- Longstanding bug in :class:`naive_bayes.BernoulliNB` fixed by
+  Shaun Jackman.
 
-   - Implemented ``predict_proba`` in :class:`multiclass.OneVsRestClassifier`,
-     by Andrew Winterman.
+- Implemented ``predict_proba`` in :class:`multiclass.OneVsRestClassifier`,
+  by Andrew Winterman.
 
-   - Improve consistency in gradient boosting: estimators
-     :class:`ensemble.GradientBoostingRegressor` and
-     :class:`ensemble.GradientBoostingClassifier` use the estimator
-     :class:`tree.DecisionTreeRegressor` instead of the
-     :class:`tree._tree.Tree` data structure by `Arnaud Joly`_.
+- Improve consistency in gradient boosting: estimators
+  :class:`ensemble.GradientBoostingRegressor` and
+  :class:`ensemble.GradientBoostingClassifier` use the estimator
+  :class:`tree.DecisionTreeRegressor` instead of the
+  :class:`tree._tree.Tree` data structure by `Arnaud Joly`_.
 
-   - Fixed a floating point exception in the :ref:`decision trees <tree>`
-     module, by Seberg.
+- Fixed a floating point exception in the :ref:`decision trees <tree>`
+  module, by Seberg.
 
-   - Fix :func:`metrics.roc_curve` fails when y_true has only one class
-     by Wei Li.
+- Fix :func:`metrics.roc_curve` failing when y_true has only one class
+  by Wei Li.
 
-   - Add the :func:`metrics.mean_absolute_error` function which computes the
-     mean absolute error. The :func:`metrics.mean_squared_error`,
-     :func:`metrics.mean_absolute_error` and
-     :func:`metrics.r2_score` metrics support multioutput by `Arnaud Joly`_.
+- Add the :func:`metrics.mean_absolute_error` function which computes the
+  mean absolute error. The :func:`metrics.mean_squared_error`,
+  :func:`metrics.mean_absolute_error` and
+  :func:`metrics.r2_score` metrics support multioutput by `Arnaud Joly`_.
 
-   - Fixed ``class_weight`` support in :class:`svm.LinearSVC` and
-     :class:`linear_model.LogisticRegression` by `Andreas Müller`_. The meaning
-     of ``class_weight`` was reversed as erroneously higher weight meant less
-     positives of a given class in earlier releases.
+- Fixed ``class_weight`` support in :class:`svm.LinearSVC` and
+  :class:`linear_model.LogisticRegression` by `Andreas Müller`_. The meaning
+  of ``class_weight`` was reversed as erroneously higher weight meant fewer
+  positives of a given class in earlier releases.
 
-   - Improve narrative documentation and consistency in
-     :mod:`sklearn.metrics` for regression and classification metrics
-     by `Arnaud Joly`_.
+- Improve narrative documentation and consistency in
+  :mod:`sklearn.metrics` for regression and classification metrics
+  by `Arnaud Joly`_.
 
-   - Fixed a bug in :class:`sklearn.svm.SVC` when using csr-matrices with
-     unsorted indices by Xinfan Meng and `Andreas Müller`_.
+- Fixed a bug in :class:`sklearn.svm.SVC` when using csr-matrices with
+  unsorted indices by Xinfan Meng and `Andreas Müller`_.
 
-   - :class:`MiniBatchKMeans`: Add random reassignment of cluster centers
-     with little observations attached to them, by `Gael Varoquaux`_.
+- :class:`MiniBatchKMeans`: Add random reassignment of cluster centers
+  with few observations attached to them, by `Gael Varoquaux`_.
 
 
 API changes summary
 -------------------
-   - Renamed all occurrences of ``n_atoms`` to ``n_components`` for consistency.
-     This applies to :class:`decomposition.DictionaryLearning`,
-     :class:`decomposition.MiniBatchDictionaryLearning`,
-     :func:`decomposition.dict_learning`, :func:`decomposition.dict_learning_online`.
+- Renamed all occurrences of ``n_atoms`` to ``n_components`` for consistency.
+  This applies to :class:`decomposition.DictionaryLearning`,
+  :class:`decomposition.MiniBatchDictionaryLearning`,
+  :func:`decomposition.dict_learning`, :func:`decomposition.dict_learning_online`.
 
-   - Renamed all occurrences of ``max_iters`` to ``max_iter`` for consistency.
-     This applies to :class:`semi_supervised.LabelPropagation` and
-     :class:`semi_supervised.label_propagation.LabelSpreading`.
+- Renamed all occurrences of ``max_iters`` to ``max_iter`` for consistency.
+  This applies to :class:`semi_supervised.LabelPropagation` and
+  :class:`semi_supervised.label_propagation.LabelSpreading`.
 
-   - Renamed all occurrences of ``learn_rate`` to ``learning_rate`` for
-     consistency in :class:`ensemble.BaseGradientBoosting` and
-     :class:`ensemble.GradientBoostingRegressor`.
+- Renamed all occurrences of ``learn_rate`` to ``learning_rate`` for
+  consistency in :class:`ensemble.BaseGradientBoosting` and
+  :class:`ensemble.GradientBoostingRegressor`.
 
-   - The module ``sklearn.linear_model.sparse`` is gone. Sparse matrix support
-     was already integrated into the "regular" linear models.
+- The module ``sklearn.linear_model.sparse`` is gone. Sparse matrix support
+  was already integrated into the "regular" linear models.
 
-   - :func:`sklearn.metrics.mean_square_error`, which incorrectly returned the
-     accumulated error, was removed. Use ``mean_squared_error`` instead.
+- :func:`sklearn.metrics.mean_square_error`, which incorrectly returned the
+  accumulated error, was removed. Use ``mean_squared_error`` instead.
 
-   - Passing ``class_weight`` parameters to ``fit`` methods is no longer
-     supported. Pass them to estimator constructors instead.
+- Passing ``class_weight`` parameters to ``fit`` methods is no longer
+  supported. Pass them to estimator constructors instead.
 
-   - GMMs no longer have ``decode`` and ``rvs`` methods. Use the ``score``,
-     ``predict`` or ``sample`` methods instead.
+- GMMs no longer have ``decode`` and ``rvs`` methods. Use the ``score``,
+  ``predict`` or ``sample`` methods instead.
 
-   - The ``solver`` fit option in Ridge regression and classification is now
-     deprecated and will be removed in v0.14. Use the constructor option
-     instead.
+- The ``solver`` fit option in Ridge regression and classification is now
+  deprecated and will be removed in v0.14. Use the constructor option
+  instead.
 
-   - :class:`feature_extraction.text.DictVectorizer` now returns sparse
-     matrices in the CSR format, instead of COO.
+- :class:`feature_extraction.text.DictVectorizer` now returns sparse
+  matrices in the CSR format, instead of COO.
 
-   - Renamed ``k`` in :class:`cross_validation.KFold` and
-     :class:`cross_validation.StratifiedKFold` to ``n_folds``, renamed
-     ``n_bootstraps`` to ``n_iter`` in ``cross_validation.Bootstrap``.
+- Renamed ``k`` in :class:`cross_validation.KFold` and
+  :class:`cross_validation.StratifiedKFold` to ``n_folds``, renamed
+  ``n_bootstraps`` to ``n_iter`` in ``cross_validation.Bootstrap``.
 
-   - Renamed all occurrences of ``n_iterations`` to ``n_iter`` for consistency.
-     This applies to :class:`cross_validation.ShuffleSplit`,
-     :class:`cross_validation.StratifiedShuffleSplit`,
-     :func:`utils.randomized_range_finder` and :func:`utils.randomized_svd`.
+- Renamed all occurrences of ``n_iterations`` to ``n_iter`` for consistency.
+  This applies to :class:`cross_validation.ShuffleSplit`,
+  :class:`cross_validation.StratifiedShuffleSplit`,
+  :func:`utils.randomized_range_finder` and :func:`utils.randomized_svd`.
 
-   - Replaced ``rho`` in :class:`linear_model.ElasticNet` and
-     :class:`linear_model.SGDClassifier` by ``l1_ratio``. The ``rho`` parameter
-     had different meanings; ``l1_ratio`` was introduced to avoid confusion.
-     It has the same meaning as previously ``rho`` in
-     :class:`linear_model.ElasticNet` and ``(1-rho)`` in
-     :class:`linear_model.SGDClassifier`.
+- Replaced ``rho`` in :class:`linear_model.ElasticNet` and
+  :class:`linear_model.SGDClassifier` by ``l1_ratio``. The ``rho`` parameter
+  had different meanings; ``l1_ratio`` was introduced to avoid confusion.
+  It has the same meaning as previously ``rho`` in
+  :class:`linear_model.ElasticNet` and ``(1-rho)`` in
+  :class:`linear_model.SGDClassifier`.
 
-   - :class:`linear_model.LassoLars` and :class:`linear_model.Lars` now
-     store a list of paths in the case of multiple targets, rather than
-     an array of paths.
+- :class:`linear_model.LassoLars` and :class:`linear_model.Lars` now
+  store a list of paths in the case of multiple targets, rather than
+  an array of paths.
 
-   - The attribute ``gmm`` of :class:`hmm.GMMHMM` was renamed to ``gmm_``
-     to adhere more strictly with the API.
+- The attribute ``gmm`` of :class:`hmm.GMMHMM` was renamed to ``gmm_``
+  to adhere more strictly with the API.
 
-   - :func:`cluster.spectral_embedding` was moved to
-     :func:`manifold.spectral_embedding`.
+- :func:`cluster.spectral_embedding` was moved to
+  :func:`manifold.spectral_embedding`.
 
-   - Renamed ``eig_tol`` in :func:`manifold.spectral_embedding`,
-     :class:`cluster.SpectralClustering` to ``eigen_tol``, renamed ``mode``
-     to ``eigen_solver``.
+- Renamed ``eig_tol`` in :func:`manifold.spectral_embedding`,
+  :class:`cluster.SpectralClustering` to ``eigen_tol``, renamed ``mode``
+  to ``eigen_solver``.
 
-   - Renamed ``mode`` in :func:`manifold.spectral_embedding` and
-     :class:`cluster.SpectralClustering` to ``eigen_solver``.
+- Renamed ``mode`` in :func:`manifold.spectral_embedding` and
+  :class:`cluster.SpectralClustering` to ``eigen_solver``.
 
-   - ``classes_`` and ``n_classes_`` attributes of
-     :class:`tree.DecisionTreeClassifier` and all derived ensemble models are
-     now flat in case of single output problems and nested in case of
-     multi-output problems.
+- ``classes_`` and ``n_classes_`` attributes of
+  :class:`tree.DecisionTreeClassifier` and all derived ensemble models are
+  now flat in case of single output problems and nested in case of
+  multi-output problems.
 
-   - The ``estimators_`` attribute of
-     :class:`ensemble.gradient_boosting.GradientBoostingRegressor` and
-     :class:`ensemble.gradient_boosting.GradientBoostingClassifier` is now an
-     array of :class:'tree.DecisionTreeRegressor'.
+- The ``estimators_`` attribute of
+  :class:`ensemble.gradient_boosting.GradientBoostingRegressor` and
+  :class:`ensemble.gradient_boosting.GradientBoostingClassifier` is now an
+  array of :class:`tree.DecisionTreeRegressor`.
 
-   - Renamed ``chunk_size`` to ``batch_size`` in
-     :class:`decomposition.MiniBatchDictionaryLearning` and
-     :class:`decomposition.MiniBatchSparsePCA` for consistency.
+- Renamed ``chunk_size`` to ``batch_size`` in
+  :class:`decomposition.MiniBatchDictionaryLearning` and
+  :class:`decomposition.MiniBatchSparsePCA` for consistency.
 
-   - :class:`svm.SVC` and :class:`svm.NuSVC` now provide a ``classes_``
-     attribute and support arbitrary dtypes for labels ``y``.
-     Also, the dtype returned by ``predict`` now reflects the dtype of
-     ``y`` during ``fit`` (used to be ``np.float``).
+- :class:`svm.SVC` and :class:`svm.NuSVC` now provide a ``classes_``
+  attribute and support arbitrary dtypes for labels ``y``.
+  Also, the dtype returned by ``predict`` now reflects the dtype of
+  ``y`` during ``fit`` (used to be ``np.float``).
 
-   - Changed default test_size in :func:`cross_validation.train_test_split`
-     to None, added possibility to infer ``test_size`` from ``train_size`` in
-     :class:`cross_validation.ShuffleSplit` and
-     :class:`cross_validation.StratifiedShuffleSplit`.
+- Changed default test_size in :func:`cross_validation.train_test_split`
+  to None, added possibility to infer ``test_size`` from ``train_size`` in
+  :class:`cross_validation.ShuffleSplit` and
+  :class:`cross_validation.StratifiedShuffleSplit`.
 
-   - Renamed function :func:`sklearn.metrics.zero_one` to
-     :func:`sklearn.metrics.zero_one_loss`. Be aware that the default behavior
-     in :func:`sklearn.metrics.zero_one_loss` is different from
-     :func:`sklearn.metrics.zero_one`: ``normalize=False`` is changed to
-     ``normalize=True``.
+- Renamed function :func:`sklearn.metrics.zero_one` to
+  :func:`sklearn.metrics.zero_one_loss`. Be aware that the default behavior
+  in :func:`sklearn.metrics.zero_one_loss` is different from
+  :func:`sklearn.metrics.zero_one`: ``normalize=False`` is changed to
+  ``normalize=True``.
 
-   - Renamed function :func:`metrics.zero_one_score` to
-     :func:`metrics.accuracy_score`.
+- Renamed function :func:`metrics.zero_one_score` to
+  :func:`metrics.accuracy_score`.
 
-   - :func:`datasets.make_circles` now has the same number of inner and outer points.
+- :func:`datasets.make_circles` now has the same number of inner and outer points.
 
-   - In the Naive Bayes classifiers, the ``class_prior`` parameter was moved
-     from ``fit`` to ``__init__``.
+- In the Naive Bayes classifiers, the ``class_prior`` parameter was moved
+  from ``fit`` to ``__init__``.
 
 People
 ------
@@ -4098,27 +4218,27 @@ instead a set of bug fixes
 Changelog
 ----------
 
- - Improved numerical stability in spectral embedding by `Gael
-   Varoquaux`_
+- Improved numerical stability in spectral embedding by `Gael
+  Varoquaux`_
 
- - Doctest under windows 64bit by `Gael Varoquaux`_
+- Doctest under windows 64bit by `Gael Varoquaux`_
 
- - Documentation fixes for elastic net by `Andreas Müller`_ and
-   `Alexandre Gramfort`_
+- Documentation fixes for elastic net by `Andreas Müller`_ and
+  `Alexandre Gramfort`_
 
- - Proper behavior with fortran-ordered NumPy arrays by `Gael Varoquaux`_
+- Proper behavior with fortran-ordered NumPy arrays by `Gael Varoquaux`_
 
- - Make GridSearchCV work with non-CSR sparse matrix by `Lars Buitinck`_
+- Make GridSearchCV work with non-CSR sparse matrix by `Lars Buitinck`_
 
- - Fix parallel computing in MDS by `Gael Varoquaux`_
+- Fix parallel computing in MDS by `Gael Varoquaux`_
 
- - Fix Unicode support in count vectorizer by `Andreas Müller`_
+- Fix Unicode support in count vectorizer by `Andreas Müller`_
 
- - Fix MinCovDet breaking with X.shape = (3, 1) by :user:`Virgile Fritsch <VirgileFritsch>`
+- Fix MinCovDet breaking with X.shape = (3, 1) by :user:`Virgile Fritsch <VirgileFritsch>`
 
- - Fix clone of SGD objects by `Peter Prettenhofer`_
+- Fix clone of SGD objects by `Peter Prettenhofer`_
 
- - Stabilize GMM by :user:`Virgile Fritsch <VirgileFritsch>`
+- Stabilize GMM by :user:`Virgile Fritsch <VirgileFritsch>`
 
 People
 ------
@@ -4142,137 +4262,137 @@ Version 0.12
 Changelog
 ---------
 
-   - Various speed improvements of the :ref:`decision trees <tree>` module, by
-     `Gilles Louppe`_.
+- Various speed improvements of the :ref:`decision trees <tree>` module, by
+  `Gilles Louppe`_.
 
-   - :class:`ensemble.GradientBoostingRegressor` and
-     :class:`ensemble.GradientBoostingClassifier` now support feature subsampling
-     via the ``max_features`` argument, by `Peter Prettenhofer`_.
+- :class:`ensemble.GradientBoostingRegressor` and
+  :class:`ensemble.GradientBoostingClassifier` now support feature subsampling
+  via the ``max_features`` argument, by `Peter Prettenhofer`_.
 
-   - Added Huber and Quantile loss functions to
-     :class:`ensemble.GradientBoostingRegressor`, by `Peter Prettenhofer`_.
+- Added Huber and Quantile loss functions to
+  :class:`ensemble.GradientBoostingRegressor`, by `Peter Prettenhofer`_.
 
-   - :ref:`Decision trees <tree>` and :ref:`forests of randomized trees <forest>`
-     now support multi-output classification and regression problems, by
-     `Gilles Louppe`_.
+- :ref:`Decision trees <tree>` and :ref:`forests of randomized trees <forest>`
+  now support multi-output classification and regression problems, by
+  `Gilles Louppe`_.
 
-   - Added :class:`preprocessing.LabelEncoder`, a simple utility class to
-     normalize labels or transform non-numerical labels, by `Mathieu Blondel`_.
+- Added :class:`preprocessing.LabelEncoder`, a simple utility class to
+  normalize labels or transform non-numerical labels, by `Mathieu Blondel`_.
 
-   - Added the epsilon-insensitive loss and the ability to make probabilistic
-     predictions with the modified huber loss in :ref:`sgd`, by
-     `Mathieu Blondel`_.
+- Added the epsilon-insensitive loss and the ability to make probabilistic
+  predictions with the modified huber loss in :ref:`sgd`, by
+  `Mathieu Blondel`_.
 
-   - Added :ref:`multidimensional_scaling`, by Nelle Varoquaux.
+- Added :ref:`multidimensional_scaling`, by Nelle Varoquaux.
 
-   - SVMlight file format loader now detects compressed (gzip/bzip2) files and
-     decompresses them on the fly, by `Lars Buitinck`_.
+- SVMlight file format loader now detects compressed (gzip/bzip2) files and
+  decompresses them on the fly, by `Lars Buitinck`_.
 
-   - SVMlight file format serializer now preserves double precision floating
-     point values, by `Olivier Grisel`_.
+- SVMlight file format serializer now preserves double precision floating
+  point values, by `Olivier Grisel`_.
 
-   - A common testing framework for all estimators was added, by `Andreas Müller`_.
+- A common testing framework for all estimators was added, by `Andreas Müller`_.
 
-   - Understandable error messages for estimators that do not accept
-     sparse input by `Gael Varoquaux`_
+- Understandable error messages for estimators that do not accept
+  sparse input by `Gael Varoquaux`_
 
-   - Speedups in hierarchical clustering by `Gael Varoquaux`_. In
-     particular building the tree now supports early stopping. This is
-     useful when the number of clusters is not small compared to the
-     number of samples.
+- Speedups in hierarchical clustering by `Gael Varoquaux`_. In
+  particular building the tree now supports early stopping. This is
+  useful when the number of clusters is not small compared to the
+  number of samples.
 
-   - Add MultiTaskLasso and MultiTaskElasticNet for joint feature selection,
-     by `Alexandre Gramfort`_.
+- Add MultiTaskLasso and MultiTaskElasticNet for joint feature selection,
+  by `Alexandre Gramfort`_.
 
-   - Added :func:`metrics.auc_score` and
-     :func:`metrics.average_precision_score` convenience functions by `Andreas
-     Müller`_.
+- Added :func:`metrics.auc_score` and
+  :func:`metrics.average_precision_score` convenience functions by `Andreas
+  Müller`_.
 
-   - Improved sparse matrix support in the :ref:`feature_selection`
-     module by `Andreas Müller`_.
+- Improved sparse matrix support in the :ref:`feature_selection`
+  module by `Andreas Müller`_.
 
-   - New word boundaries-aware character n-gram analyzer for the
-     :ref:`text_feature_extraction` module by :user:`@kernc <kernc>`.
+- New word boundaries-aware character n-gram analyzer for the
+  :ref:`text_feature_extraction` module by :user:`@kernc <kernc>`.
 
-   - Fixed bug in spectral clustering that led to single point clusters
-     by `Andreas Müller`_.
+- Fixed bug in spectral clustering that led to single point clusters
+  by `Andreas Müller`_.
 
-   - In :class:`feature_extraction.text.CountVectorizer`, added an option to
-     ignore infrequent words, ``min_df`` by  `Andreas Müller`_.
+- In :class:`feature_extraction.text.CountVectorizer`, added an option to
+  ignore infrequent words, ``min_df`` by `Andreas Müller`_.
 
-   - Add support for multiple targets in some linear models (ElasticNet, Lasso
-     and OrthogonalMatchingPursuit) by `Vlad Niculae`_ and
-     `Alexandre Gramfort`_.
+- Add support for multiple targets in some linear models (ElasticNet, Lasso
+  and OrthogonalMatchingPursuit) by `Vlad Niculae`_ and
+  `Alexandre Gramfort`_.
 
-   - Fixes in :class:`decomposition.ProbabilisticPCA` score function by Wei Li.
+- Fixes in :class:`decomposition.ProbabilisticPCA` score function by Wei Li.
 
-   - Fixed feature importance computation in
-     :ref:`gradient_boosting`.
+- Fixed feature importance computation in
+  :ref:`gradient_boosting`.
 
 API changes summary
 -------------------
 
-   - The old ``scikits.learn`` package has disappeared; all code should import
-     from ``sklearn`` instead, which was introduced in 0.9.
+- The old ``scikits.learn`` package has disappeared; all code should import
+  from ``sklearn`` instead, which was introduced in 0.9.
 
-   - In :func:`metrics.roc_curve`, the ``thresholds`` array is now returned
-     with it's order reversed, in order to keep it consistent with the order
-     of the returned ``fpr`` and ``tpr``.
+- In :func:`metrics.roc_curve`, the ``thresholds`` array is now returned
+  with its order reversed, in order to keep it consistent with the order
+  of the returned ``fpr`` and ``tpr``.
 
-   - In :class:`hmm` objects, like :class:`hmm.GaussianHMM`,
-     :class:`hmm.MultinomialHMM`, etc., all parameters must be passed to the
-     object when initialising it and not through ``fit``. Now ``fit`` will
-     only accept the data as an input parameter.
+- In :class:`hmm` objects, like :class:`hmm.GaussianHMM`,
+  :class:`hmm.MultinomialHMM`, etc., all parameters must be passed to the
+  object when initialising it and not through ``fit``. Now ``fit`` will
+  only accept the data as an input parameter.
 
-   - For all SVM classes, a faulty behavior of ``gamma`` was fixed. Previously,
-     the default gamma value was only computed the first time ``fit`` was called
-     and then stored. It is now recalculated on every call to ``fit``.
+- For all SVM classes, a faulty behavior of ``gamma`` was fixed. Previously,
+  the default gamma value was only computed the first time ``fit`` was called
+  and then stored. It is now recalculated on every call to ``fit``.
 
-   - All ``Base`` classes are now abstract meta classes so that they can not be
-     instantiated.
+- All ``Base`` classes are now abstract meta classes so that they can not be
+  instantiated.
 
-   - :func:`cluster.ward_tree` now also returns the parent array. This is
-     necessary for early-stopping in which case the tree is not
-     completely built.
+- :func:`cluster.ward_tree` now also returns the parent array. This is
+  necessary for early-stopping in which case the tree is not
+  completely built.
 
-   - In :class:`feature_extraction.text.CountVectorizer` the parameters
-     ``min_n`` and ``max_n`` were joined to the parameter ``n_gram_range`` to
-     enable grid-searching both at once.
+- In :class:`feature_extraction.text.CountVectorizer` the parameters
+  ``min_n`` and ``max_n`` were joined to the parameter ``n_gram_range`` to
+  enable grid-searching both at once.
 
-   - In :class:`feature_extraction.text.CountVectorizer`, words that appear
-     only in one document are now ignored by default. To reproduce
-     the previous behavior, set ``min_df=1``.
+- In :class:`feature_extraction.text.CountVectorizer`, words that appear
+  only in one document are now ignored by default. To reproduce
+  the previous behavior, set ``min_df=1``.
 
-   - Fixed API inconsistency: :meth:`linear_model.SGDClassifier.predict_proba` now
-     returns 2d array when fit on two classes.
+- Fixed API inconsistency: :meth:`linear_model.SGDClassifier.predict_proba` now
+  returns 2d array when fit on two classes.
 
-   - Fixed API inconsistency: :meth:`discriminant_analysis.QuadraticDiscriminantAnalysis.decision_function`
-     and :meth:`discriminant_analysis.LinearDiscriminantAnalysis.decision_function` now return 1d arrays
-     when fit on two classes.
+- Fixed API inconsistency: :meth:`discriminant_analysis.QuadraticDiscriminantAnalysis.decision_function`
+  and :meth:`discriminant_analysis.LinearDiscriminantAnalysis.decision_function` now return 1d arrays
+  when fit on two classes.
 
-   - Grid of alphas used for fitting :class:`linear_model.LassoCV` and
-     :class:`linear_model.ElasticNetCV` is now stored
-     in the attribute ``alphas_`` rather than overriding the init parameter
-     ``alphas``.
+- Grid of alphas used for fitting :class:`linear_model.LassoCV` and
+  :class:`linear_model.ElasticNetCV` is now stored
+  in the attribute ``alphas_`` rather than overriding the init parameter
+  ``alphas``.
 
-   - Linear models when alpha is estimated by cross-validation store
-     the estimated value in the ``alpha_`` attribute rather than just
-     ``alpha`` or ``best_alpha``.
+- Linear models when alpha is estimated by cross-validation store
+  the estimated value in the ``alpha_`` attribute rather than just
+  ``alpha`` or ``best_alpha``.
 
-   - :class:`ensemble.GradientBoostingClassifier` now supports
-     :meth:`ensemble.GradientBoostingClassifier.staged_predict_proba`, and
-     :meth:`ensemble.GradientBoostingClassifier.staged_predict`.
+- :class:`ensemble.GradientBoostingClassifier` now supports
+  :meth:`ensemble.GradientBoostingClassifier.staged_predict_proba`, and
+  :meth:`ensemble.GradientBoostingClassifier.staged_predict`.
 
-   - :class:`svm.sparse.SVC` and other sparse SVM classes are now deprecated.
-     The all classes in the :ref:`svm` module now automatically select the
-     sparse or dense representation base on the input.
+- :class:`svm.sparse.SVC` and other sparse SVM classes are now deprecated.
+  All classes in the :ref:`svm` module now automatically select the
+  sparse or dense representation based on the input.
 
-   - All clustering algorithms now interpret the array ``X`` given to ``fit`` as
-     input data, in particular :class:`cluster.SpectralClustering` and
-     :class:`cluster.AffinityPropagation` which previously expected affinity matrices.
+- All clustering algorithms now interpret the array ``X`` given to ``fit`` as
+  input data, in particular :class:`cluster.SpectralClustering` and
+  :class:`cluster.AffinityPropagation` which previously expected affinity matrices.
 
-   - For clustering algorithms that take the desired number of clusters as a parameter,
-     this parameter is now called ``n_clusters``.
+- For clustering algorithms that take the desired number of clusters as a parameter,
+  this parameter is now called ``n_clusters``.
 
 
 People
@@ -4340,176 +4460,176 @@ Changelog
 Highlights
 .............
 
-   - Gradient boosted regression trees (:ref:`gradient_boosting`)
-     for classification and regression by `Peter Prettenhofer`_
-     and `Scott White`_ .
+- Gradient boosted regression trees (:ref:`gradient_boosting`)
+  for classification and regression by `Peter Prettenhofer`_
+  and `Scott White`_.
 
-   - Simple dict-based feature loader with support for categorical variables
-     (:class:`feature_extraction.DictVectorizer`) by `Lars Buitinck`_.
+- Simple dict-based feature loader with support for categorical variables
+  (:class:`feature_extraction.DictVectorizer`) by `Lars Buitinck`_.
 
-   - Added Matthews correlation coefficient (:func:`metrics.matthews_corrcoef`)
-     and added macro and micro average options to
-     :func:`metrics.precision_score`, :func:`metrics.recall_score` and
-     :func:`metrics.f1_score` by `Satrajit Ghosh`_.
+- Added Matthews correlation coefficient (:func:`metrics.matthews_corrcoef`)
+  and added macro and micro average options to
+  :func:`metrics.precision_score`, :func:`metrics.recall_score` and
+  :func:`metrics.f1_score` by `Satrajit Ghosh`_.
 
-   - :ref:`out_of_bag` of generalization error for :ref:`ensemble`
-     by `Andreas Müller`_.
+- :ref:`out_of_bag` of generalization error for :ref:`ensemble`
+  by `Andreas Müller`_.
 
-   - :ref:`randomized_l1`: Randomized sparse linear models for feature
-     selection, by `Alexandre Gramfort`_ and `Gael Varoquaux`_
+- Randomized sparse linear models for feature
+  selection, by `Alexandre Gramfort`_ and `Gael Varoquaux`_
 
-   - :ref:`label_propagation` for semi-supervised learning, by Clay
-     Woolam. **Note** the semi-supervised API is still work in progress,
-     and may change.
+- :ref:`label_propagation` for semi-supervised learning, by Clay
+  Woolam. **Note** the semi-supervised API is still work in progress,
+  and may change.
 
-   - Added BIC/AIC model selection to classical :ref:`gmm` and unified
-     the API with the remainder of scikit-learn, by `Bertrand Thirion`_
+- Added BIC/AIC model selection to classical :ref:`gmm` and unified
+  the API with the remainder of scikit-learn, by `Bertrand Thirion`_
 
-   - Added :class:`sklearn.cross_validation.StratifiedShuffleSplit`, which is
-     a :class:`sklearn.cross_validation.ShuffleSplit` with balanced splits,
-     by Yannick Schwartz.
+- Added :class:`sklearn.cross_validation.StratifiedShuffleSplit`, which is
+  a :class:`sklearn.cross_validation.ShuffleSplit` with balanced splits,
+  by Yannick Schwartz.
 
-   - :class:`sklearn.neighbors.NearestCentroid` classifier added, along with a
-     ``shrink_threshold`` parameter, which implements **shrunken centroid
-     classification**, by `Robert Layton`_.
+- :class:`sklearn.neighbors.NearestCentroid` classifier added, along with a
+  ``shrink_threshold`` parameter, which implements **shrunken centroid
+  classification**, by `Robert Layton`_.
 
 Other changes
 ..............
 
-   - Merged dense and sparse implementations of :ref:`sgd` module and
-     exposed utility extension types for sequential
-     datasets ``seq_dataset`` and weight vectors ``weight_vector``
-     by `Peter Prettenhofer`_.
+- Merged dense and sparse implementations of :ref:`sgd` module and
+  exposed utility extension types for sequential
+  datasets ``seq_dataset`` and weight vectors ``weight_vector``
+  by `Peter Prettenhofer`_.
 
-   - Added ``partial_fit`` (support for online/minibatch learning) and
-     warm_start to the :ref:`sgd` module by `Mathieu Blondel`_.
+- Added ``partial_fit`` (support for online/minibatch learning) and
+  warm_start to the :ref:`sgd` module by `Mathieu Blondel`_.
 
-   - Dense and sparse implementations of :ref:`svm` classes and
-     :class:`linear_model.LogisticRegression` merged by `Lars Buitinck`_.
+- Dense and sparse implementations of :ref:`svm` classes and
+  :class:`linear_model.LogisticRegression` merged by `Lars Buitinck`_.
 
-   - Regressors can now be used as base estimator in the :ref:`multiclass`
-     module by `Mathieu Blondel`_.
+- Regressors can now be used as base estimator in the :ref:`multiclass`
+  module by `Mathieu Blondel`_.
 
-   - Added n_jobs option to :func:`metrics.pairwise.pairwise_distances`
-     and :func:`metrics.pairwise.pairwise_kernels` for parallel computation,
-     by `Mathieu Blondel`_.
+- Added n_jobs option to :func:`metrics.pairwise.pairwise_distances`
+  and :func:`metrics.pairwise.pairwise_kernels` for parallel computation,
+  by `Mathieu Blondel`_.
 
-   - :ref:`k_means` can now be run in parallel, using the ``n_jobs`` argument
-     to either :ref:`k_means` or :class:`KMeans`, by `Robert Layton`_.
+- :ref:`k_means` can now be run in parallel, using the ``n_jobs`` argument
+  to either :ref:`k_means` or :class:`KMeans`, by `Robert Layton`_.
 
-   - Improved :ref:`cross_validation` and :ref:`grid_search` documentation
-     and introduced the new :func:`cross_validation.train_test_split`
-     helper function by `Olivier Grisel`_
+- Improved :ref:`cross_validation` and :ref:`grid_search` documentation
+  and introduced the new :func:`cross_validation.train_test_split`
+  helper function by `Olivier Grisel`_
 
-   - :class:`svm.SVC` members ``coef_`` and ``intercept_`` changed sign for
-     consistency with ``decision_function``; for ``kernel==linear``,
-     ``coef_`` was fixed in the one-vs-one case, by `Andreas Müller`_.
+- :class:`svm.SVC` members ``coef_`` and ``intercept_`` changed sign for
+  consistency with ``decision_function``; for ``kernel==linear``,
+  ``coef_`` was fixed in the one-vs-one case, by `Andreas Müller`_.
 
-   - Performance improvements to efficient leave-one-out cross-validated
-     Ridge regression, esp. for the ``n_samples > n_features`` case, in
-     :class:`linear_model.RidgeCV`, by Reuben Fletcher-Costin.
+- Performance improvements to efficient leave-one-out cross-validated
+  Ridge regression, esp. for the ``n_samples > n_features`` case, in
+  :class:`linear_model.RidgeCV`, by Reuben Fletcher-Costin.
 
-   - Refactoring and simplification of the :ref:`text_feature_extraction`
-     API and fixed a bug that caused possible negative IDF,
-     by `Olivier Grisel`_.
+- Refactoring and simplification of the :ref:`text_feature_extraction`
+  API and fixed a bug that caused possible negative IDF,
+  by `Olivier Grisel`_.
 
-   - Beam pruning option in :class:`_BaseHMM` module has been removed since it
-     is difficult to Cythonize. If you are interested in contributing a Cython
-     version, you can use the python version in the git history as a reference.
+- Beam pruning option in :class:`_BaseHMM` module has been removed since it
+  is difficult to Cythonize. If you are interested in contributing a Cython
+  version, you can use the python version in the git history as a reference.
 
-   - Classes in :ref:`neighbors` now support arbitrary Minkowski metric for
-     nearest neighbors searches. The metric can be specified by argument ``p``.
+- Classes in :ref:`neighbors` now support arbitrary Minkowski metric for
+  nearest neighbors searches. The metric can be specified by argument ``p``.
 
 API changes summary
 -------------------
 
-   - :class:`covariance.EllipticEnvelop` is now deprecated - Please use :class:`covariance.EllipticEnvelope`
-     instead.
+- :class:`covariance.EllipticEnvelop` is now deprecated. Please use :class:`covariance.EllipticEnvelope`
+  instead.
 
-   - ``NeighborsClassifier`` and ``NeighborsRegressor`` are gone in the module
-     :ref:`neighbors`. Use the classes :class:`KNeighborsClassifier`,
-     :class:`RadiusNeighborsClassifier`, :class:`KNeighborsRegressor`
-     and/or :class:`RadiusNeighborsRegressor` instead.
+- ``NeighborsClassifier`` and ``NeighborsRegressor`` are gone in the module
+  :ref:`neighbors`. Use the classes :class:`KNeighborsClassifier`,
+  :class:`RadiusNeighborsClassifier`, :class:`KNeighborsRegressor`
+  and/or :class:`RadiusNeighborsRegressor` instead.
 
-   - Sparse classes in the :ref:`sgd` module are now deprecated.
+- Sparse classes in the :ref:`sgd` module are now deprecated.
 
-   - In :class:`mixture.GMM`, :class:`mixture.DPGMM` and :class:`mixture.VBGMM`,
-     parameters must be passed to an object when initialising it and not through
-     ``fit``. Now ``fit`` will only accept the data as an input parameter.
+- In :class:`mixture.GMM`, :class:`mixture.DPGMM` and :class:`mixture.VBGMM`,
+  parameters must be passed to an object when initialising it and not through
+  ``fit``. Now ``fit`` will only accept the data as an input parameter.
 
-   - methods ``rvs`` and ``decode`` in :class:`GMM` module are now deprecated.
-     ``sample`` and ``score`` or ``predict`` should be used instead.
+- methods ``rvs`` and ``decode`` in :class:`GMM` module are now deprecated.
+  ``sample`` and ``score`` or ``predict`` should be used instead.
 
-   - attribute ``_scores`` and ``_pvalues`` in univariate feature selection
-     objects are now deprecated.
-     ``scores_`` or ``pvalues_`` should be used instead.
+- attribute ``_scores`` and ``_pvalues`` in univariate feature selection
+  objects are now deprecated.
+  ``scores_`` or ``pvalues_`` should be used instead.
 
-   - In :class:`LogisticRegression`, :class:`LinearSVC`, :class:`SVC` and
-     :class:`NuSVC`, the ``class_weight`` parameter is now an initialization
-     parameter, not a parameter to fit. This makes grid searches
-     over this parameter possible.
+- In :class:`LogisticRegression`, :class:`LinearSVC`, :class:`SVC` and
+  :class:`NuSVC`, the ``class_weight`` parameter is now an initialization
+  parameter, not a parameter to fit. This makes grid searches
+  over this parameter possible.
 
-   - LFW ``data`` is now always shape ``(n_samples, n_features)`` to be
-     consistent with the Olivetti faces dataset. Use ``images`` and
-     ``pairs`` attribute to access the natural images shapes instead.
+- LFW ``data`` is now always shape ``(n_samples, n_features)`` to be
+  consistent with the Olivetti faces dataset. Use ``images`` and
+  ``pairs`` attribute to access the natural images shapes instead.
 
-   - In :class:`svm.LinearSVC`, the meaning of the ``multi_class`` parameter
-     changed.  Options now are ``'ovr'`` and ``'crammer_singer'``, with
-     ``'ovr'`` being the default.  This does not change the default behavior
-     but hopefully is less confusing.
+- In :class:`svm.LinearSVC`, the meaning of the ``multi_class`` parameter
+  changed.  Options now are ``'ovr'`` and ``'crammer_singer'``, with
+  ``'ovr'`` being the default.  This does not change the default behavior
+  but hopefully is less confusing.
 
-   - Class :class:`feature_selection.text.Vectorizer` is deprecated and
-     replaced by :class:`feature_selection.text.TfidfVectorizer`.
+- Class :class:`feature_extraction.text.Vectorizer` is deprecated and
+  replaced by :class:`feature_extraction.text.TfidfVectorizer`.
 
-   - The preprocessor / analyzer nested structure for text feature
-     extraction has been removed. All those features are
-     now directly passed as flat constructor arguments
-     to :class:`feature_selection.text.TfidfVectorizer` and
-     :class:`feature_selection.text.CountVectorizer`, in particular the
-     following parameters are now used:
+- The preprocessor / analyzer nested structure for text feature
+  extraction has been removed. All those features are
+  now directly passed as flat constructor arguments
+  to :class:`feature_extraction.text.TfidfVectorizer` and
+  :class:`feature_extraction.text.CountVectorizer`, in particular the
+  following parameters are now used:
 
-       - ``analyzer`` can be ``'word'`` or ``'char'`` to switch the default
-         analysis scheme, or use a specific python callable (as previously).
+- ``analyzer`` can be ``'word'`` or ``'char'`` to switch the default
+  analysis scheme, or use a specific python callable (as previously).
 
-       - ``tokenizer`` and ``preprocessor`` have been introduced to make it
-         still possible to customize those steps with the new API.
+- ``tokenizer`` and ``preprocessor`` have been introduced to make it
+  still possible to customize those steps with the new API.
 
-       - ``input`` explicitly control how to interpret the sequence passed to
-         ``fit`` and ``predict``: filenames, file objects or direct (byte or
-         Unicode) strings.
+- ``input`` explicitly controls how to interpret the sequence passed to
+  ``fit`` and ``predict``: filenames, file objects or direct (byte or
+  Unicode) strings.
 
-       - charset decoding is explicit and strict by default.
+- charset decoding is explicit and strict by default.
 
-       - the ``vocabulary``, fitted or not is now stored in the
-         ``vocabulary_`` attribute to be consistent with the project
-         conventions.
+- the ``vocabulary``, fitted or not is now stored in the
+  ``vocabulary_`` attribute to be consistent with the project
+  conventions.
 
-   - Class :class:`feature_selection.text.TfidfVectorizer` now derives directly
-     from :class:`feature_selection.text.CountVectorizer` to make grid
-     search trivial.
+- Class :class:`feature_extraction.text.TfidfVectorizer` now derives directly
+  from :class:`feature_extraction.text.CountVectorizer` to make grid
+  search trivial.
 
-   - methods ``rvs`` in :class:`_BaseHMM` module are now deprecated.
-     ``sample`` should be used instead.
+- Method ``rvs`` in :class:`_BaseHMM` module is now deprecated.
+  ``sample`` should be used instead.
 
-   - Beam pruning option in :class:`_BaseHMM` module is removed since it is
-     difficult to be Cythonized. If you are interested, you can look in the
-     history codes by git.
+- Beam pruning option in :class:`_BaseHMM` module has been removed since it is
+  difficult to Cythonize. If you are interested, you can find the Python
+  version in the git history.
 
-   - The SVMlight format loader now supports files with both zero-based and
-     one-based column indices, since both occur "in the wild".
+- The SVMlight format loader now supports files with both zero-based and
+  one-based column indices, since both occur "in the wild".
 
-   - Arguments in class :class:`ShuffleSplit` are now consistent with
-     :class:`StratifiedShuffleSplit`. Arguments ``test_fraction`` and
-     ``train_fraction`` are deprecated and renamed to ``test_size`` and
-     ``train_size`` and can accept both ``float`` and ``int``.
+- Arguments in class :class:`ShuffleSplit` are now consistent with
+  :class:`StratifiedShuffleSplit`. Arguments ``test_fraction`` and
+  ``train_fraction`` are deprecated and renamed to ``test_size`` and
+  ``train_size`` and can accept both ``float`` and ``int``.
 
-   - Arguments in class :class:`Bootstrap` are now consistent with
-     :class:`StratifiedShuffleSplit`. Arguments ``n_test`` and
-     ``n_train`` are deprecated and renamed to ``test_size`` and
-     ``train_size`` and can accept both ``float`` and ``int``.
+- Arguments in class :class:`Bootstrap` are now consistent with
+  :class:`StratifiedShuffleSplit`. Arguments ``n_test`` and
+  ``n_train`` are deprecated and renamed to ``test_size`` and
+  ``train_size`` and can accept both ``float`` and ``int``.
 
-   - Argument ``p`` added to classes in :ref:`neighbors` to specify an
-     arbitrary Minkowski metric for nearest neighbors searches.
+- Argument ``p`` added to classes in :ref:`neighbors` to specify an
+  arbitrary Minkowski metric for nearest neighbors searches.
 
 
 People
@@ -4574,85 +4694,85 @@ Version 0.10
 Changelog
 ---------
 
-   - Python 2.5 compatibility was dropped; the minimum Python version needed
-     to use scikit-learn is now 2.6.
+- Python 2.5 compatibility was dropped; the minimum Python version needed
+  to use scikit-learn is now 2.6.
 
-   - :ref:`sparse_inverse_covariance` estimation using the graph Lasso, with
-     associated cross-validated estimator, by `Gael Varoquaux`_
+- :ref:`sparse_inverse_covariance` estimation using the graph Lasso, with
+  associated cross-validated estimator, by `Gael Varoquaux`_
 
-   - New :ref:`Tree <tree>` module by `Brian Holt`_, `Peter Prettenhofer`_,
-     `Satrajit Ghosh`_ and `Gilles Louppe`_. The module comes with complete
-     documentation and examples.
+- New :ref:`Tree <tree>` module by `Brian Holt`_, `Peter Prettenhofer`_,
+  `Satrajit Ghosh`_ and `Gilles Louppe`_. The module comes with complete
+  documentation and examples.
 
-   - Fixed a bug in the RFE module by `Gilles Louppe`_ (issue #378).
+- Fixed a bug in the RFE module by `Gilles Louppe`_ (issue #378).
 
-   - Fixed a memory leak in :ref:`svm` module by `Brian Holt`_ (issue #367).
+- Fixed a memory leak in :ref:`svm` module by `Brian Holt`_ (issue #367).
 
-   - Faster tests by `Fabian Pedregosa`_ and others.
+- Faster tests by `Fabian Pedregosa`_ and others.
 
-   - Silhouette Coefficient cluster analysis evaluation metric added as
-     :func:`sklearn.metrics.silhouette_score` by Robert Layton.
+- Silhouette Coefficient cluster analysis evaluation metric added as
+  :func:`sklearn.metrics.silhouette_score` by Robert Layton.
 
-   - Fixed a bug in :ref:`k_means` in the handling of the ``n_init`` parameter:
-     the clustering algorithm used to be run ``n_init`` times but the last
-     solution was retained instead of the best solution by `Olivier Grisel`_.
+- Fixed a bug in :ref:`k_means` in the handling of the ``n_init`` parameter:
+  the clustering algorithm used to be run ``n_init`` times but the last
+  solution was retained instead of the best solution by `Olivier Grisel`_.
 
-   - Minor refactoring in :ref:`sgd` module; consolidated dense and sparse
-     predict methods; Enhanced test time performance by converting model
-     parameters to fortran-style arrays after fitting (only multi-class).
+- Minor refactoring in :ref:`sgd` module; consolidated dense and sparse
+  predict methods; Enhanced test time performance by converting model
+  parameters to fortran-style arrays after fitting (only multi-class).
 
-   - Adjusted Mutual Information metric added as
-     :func:`sklearn.metrics.adjusted_mutual_info_score` by Robert Layton.
+- Adjusted Mutual Information metric added as
+  :func:`sklearn.metrics.adjusted_mutual_info_score` by Robert Layton.
 
-   - Models like SVC/SVR/LinearSVC/LogisticRegression from libsvm/liblinear
-     now support scaling of C regularization parameter by the number of
-     samples by `Alexandre Gramfort`_.
+- Models like SVC/SVR/LinearSVC/LogisticRegression from libsvm/liblinear
+  now support scaling of C regularization parameter by the number of
+  samples by `Alexandre Gramfort`_.
 
-   - New :ref:`Ensemble Methods <ensemble>` module by `Gilles Louppe`_ and
-     `Brian Holt`_. The module comes with the random forest algorithm and the
-     extra-trees method, along with documentation and examples.
+- New :ref:`Ensemble Methods <ensemble>` module by `Gilles Louppe`_ and
+  `Brian Holt`_. The module comes with the random forest algorithm and the
+  extra-trees method, along with documentation and examples.
 
-   - :ref:`outlier_detection`: outlier and novelty detection, by
-     :user:`Virgile Fritsch <VirgileFritsch>`.
+- :ref:`outlier_detection`: outlier and novelty detection, by
+  :user:`Virgile Fritsch <VirgileFritsch>`.
 
-   - :ref:`kernel_approximation`: a transform implementing kernel
-     approximation for fast SGD on non-linear kernels by
-     `Andreas Müller`_.
+- :ref:`kernel_approximation`: a transform implementing kernel
+  approximation for fast SGD on non-linear kernels by
+  `Andreas Müller`_.
 
-   - Fixed a bug due to atom swapping in :ref:`OMP` by `Vlad Niculae`_.
+- Fixed a bug due to atom swapping in :ref:`OMP` by `Vlad Niculae`_.
 
-   - :ref:`SparseCoder` by `Vlad Niculae`_.
+- :ref:`SparseCoder` by `Vlad Niculae`_.
 
-   - :ref:`mini_batch_kmeans` performance improvements by `Olivier Grisel`_.
+- :ref:`mini_batch_kmeans` performance improvements by `Olivier Grisel`_.
 
-   - :ref:`k_means` support for sparse matrices by `Mathieu Blondel`_.
+- :ref:`k_means` support for sparse matrices by `Mathieu Blondel`_.
 
-   - Improved documentation for developers and for the :mod:`sklearn.utils`
-     module, by `Jake Vanderplas`_.
+- Improved documentation for developers and for the :mod:`sklearn.utils`
+  module, by `Jake Vanderplas`_.
 
-   - Vectorized 20newsgroups dataset loader
-     (:func:`sklearn.datasets.fetch_20newsgroups_vectorized`) by
-     `Mathieu Blondel`_.
+- Vectorized 20newsgroups dataset loader
+  (:func:`sklearn.datasets.fetch_20newsgroups_vectorized`) by
+  `Mathieu Blondel`_.
 
-   - :ref:`multiclass` by `Lars Buitinck`_.
+- :ref:`multiclass` by `Lars Buitinck`_.
 
-   - Utilities for fast computation of mean and variance for sparse matrices
-     by `Mathieu Blondel`_.
+- Utilities for fast computation of mean and variance for sparse matrices
+  by `Mathieu Blondel`_.
 
-   - Make :func:`sklearn.preprocessing.scale` and
-     :class:`sklearn.preprocessing.Scaler` work on sparse matrices by
-     `Olivier Grisel`_
+- Make :func:`sklearn.preprocessing.scale` and
+  :class:`sklearn.preprocessing.Scaler` work on sparse matrices by
+  `Olivier Grisel`_
 
-   - Feature importances using decision trees and/or forest of trees,
-     by `Gilles Louppe`_.
+- Feature importances using decision trees and/or forest of trees,
+  by `Gilles Louppe`_.
 
-   - Parallel implementation of forests of randomized trees by
-     `Gilles Louppe`_.
+- Parallel implementation of forests of randomized trees by
+  `Gilles Louppe`_.
 
-   - :class:`sklearn.cross_validation.ShuffleSplit` can subsample the train
-     sets as well as the test sets by `Olivier Grisel`_.
+- :class:`sklearn.cross_validation.ShuffleSplit` can subsample the train
+  sets as well as the test sets by `Olivier Grisel`_.
 
-   - Errors in the build of the documentation fixed by `Andreas Müller`_.
+- Errors in the build of the documentation fixed by `Andreas Müller`_.
 
 
 API changes summary
@@ -4661,55 +4781,55 @@ API changes summary
 Here are the code migration instructions when upgrading from scikit-learn
 version 0.9:
 
-  - Some estimators that may overwrite their inputs to save memory previously
-    had ``overwrite_`` parameters; these have been replaced with ``copy_``
-    parameters with exactly the opposite meaning.
+- Some estimators that may overwrite their inputs to save memory previously
+  had ``overwrite_`` parameters; these have been replaced with ``copy_``
+  parameters with exactly the opposite meaning.
 
-    This particularly affects some of the estimators in :mod:`linear_model`.
-    The default behavior is still to copy everything passed in.
+  This particularly affects some of the estimators in :mod:`linear_model`.
+  The default behavior is still to copy everything passed in.
 
-  - The SVMlight dataset loader :func:`sklearn.datasets.load_svmlight_file` no
-    longer supports loading two files at once; use ``load_svmlight_files``
-    instead. Also, the (unused) ``buffer_mb`` parameter is gone.
+- The SVMlight dataset loader :func:`sklearn.datasets.load_svmlight_file` no
+  longer supports loading two files at once; use ``load_svmlight_files``
+  instead. Also, the (unused) ``buffer_mb`` parameter is gone.
 
-  - Sparse estimators in the :ref:`sgd` module use dense parameter vector
-    ``coef_`` instead of ``sparse_coef_``. This significantly improves
-    test time performance.
+- Sparse estimators in the :ref:`sgd` module use dense parameter vector
+  ``coef_`` instead of ``sparse_coef_``. This significantly improves
+  test time performance.
 
-  - The :ref:`covariance` module now has a robust estimator of
-    covariance, the Minimum Covariance Determinant estimator.
+- The :ref:`covariance` module now has a robust estimator of
+  covariance, the Minimum Covariance Determinant estimator.
 
-  - Cluster evaluation metrics in :mod:`metrics.cluster` have been refactored
-    but the changes are backwards compatible. They have been moved to the
-    :mod:`metrics.cluster.supervised`, along with
-    :mod:`metrics.cluster.unsupervised` which contains the Silhouette
-    Coefficient.
+- Cluster evaluation metrics in :mod:`metrics.cluster` have been refactored
+  but the changes are backwards compatible. They have been moved to the
+  :mod:`metrics.cluster.supervised`, along with
+  :mod:`metrics.cluster.unsupervised` which contains the Silhouette
+  Coefficient.
 
-  - The ``permutation_test_score`` function now behaves the same way as
-    ``cross_val_score`` (i.e. uses the mean score across the folds.)
+- The ``permutation_test_score`` function now behaves the same way as
+  ``cross_val_score`` (i.e. uses the mean score across the folds.)
 
-  - Cross Validation generators now use integer indices (``indices=True``)
-    by default instead of boolean masks. This make it more intuitive to
-    use with sparse matrix data.
+- Cross Validation generators now use integer indices (``indices=True``)
+  by default instead of boolean masks. This makes it more intuitive to
+  use with sparse matrix data.
 
-  - The functions used for sparse coding, ``sparse_encode`` and
-    ``sparse_encode_parallel`` have been combined into
-    :func:`sklearn.decomposition.sparse_encode`, and the shapes of the arrays
-    have been transposed for consistency with the matrix factorization setting,
-    as opposed to the regression setting.
+- The functions used for sparse coding, ``sparse_encode`` and
+  ``sparse_encode_parallel`` have been combined into
+  :func:`sklearn.decomposition.sparse_encode`, and the shapes of the arrays
+  have been transposed for consistency with the matrix factorization setting,
+  as opposed to the regression setting.
 
-  - Fixed an off-by-one error in the SVMlight/LibSVM file format handling;
-    files generated using :func:`sklearn.datasets.dump_svmlight_file` should be
-    re-generated. (They should continue to work, but accidentally had one
-    extra column of zeros prepended.)
+- Fixed an off-by-one error in the SVMlight/LibSVM file format handling;
+  files generated using :func:`sklearn.datasets.dump_svmlight_file` should be
+  re-generated. (They should continue to work, but accidentally had one
+  extra column of zeros prepended.)
 
-  - ``BaseDictionaryLearning`` class replaced by ``SparseCodingMixin``.
+- ``BaseDictionaryLearning`` class replaced by ``SparseCodingMixin``.
 
-  - :func:`sklearn.utils.extmath.fast_svd` has been renamed
-    :func:`sklearn.utils.extmath.randomized_svd` and the default
-    oversampling is now fixed to 10 additional random vectors instead
-    of doubling the number of components to extract. The new behavior
-    follows the reference paper.
+- :func:`sklearn.utils.extmath.fast_svd` has been renamed
+  :func:`sklearn.utils.extmath.randomized_svd` and the default
+  oversampling is now fixed to 10 additional random vectors instead
+  of doubling the number of components to extract. The new behavior
+  follows the reference paper.
 
 
 People
@@ -4791,84 +4911,84 @@ This release also includes the dictionary-learning work developed by
 Changelog
 ---------
 
-   - New :ref:`manifold` module by `Jake Vanderplas`_ and
-     `Fabian Pedregosa`_.
+- New :ref:`manifold` module by `Jake Vanderplas`_ and
+  `Fabian Pedregosa`_.
 
-   - New :ref:`Dirichlet Process <dirichlet_process>` Gaussian Mixture
-     Model by `Alexandre Passos`_
+- New :ref:`Dirichlet Process <dirichlet_process>` Gaussian Mixture
+  Model by `Alexandre Passos`_
 
-   - :ref:`neighbors` module refactoring by `Jake Vanderplas`_ :
-     general refactoring, support for sparse matrices in input, speed and
-     documentation improvements. See the next section for a full list of API
-     changes.
+- :ref:`neighbors` module refactoring by `Jake Vanderplas`_ :
+  general refactoring, support for sparse matrices in input, speed and
+  documentation improvements. See the next section for a full list of API
+  changes.
 
-   - Improvements on the :ref:`feature_selection` module by
-     `Gilles Louppe`_ : refactoring of the RFE classes, documentation
-     rewrite, increased efficiency and minor API changes.
+- Improvements on the :ref:`feature_selection` module by
+  `Gilles Louppe`_ : refactoring of the RFE classes, documentation
+  rewrite, increased efficiency and minor API changes.
 
-   - :ref:`SparsePCA` by `Vlad Niculae`_, `Gael Varoquaux`_ and
-     `Alexandre Gramfort`_
+- :ref:`SparsePCA` by `Vlad Niculae`_, `Gael Varoquaux`_ and
+  `Alexandre Gramfort`_
 
-   - Printing an estimator now behaves independently of architectures
-     and Python version thanks to :user:`Jean Kossaifi <JeanKossaifi>`.
+- Printing an estimator now behaves independently of architectures
+  and Python version thanks to :user:`Jean Kossaifi <JeanKossaifi>`.
 
-   - :ref:`Loader for libsvm/svmlight format <libsvm_loader>` by
-     `Mathieu Blondel`_ and `Lars Buitinck`_
+- :ref:`Loader for libsvm/svmlight format <libsvm_loader>` by
+  `Mathieu Blondel`_ and `Lars Buitinck`_
 
-   - Documentation improvements: thumbnails in
-     :ref:`example gallery <examples-index>` by `Fabian Pedregosa`_.
+- Documentation improvements: thumbnails in
+  example gallery by `Fabian Pedregosa`_.
 
-   - Important bugfixes in :ref:`svm` module (segfaults, bad
-     performance) by `Fabian Pedregosa`_.
+- Important bugfixes in :ref:`svm` module (segfaults, bad
+  performance) by `Fabian Pedregosa`_.
 
-   - Added :ref:`multinomial_naive_bayes` and :ref:`bernoulli_naive_bayes`
-     by `Lars Buitinck`_
+- Added :ref:`multinomial_naive_bayes` and :ref:`bernoulli_naive_bayes`
+  by `Lars Buitinck`_
 
-   - Text feature extraction optimizations by Lars Buitinck
+- Text feature extraction optimizations by Lars Buitinck
 
-   - Chi-Square feature selection
-     (:func:`feature_selection.univariate_selection.chi2`) by `Lars Buitinck`_.
+- Chi-Square feature selection
+  (:func:`feature_selection.univariate_selection.chi2`) by `Lars Buitinck`_.
 
-   - :ref:`sample_generators` module refactoring by `Gilles Louppe`_
+- :ref:`sample_generators` module refactoring by `Gilles Louppe`_
 
-   - :ref:`multiclass` by `Mathieu Blondel`_
+- :ref:`multiclass` by `Mathieu Blondel`_
 
-   - Ball tree rewrite by `Jake Vanderplas`_
+- Ball tree rewrite by `Jake Vanderplas`_
 
-   - Implementation of :ref:`dbscan` algorithm by Robert Layton
+- Implementation of :ref:`dbscan` algorithm by Robert Layton
 
-   - Kmeans predict and transform by Robert Layton
+- Kmeans predict and transform by Robert Layton
 
-   - Preprocessing module refactoring by `Olivier Grisel`_
+- Preprocessing module refactoring by `Olivier Grisel`_
 
-   - Faster mean shift by Conrad Lee
+- Faster mean shift by Conrad Lee
 
-   - New ``Bootstrap``, :ref:`ShuffleSplit` and various other
-     improvements in cross validation schemes by `Olivier Grisel`_ and
-     `Gael Varoquaux`_
+- New ``Bootstrap``, :ref:`ShuffleSplit` and various other
+  improvements in cross validation schemes by `Olivier Grisel`_ and
+  `Gael Varoquaux`_
 
-   - Adjusted Rand index and V-Measure clustering evaluation metrics by `Olivier Grisel`_
+- Adjusted Rand index and V-Measure clustering evaluation metrics by `Olivier Grisel`_
 
-   - Added :class:`Orthogonal Matching Pursuit <linear_model.OrthogonalMatchingPursuit>` by `Vlad Niculae`_
+- Added :class:`Orthogonal Matching Pursuit <linear_model.OrthogonalMatchingPursuit>` by `Vlad Niculae`_
 
-   - Added 2D-patch extractor utilities in the :ref:`feature_extraction` module by `Vlad Niculae`_
+- Added 2D-patch extractor utilities in the :ref:`feature_extraction` module by `Vlad Niculae`_
 
-   - Implementation of :class:`linear_model.LassoLarsCV`
-     (cross-validated Lasso solver using the Lars algorithm) and
-     :class:`linear_model.LassoLarsIC` (BIC/AIC model
-     selection in Lars) by `Gael Varoquaux`_
-     and `Alexandre Gramfort`_
+- Implementation of :class:`linear_model.LassoLarsCV`
+  (cross-validated Lasso solver using the Lars algorithm) and
+  :class:`linear_model.LassoLarsIC` (BIC/AIC model
+  selection in Lars) by `Gael Varoquaux`_
+  and `Alexandre Gramfort`_
 
-   - Scalability improvements to :func:`metrics.roc_curve` by Olivier Hervieu
+- Scalability improvements to :func:`metrics.roc_curve` by Olivier Hervieu
 
-   - Distance helper functions :func:`metrics.pairwise.pairwise_distances`
-     and :func:`metrics.pairwise.pairwise_kernels` by Robert Layton
+- Distance helper functions :func:`metrics.pairwise.pairwise_distances`
+  and :func:`metrics.pairwise.pairwise_kernels` by Robert Layton
 
-   - :class:`Mini-Batch K-Means <cluster.MiniBatchKMeans>` by Nelle Varoquaux and Peter Prettenhofer.
+- :class:`Mini-Batch K-Means <cluster.MiniBatchKMeans>` by Nelle Varoquaux and Peter Prettenhofer.
 
-   - :ref:`mldata` utilities by Pietro Berkes.
+- :ref:`mldata` utilities by Pietro Berkes.
 
-   - :ref:`olivetti_faces` by `David Warde-Farley`_.
+- :ref:`olivetti_faces` by `David Warde-Farley`_.
 
 
 API changes summary
@@ -4877,71 +4997,71 @@ API changes summary
 Here are the code migration instructions when upgrading from scikit-learn
 version 0.8:
 
-  - The ``scikits.learn`` package was renamed ``sklearn``. There is
-    still a ``scikits.learn`` package alias for backward compatibility.
+- The ``scikits.learn`` package was renamed ``sklearn``. There is
+  still a ``scikits.learn`` package alias for backward compatibility.
 
-    Third-party projects with a dependency on scikit-learn 0.9+ should
-    upgrade their codebase. For instance, under Linux / MacOSX just run
-    (make a backup first!)::
+  Third-party projects with a dependency on scikit-learn 0.9+ should
+  upgrade their codebase. For instance, under Linux / MacOSX just run
+  (make a backup first!)::
 
       find -name "*.py" | xargs sed -i 's/\bscikits.learn\b/sklearn/g'
 
-  - Estimators no longer accept model parameters as ``fit`` arguments:
-    instead all parameters must be only be passed as constructor
-    arguments or using the now public ``set_params`` method inherited
-    from :class:`base.BaseEstimator`.
+- Estimators no longer accept model parameters as ``fit`` arguments:
+  instead all parameters must only be passed as constructor
+  arguments or using the now public ``set_params`` method inherited
+  from :class:`base.BaseEstimator`.
 
-    Some estimators can still accept keyword arguments on the ``fit``
-    but this is restricted to data-dependent values (e.g. a Gram matrix
-    or an affinity matrix that are precomputed from the ``X`` data matrix.
+  Some estimators can still accept keyword arguments on the ``fit``
+  but this is restricted to data-dependent values (e.g. a Gram matrix
+  or an affinity matrix that are precomputed from the ``X`` data matrix).
 
-  - The ``cross_val`` package has been renamed to ``cross_validation``
-    although there is also a ``cross_val`` package alias in place for
-    backward compatibility.
+- The ``cross_val`` package has been renamed to ``cross_validation``
+  although there is also a ``cross_val`` package alias in place for
+  backward compatibility.
 
-    Third-party projects with a dependency on scikit-learn 0.9+ should
-    upgrade their codebase. For instance, under Linux / MacOSX just run
-    (make a backup first!)::
+  Third-party projects with a dependency on scikit-learn 0.9+ should
+  upgrade their codebase. For instance, under Linux / MacOSX just run
+  (make a backup first!)::
 
       find -name "*.py" | xargs sed -i 's/\bcross_val\b/cross_validation/g'
 
-  - The ``score_func`` argument of the
-    ``sklearn.cross_validation.cross_val_score`` function is now expected
-    to accept ``y_test`` and ``y_predicted`` as only arguments for
-    classification and regression tasks or ``X_test`` for unsupervised
-    estimators.
+- The ``score_func`` argument of the
+  ``sklearn.cross_validation.cross_val_score`` function is now expected
+  to accept ``y_test`` and ``y_predicted`` as only arguments for
+  classification and regression tasks or ``X_test`` for unsupervised
+  estimators.
 
-  - ``gamma`` parameter for support vector machine algorithms is set
-    to ``1 / n_features`` by default, instead of ``1 / n_samples``.
+- ``gamma`` parameter for support vector machine algorithms is set
+  to ``1 / n_features`` by default, instead of ``1 / n_samples``.
 
-  - The ``sklearn.hmm`` has been marked as orphaned: it will be removed
-    from scikit-learn in version 0.11 unless someone steps up to
-    contribute documentation, examples and fix lurking numerical
-    stability issues.
+- The ``sklearn.hmm`` has been marked as orphaned: it will be removed
+  from scikit-learn in version 0.11 unless someone steps up to
+  contribute documentation, examples and fix lurking numerical
+  stability issues.
 
-  - ``sklearn.neighbors`` has been made into a submodule.  The two previously
-    available estimators, ``NeighborsClassifier`` and ``NeighborsRegressor``
-    have been marked as deprecated.  Their functionality has been divided
-    among five new classes: ``NearestNeighbors`` for unsupervised neighbors
-    searches, ``KNeighborsClassifier`` & ``RadiusNeighborsClassifier``
-    for supervised classification problems, and ``KNeighborsRegressor``
-    & ``RadiusNeighborsRegressor`` for supervised regression problems.
+- ``sklearn.neighbors`` has been made into a submodule.  The two previously
+  available estimators, ``NeighborsClassifier`` and ``NeighborsRegressor``
+  have been marked as deprecated.  Their functionality has been divided
+  among five new classes: ``NearestNeighbors`` for unsupervised neighbors
+  searches, ``KNeighborsClassifier`` & ``RadiusNeighborsClassifier``
+  for supervised classification problems, and ``KNeighborsRegressor``
+  & ``RadiusNeighborsRegressor`` for supervised regression problems.
 
-  - ``sklearn.ball_tree.BallTree`` has been moved to
-    ``sklearn.neighbors.BallTree``.  Using the former will generate a warning.
+- ``sklearn.ball_tree.BallTree`` has been moved to
+  ``sklearn.neighbors.BallTree``.  Using the former will generate a warning.
 
-  - ``sklearn.linear_model.LARS()`` and related classes (LassoLARS,
-    LassoLARSCV, etc.) have been renamed to
-    ``sklearn.linear_model.Lars()``.
+- ``sklearn.linear_model.LARS()`` and related classes (LassoLARS,
+  LassoLARSCV, etc.) have been renamed to
+  ``sklearn.linear_model.Lars()``.
 
-  - All distance metrics and kernels in ``sklearn.metrics.pairwise`` now have a Y
-    parameter, which by default is None. If not given, the result is the distance
-    (or kernel similarity) between each sample in Y. If given, the result is the
-    pairwise distance (or kernel similarity) between samples in X to Y.
+- All distance metrics and kernels in ``sklearn.metrics.pairwise`` now have a Y
+  parameter, which by default is None. If not given, the result is the distance
+  (or kernel similarity) between each sample in X. If given, the result is the
+  pairwise distance (or kernel similarity) between samples in X to Y.
 
-  - ``sklearn.metrics.pairwise.l1_distance`` is now called ``manhattan_distance``,
-    and by default returns the pairwise distance. For the component wise distance,
-    set the parameter ``sum_over_features`` to ``False``.
+- ``sklearn.metrics.pairwise.l1_distance`` is now called ``manhattan_distance``,
+  and by default returns the pairwise distance. For the component wise distance,
+  set the parameter ``sum_over_features`` to ``False``.
 
 Backward compatibility package aliases and other deprecated classes and
 functions will be removed in version 0.11.
@@ -4952,42 +5072,42 @@ People
 
 38 people contributed to this release.
 
-   - 387  `Vlad Niculae`_
-   - 320  `Olivier Grisel`_
-   - 192  `Lars Buitinck`_
-   - 179  `Gael Varoquaux`_
-   - 168  `Fabian Pedregosa`_ (`INRIA`_, `Parietal Team`_)
-   - 127  `Jake Vanderplas`_
-   - 120  `Mathieu Blondel`_
-   - 85  `Alexandre Passos`_
-   - 67  `Alexandre Gramfort`_
-   - 57  `Peter Prettenhofer`_
-   - 56  `Gilles Louppe`_
-   - 42  Robert Layton
-   - 38  Nelle Varoquaux
-   - 32  :user:`Jean Kossaifi <JeanKossaifi>`
-   - 30  Conrad Lee
-   - 22  Pietro Berkes
-   - 18  andy
-   - 17  David Warde-Farley
-   - 12  Brian Holt
-   - 11  Robert
-   - 8  Amit Aides
-   - 8  :user:`Virgile Fritsch <VirgileFritsch>`
-   - 7  `Yaroslav Halchenko`_
-   - 6  Salvatore Masecchia
-   - 5  Paolo Losi
-   - 4  Vincent Schut
-   - 3  Alexis Metaireau
-   - 3  Bryan Silverthorn
-   - 3  `Andreas Müller`_
-   - 2  Minwoo Jake Lee
-   - 1  Emmanuelle Gouillart
-   - 1  Keith Goodman
-   - 1  Lucas Wiman
-   - 1  `Nicolas Pinto`_
-   - 1  Thouis (Ray) Jones
-   - 1  Tim Sheerman-Chase
+- 387  `Vlad Niculae`_
+- 320  `Olivier Grisel`_
+- 192  `Lars Buitinck`_
+- 179  `Gael Varoquaux`_
+- 168  `Fabian Pedregosa`_ (`INRIA`_, `Parietal Team`_)
+- 127  `Jake Vanderplas`_
+- 120  `Mathieu Blondel`_
+- 85  `Alexandre Passos`_
+- 67  `Alexandre Gramfort`_
+- 57  `Peter Prettenhofer`_
+- 56  `Gilles Louppe`_
+- 42  Robert Layton
+- 38  Nelle Varoquaux
+- 32  :user:`Jean Kossaifi <JeanKossaifi>`
+- 30  Conrad Lee
+- 22  Pietro Berkes
+- 18  andy
+- 17  David Warde-Farley
+- 12  Brian Holt
+- 11  Robert
+- 8  Amit Aides
+- 8  :user:`Virgile Fritsch <VirgileFritsch>`
+- 7  `Yaroslav Halchenko`_
+- 6  Salvatore Masecchia
+- 5  Paolo Losi
+- 4  Vincent Schut
+- 3  Alexis Metaireau
+- 3  Bryan Silverthorn
+- 3  `Andreas Müller`_
+- 2  Minwoo Jake Lee
+- 1  Emmanuelle Gouillart
+- 1  Keith Goodman
+- 1  Lucas Wiman
+- 1  `Nicolas Pinto`_
+- 1  Thouis (Ray) Jones
+- 1  Tim Sheerman-Chase
 
 
 .. _changes_0_8:
@@ -5010,53 +5130,53 @@ Changelog
 
 Several new modules where introduced during this release:
 
-  - New :ref:`hierarchical_clustering` module by Vincent Michel,
-    `Bertrand Thirion`_, `Alexandre Gramfort`_ and `Gael Varoquaux`_.
+- New :ref:`hierarchical_clustering` module by Vincent Michel,
+  `Bertrand Thirion`_, `Alexandre Gramfort`_ and `Gael Varoquaux`_.
 
-  - :ref:`kernel_pca` implementation by `Mathieu Blondel`_
+- :ref:`kernel_pca` implementation by `Mathieu Blondel`_
 
-  - :ref:`labeled_faces_in_the_wild` by `Olivier Grisel`_.
+- :ref:`labeled_faces_in_the_wild` by `Olivier Grisel`_.
 
-  - New :ref:`cross_decomposition` module by `Edouard Duchesnay`_.
+- New :ref:`cross_decomposition` module by `Edouard Duchesnay`_.
 
-  - :ref:`NMF` module `Vlad Niculae`_
+- :ref:`NMF` module by `Vlad Niculae`_
 
-  - Implementation of the :ref:`oracle_approximating_shrinkage` algorithm by
-    :user:`Virgile Fritsch <VirgileFritsch>` in the :ref:`covariance` module.
+- Implementation of the :ref:`oracle_approximating_shrinkage` algorithm by
+  :user:`Virgile Fritsch <VirgileFritsch>` in the :ref:`covariance` module.
 
 
 Some other modules benefited from significant improvements or cleanups.
 
 
-  - Initial support for Python 3: builds and imports cleanly,
-    some modules are usable while others have failing tests by `Fabian Pedregosa`_.
+- Initial support for Python 3: builds and imports cleanly,
+  some modules are usable while others have failing tests by `Fabian Pedregosa`_.
 
-  - :class:`decomposition.PCA` is now usable from the Pipeline object by `Olivier Grisel`_.
+- :class:`decomposition.PCA` is now usable from the Pipeline object by `Olivier Grisel`_.
 
-  - Guide :ref:`performance-howto` by `Olivier Grisel`_.
+- Guide :ref:`performance-howto` by `Olivier Grisel`_.
 
-  - Fixes for memory leaks in libsvm bindings, 64-bit safer BallTree by Lars Buitinck.
+- Fixes for memory leaks in libsvm bindings, 64-bit safer BallTree by Lars Buitinck.
 
-  - bug and style fixing in :ref:`k_means` algorithm by Jan Schlüter.
+- Bug and style fixes in :ref:`k_means` algorithm by Jan Schlüter.
 
-  - Add attribute converged to Gaussian Mixture Models by Vincent Schut.
+- Add attribute converged to Gaussian Mixture Models by Vincent Schut.
 
-  - Implemented ``transform``, ``predict_log_proba`` in
-    :class:`discriminant_analysis.LinearDiscriminantAnalysis` By `Mathieu Blondel`_.
+- Implemented ``transform``, ``predict_log_proba`` in
+  :class:`discriminant_analysis.LinearDiscriminantAnalysis` by `Mathieu Blondel`_.
 
-  - Refactoring in the :ref:`svm` module and bug fixes by `Fabian Pedregosa`_,
-    `Gael Varoquaux`_ and Amit Aides.
+- Refactoring in the :ref:`svm` module and bug fixes by `Fabian Pedregosa`_,
+  `Gael Varoquaux`_ and Amit Aides.
 
-  - Refactored SGD module (removed code duplication, better variable naming),
-    added interface for sample weight by `Peter Prettenhofer`_.
+- Refactored SGD module (removed code duplication, better variable naming),
+  added interface for sample weight by `Peter Prettenhofer`_.
 
-  - Wrapped BallTree with Cython by Thouis (Ray) Jones.
+- Wrapped BallTree with Cython by Thouis (Ray) Jones.
 
-  - Added function :func:`svm.l1_min_c` by Paolo Losi.
+- Added function :func:`svm.l1_min_c` by Paolo Losi.
 
-  - Typos, doc style, etc. by `Yaroslav Halchenko`_, `Gael Varoquaux`_,
-    `Olivier Grisel`_, Yann Malet, `Nicolas Pinto`_, Lars Buitinck and
-    `Fabian Pedregosa`_.
+- Typos, doc style, etc. by `Yaroslav Halchenko`_, `Gael Varoquaux`_,
+  `Olivier Grisel`_, Yann Malet, `Nicolas Pinto`_, Lars Buitinck and
+  `Fabian Pedregosa`_.
 
 
 People
@@ -5065,17 +5185,17 @@ People
 People that made this release possible preceded by number of commits:
 
 
-   - 159  `Olivier Grisel`_
-   - 96  `Gael Varoquaux`_
-   - 96  `Vlad Niculae`_
-   - 94  `Fabian Pedregosa`_
-   - 36  `Alexandre Gramfort`_
-   - 32  Paolo Losi
-   - 31  `Edouard Duchesnay`_
-   - 30  `Mathieu Blondel`_
-   - 25  `Peter Prettenhofer`_
-   - 22  `Nicolas Pinto`_
-   - 11  :user:`Virgile Fritsch <VirgileFritsch>`
+- 159  `Olivier Grisel`_
+- 96  `Gael Varoquaux`_
+- 96  `Vlad Niculae`_
+- 94  `Fabian Pedregosa`_
+- 36  `Alexandre Gramfort`_
+- 32  Paolo Losi
+- 31  `Edouard Duchesnay`_
+- 30  `Mathieu Blondel`_
+- 25  `Peter Prettenhofer`_
+- 22  `Nicolas Pinto`_
+- 11  :user:`Virgile Fritsch <VirgileFritsch>`
    -  7  Lars Buitinck
    -  6  Vincent Michel
    -  5  `Bertrand Thirion`_
@@ -5109,56 +5229,56 @@ preceding release, no new modules where added to this release.
 Changelog
 ---------
 
-  - Performance improvements for Gaussian Mixture Model sampling [Jan
-    Schlüter].
+- Performance improvements for Gaussian Mixture Model sampling [Jan
+  Schlüter].
 
-  - Implementation of efficient leave-one-out cross-validated Ridge in
-    :class:`linear_model.RidgeCV` [`Mathieu Blondel`_]
+- Implementation of efficient leave-one-out cross-validated Ridge in
+  :class:`linear_model.RidgeCV` [`Mathieu Blondel`_]
 
-  - Better handling of collinearity and early stopping in
-    :func:`linear_model.lars_path` [`Alexandre Gramfort`_ and `Fabian
-    Pedregosa`_].
+- Better handling of collinearity and early stopping in
+  :func:`linear_model.lars_path` [`Alexandre Gramfort`_ and `Fabian
+  Pedregosa`_].
 
-  - Fixes for liblinear ordering of labels and sign of coefficients
-    [Dan Yamins, Paolo Losi, `Mathieu Blondel`_ and `Fabian Pedregosa`_].
+- Fixes for liblinear ordering of labels and sign of coefficients
+  [Dan Yamins, Paolo Losi, `Mathieu Blondel`_ and `Fabian Pedregosa`_].
 
-  - Performance improvements for Nearest Neighbors algorithm in
-    high-dimensional spaces [`Fabian Pedregosa`_].
+- Performance improvements for Nearest Neighbors algorithm in
+  high-dimensional spaces [`Fabian Pedregosa`_].
 
-  - Performance improvements for :class:`cluster.KMeans` [`Gael
-    Varoquaux`_ and `James Bergstra`_].
+- Performance improvements for :class:`cluster.KMeans` [`Gael
+  Varoquaux`_ and `James Bergstra`_].
 
-  - Sanity checks for SVM-based classes [`Mathieu Blondel`_].
+- Sanity checks for SVM-based classes [`Mathieu Blondel`_].
 
-  - Refactoring of :class:`neighbors.NeighborsClassifier` and
-    :func:`neighbors.kneighbors_graph`: added different algorithms for
-    the k-Nearest Neighbor Search and implemented a more stable
-    algorithm for finding barycenter weights. Also added some
-    developer documentation for this module, see
-    `notes_neighbors
-    <https://github.com/scikit-learn/scikit-learn/wiki/Neighbors-working-notes>`_ for more information [`Fabian Pedregosa`_].
+- Refactoring of :class:`neighbors.NeighborsClassifier` and
+  :func:`neighbors.kneighbors_graph`: added different algorithms for
+  the k-Nearest Neighbor Search and implemented a more stable
+  algorithm for finding barycenter weights. Also added some
+  developer documentation for this module, see
+  `notes_neighbors
+  <https://github.com/scikit-learn/scikit-learn/wiki/Neighbors-working-notes>`_ for more information [`Fabian Pedregosa`_].
 
-  - Documentation improvements: Added :class:`pca.RandomizedPCA` and
-    :class:`linear_model.LogisticRegression` to the class
-    reference. Also added references of matrices used for clustering
-    and other fixes [`Gael Varoquaux`_, `Fabian Pedregosa`_, `Mathieu
-    Blondel`_, `Olivier Grisel`_, Virgile Fritsch , Emmanuelle
-    Gouillart]
+- Documentation improvements: Added :class:`pca.RandomizedPCA` and
+  :class:`linear_model.LogisticRegression` to the class
+  reference. Also added references of matrices used for clustering
+  and other fixes [`Gael Varoquaux`_, `Fabian Pedregosa`_, `Mathieu
+  Blondel`_, `Olivier Grisel`_, Virgile Fritsch, Emmanuelle
+  Gouillart]
 
-  - Binded decision_function in classes that make use of liblinear_,
-    dense and sparse variants, like :class:`svm.LinearSVC` or
-    :class:`linear_model.LogisticRegression` [`Fabian Pedregosa`_].
+- Bound decision_function in classes that make use of liblinear_,
+  dense and sparse variants, like :class:`svm.LinearSVC` or
+  :class:`linear_model.LogisticRegression` [`Fabian Pedregosa`_].
 
-  - Performance and API improvements to
-    :func:`metrics.euclidean_distances` and to
-    :class:`pca.RandomizedPCA` [`James Bergstra`_].
+- Performance and API improvements to
+  :func:`metrics.euclidean_distances` and to
+  :class:`pca.RandomizedPCA` [`James Bergstra`_].
 
-  - Fix compilation issues under NetBSD [Kamel Ibn Hassen Derouiche]
+- Fix compilation issues under NetBSD [Kamel Ibn Hassen Derouiche]
 
-  - Allow input sequences of different lengths in :class:`hmm.GaussianHMM`
-    [`Ron Weiss`_].
+- Allow input sequences of different lengths in :class:`hmm.GaussianHMM`
+  [`Ron Weiss`_].
 
-  - Fix bug in affinity propagation caused by incorrect indexing [Xinfan Meng]
+- Fix bug in affinity propagation caused by incorrect indexing [Xinfan Meng]
 
 
 People
@@ -5166,23 +5286,23 @@ People
 
 People that made this release possible preceded by number of commits:
 
-    - 85  `Fabian Pedregosa`_
-    - 67  `Mathieu Blondel`_
-    - 20  `Alexandre Gramfort`_
-    - 19  `James Bergstra`_
-    - 14  Dan Yamins
-    - 13  `Olivier Grisel`_
-    - 12  `Gael Varoquaux`_
-    - 4  `Edouard Duchesnay`_
-    - 4  `Ron Weiss`_
-    - 2  Satrajit Ghosh
-    - 2  Vincent Dubourg
-    - 1  Emmanuelle Gouillart
-    - 1  Kamel Ibn Hassen Derouiche
-    - 1  Paolo Losi
-    - 1  VirgileFritsch
-    - 1  `Yaroslav Halchenko`_
-    - 1  Xinfan Meng
+- 85  `Fabian Pedregosa`_
+- 67  `Mathieu Blondel`_
+- 20  `Alexandre Gramfort`_
+- 19  `James Bergstra`_
+- 14  Dan Yamins
+- 13  `Olivier Grisel`_
+- 12  `Gael Varoquaux`_
+- 4  `Edouard Duchesnay`_
+- 4  `Ron Weiss`_
+- 2  Satrajit Ghosh
+- 2  Vincent Dubourg
+- 1  Emmanuelle Gouillart
+- 1  Kamel Ibn Hassen Derouiche
+- 1  Paolo Losi
+- 1  VirgileFritsch
+- 1  `Yaroslav Halchenko`_
+- 1  Xinfan Meng
 
 
 .. _changes_0_6:
@@ -5201,56 +5321,56 @@ applications to real-world datasets.
 Changelog
 ---------
 
-  - New `stochastic gradient
-    <http://scikit-learn.org/stable/modules/sgd.html>`_ descent
-    module by Peter Prettenhofer. The module comes with complete
-    documentation and examples.
+- New `stochastic gradient
+  <http://scikit-learn.org/stable/modules/sgd.html>`_ descent
+  module by Peter Prettenhofer. The module comes with complete
+  documentation and examples.
 
-  - Improved svm module: memory consumption has been reduced by 50%,
-    heuristic to automatically set class weights, possibility to
-    assign weights to samples (see
-    :ref:`sphx_glr_auto_examples_svm_plot_weighted_samples.py` for an example).
+- Improved svm module: memory consumption has been reduced by 50%,
+  heuristic to automatically set class weights, possibility to
+  assign weights to samples (see
+  :ref:`sphx_glr_auto_examples_svm_plot_weighted_samples.py` for an example).
 
-  - New :ref:`gaussian_process` module by Vincent Dubourg. This module
-    also has great documentation and some very neat examples. See
-    example_gaussian_process_plot_gp_regression.py or
-    example_gaussian_process_plot_gp_probabilistic_classification_after_regression.py
-    for a taste of what can be done.
+- New :ref:`gaussian_process` module by Vincent Dubourg. This module
+  also has great documentation and some very neat examples. See
+  example_gaussian_process_plot_gp_regression.py or
+  example_gaussian_process_plot_gp_probabilistic_classification_after_regression.py
+  for a taste of what can be done.
 
-  - It is now possible to use liblinear’s Multi-class SVC (option
-    multi_class in :class:`svm.LinearSVC`)
+- It is now possible to use liblinear’s Multi-class SVC (option
+  multi_class in :class:`svm.LinearSVC`)
 
-  - New features and performance improvements of text feature
-    extraction.
+- New features and performance improvements of text feature
+  extraction.
 
-  - Improved sparse matrix support, both in main classes
-    (:class:`grid_search.GridSearchCV`) as in modules
-    sklearn.svm.sparse and sklearn.linear_model.sparse.
+- Improved sparse matrix support, both in main classes
+  (:class:`grid_search.GridSearchCV`) and in modules
+  sklearn.svm.sparse and sklearn.linear_model.sparse.
 
-  - Lots of cool new examples and a new section that uses real-world
-    datasets was created. These include:
-    :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py`,
-    :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py`,
-    :ref:`sphx_glr_auto_examples_applications_svm_gui.py`,
-    :ref:`sphx_glr_auto_examples_applications_wikipedia_principal_eigenvector.py` and
-    others.
+- Lots of cool new examples and a new section that uses real-world
+  datasets was created. These include:
+  :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py`,
+  :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py`,
+  :ref:`sphx_glr_auto_examples_applications_svm_gui.py`,
+  :ref:`sphx_glr_auto_examples_applications_wikipedia_principal_eigenvector.py` and
+  others.
 
-  - Faster :ref:`least_angle_regression` algorithm. It is now 2x
-    faster than the R version on worst case and up to 10x times faster
-    on some cases.
+- Faster :ref:`least_angle_regression` algorithm. It is now 2x
+  faster than the R version in the worst case and up to 10x faster
+  in some cases.
 
-  - Faster coordinate descent algorithm. In particular, the full path
-    version of lasso (:func:`linear_model.lasso_path`) is more than
-    200x times faster than before.
+- Faster coordinate descent algorithm. In particular, the full path
+  version of lasso (:func:`linear_model.lasso_path`) is more than
+  200x faster than before.
 
-  - It is now possible to get probability estimates from a
-    :class:`linear_model.LogisticRegression` model.
+- It is now possible to get probability estimates from a
+  :class:`linear_model.LogisticRegression` model.
 
-  - module renaming: the glm module has been renamed to linear_model,
-    the gmm module has been included into the more general mixture
-    model and the sgd module has been included in linear_model.
+- module renaming: the glm module has been renamed to linear_model,
+  the gmm module has been included into the more general mixture
+  model and the sgd module has been included in linear_model.
 
-  - Lots of bug fixes and documentation improvements.
+- Lots of bug fixes and documentation improvements.
 
 
 People
@@ -5300,86 +5420,86 @@ Changelog
 New classes
 -----------
 
-    - Support for sparse matrices in some classifiers of modules
-      ``svm`` and ``linear_model`` (see :class:`svm.sparse.SVC`,
-      :class:`svm.sparse.SVR`, :class:`svm.sparse.LinearSVC`,
-      :class:`linear_model.sparse.Lasso`, :class:`linear_model.sparse.ElasticNet`)
+- Support for sparse matrices in some classifiers of modules
+  ``svm`` and ``linear_model`` (see :class:`svm.sparse.SVC`,
+  :class:`svm.sparse.SVR`, :class:`svm.sparse.LinearSVC`,
+  :class:`linear_model.sparse.Lasso`, :class:`linear_model.sparse.ElasticNet`)
 
-    - New :class:`pipeline.Pipeline` object to compose different estimators.
+- New :class:`pipeline.Pipeline` object to compose different estimators.
 
-    - Recursive Feature Elimination routines in module
-      :ref:`feature_selection`.
+- Recursive Feature Elimination routines in module
+  :ref:`feature_selection`.
 
-    - Addition of various classes capable of cross validation in the
-      linear_model module (:class:`linear_model.LassoCV`, :class:`linear_model.ElasticNetCV`,
-      etc.).
+- Addition of various classes capable of cross validation in the
+  linear_model module (:class:`linear_model.LassoCV`, :class:`linear_model.ElasticNetCV`,
+  etc.).
 
-    - New, more efficient LARS algorithm implementation. The Lasso
-      variant of the algorithm is also implemented. See
-      :class:`linear_model.lars_path`, :class:`linear_model.Lars` and
-      :class:`linear_model.LassoLars`.
+- New, more efficient LARS algorithm implementation. The Lasso
+  variant of the algorithm is also implemented. See
+  :class:`linear_model.lars_path`, :class:`linear_model.Lars` and
+  :class:`linear_model.LassoLars`.
 
-    - New Hidden Markov Models module (see classes
-      :class:`hmm.GaussianHMM`, :class:`hmm.MultinomialHMM`,
-      :class:`hmm.GMMHMM`)
+- New Hidden Markov Models module (see classes
+  :class:`hmm.GaussianHMM`, :class:`hmm.MultinomialHMM`,
+  :class:`hmm.GMMHMM`)
 
-    - New module feature_extraction (see :ref:`class reference
-      <feature_extraction_ref>`)
+- New module feature_extraction (see :ref:`class reference
+  <feature_extraction_ref>`)
 
-    - New FastICA algorithm in module sklearn.fastica
+- New FastICA algorithm in module sklearn.fastica
 
 
 Documentation
 -------------
 
-    - Improved documentation for many modules, now separating
-      narrative documentation from the class reference. As an example,
-      see `documentation for the SVM module
-      <http://scikit-learn.org/stable/modules/svm.html>`_ and the
-      complete `class reference
-      <http://scikit-learn.org/stable/modules/classes.html>`_.
+- Improved documentation for many modules, now separating
+  narrative documentation from the class reference. As an example,
+  see `documentation for the SVM module
+  <http://scikit-learn.org/stable/modules/svm.html>`_ and the
+  complete `class reference
+  <http://scikit-learn.org/stable/modules/classes.html>`_.
 
 Fixes
 -----
 
-    - API changes: adhere variable names to PEP-8, give more
-      meaningful names.
+- API changes: make variable names adhere to PEP-8, give more
+  meaningful names.
 
-    - Fixes for svm module to run on a shared memory context
-      (multiprocessing).
+- Fixes for svm module to run on a shared memory context
+  (multiprocessing).
 
-    - It is again possible to generate latex (and thus PDF) from the
-      sphinx docs.
+- It is again possible to generate latex (and thus PDF) from the
+  sphinx docs.
 
 Examples
 --------
 
-    - new examples using some of the mlcomp datasets:
-      ``sphx_glr_auto_examples_mlcomp_sparse_document_classification.py`` (since removed) and
-      :ref:`sphx_glr_auto_examples_text_document_classification_20newsgroups.py`
+- new examples using some of the mlcomp datasets:
+  ``sphx_glr_auto_examples_mlcomp_sparse_document_classification.py`` (since removed) and
+  :ref:`sphx_glr_auto_examples_text_document_classification_20newsgroups.py`
 
-    - Many more examples. `See here
-      <http://scikit-learn.org/stable/auto_examples/index.html>`_
-      the full list of examples.
+- Many more examples. `See here
+  <http://scikit-learn.org/stable/auto_examples/index.html>`_
+  the full list of examples.
 
 
 External dependencies
 ---------------------
 
-    - Joblib is now a dependency of this package, although it is
-      shipped with (sklearn.externals.joblib).
+- Joblib is now a dependency of this package, although it is
+  shipped with it (sklearn.externals.joblib).
 
 Removed modules
 ---------------
 
-    - Module ann (Artificial Neural Networks) has been removed from
-      the distribution. Users wanting this sort of algorithms should
-      take a look into pybrain.
+- Module ann (Artificial Neural Networks) has been removed from
+  the distribution. Users wanting this sort of algorithm should
+  take a look at pybrain.
 
 Misc
 ----
 
-    - New sphinx theme for the web page.
+- New sphinx theme for the web page.
 
 
 Authors
@@ -5413,37 +5533,37 @@ Changelog
 
 Major changes in this release include:
 
-    - Coordinate Descent algorithm (Lasso, ElasticNet) refactoring &
-      speed improvements (roughly 100x times faster).
+- Coordinate Descent algorithm (Lasso, ElasticNet) refactoring &
+  speed improvements (roughly 100x faster).
 
-    - Coordinate Descent Refactoring (and bug fixing) for consistency
-      with R's package GLMNET.
+- Coordinate Descent Refactoring (and bug fixing) for consistency
+  with R's package GLMNET.
 
-    - New metrics module.
+- New metrics module.
 
-    - New GMM module contributed by Ron Weiss.
+- New GMM module contributed by Ron Weiss.
 
-    - Implementation of the LARS algorithm (without Lasso variant for now).
+- Implementation of the LARS algorithm (without Lasso variant for now).
 
-    - feature_selection module redesign.
+- feature_selection module redesign.
 
-    - Migration to GIT as version control system.
+- Migration to GIT as version control system.
 
-    - Removal of obsolete attrselect module.
+- Removal of obsolete attrselect module.
 
-    - Rename of private compiled extensions (added underscore).
+- Rename of private compiled extensions (added underscore).
 
-    - Removal of legacy unmaintained code.
+- Removal of legacy unmaintained code.
 
-    - Documentation improvements (both docstring and rst).
+- Documentation improvements (both docstring and rst).
 
-    - Improvement of the build system to (optionally) link with MKL.
-      Also, provide a lite BLAS implementation in case no system-wide BLAS is
-      found.
+- Improvement of the build system to (optionally) link with MKL.
+  Also, provide a lite BLAS implementation in case no system-wide BLAS is
+  found.
 
-    - Lots of new examples.
+- Lots of new examples.
 
-    - Many, many bug fixes ...
+- Many, many bug fixes ...
 
 
 Authors