From ca334af087e3ab50a4227b208b145a49dfbce233 Mon Sep 17 00:00:00 2001
From: MDouriez <marie.douriez@gmail.com>
Date: Sat, 2 Nov 2019 15:57:59 -0700
Subject: [PATCH 01/11] documentation for random_state in forests

---
 sklearn/ensemble/_forest.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index a1c6d1b255523..ec93e66aab69e 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -960,10 +960,11 @@ class RandomForestClassifier(ForestClassifier):
         <n_jobs>` for more details.
 
     random_state : int, RandomState instance or None, optional (default=None)
-        If int, random_state is the seed used by the random number generator;
-        If RandomState instance, random_state is the random number generator;
-        If None, the random number generator is the RandomState instance used
-        by `np.random`.
+        Controls both the randomness of the bootstrapping of the samples used
+        when building trees (if `bootstrap=True``) and the sampling of the
+        features to consider when looking for the best split at each node
+        (if ``max_features < n_features``).
+        See :term:`Glossary <random_state>` for details.
 
     verbose : int, optional (default=0)
         Controls the verbosity when fitting and predicting.

From 70d63d16370d83e6aa70b1d7f89394947c632f36 Mon Sep 17 00:00:00 2001
From: MDouriez <marie.douriez@gmail.com>
Date: Sat, 2 Nov 2019 16:01:49 -0700
Subject: [PATCH 02/11] move note to parameter

---
 sklearn/ensemble/_forest.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index ec93e66aab69e..677f485fdfdc4 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -964,6 +964,12 @@ class RandomForestClassifier(ForestClassifier):
         when building trees (if `bootstrap=True``) and the sampling of the
         features to consider when looking for the best split at each node
         (if ``max_features < n_features``).
+        The features are always randomly permuted at each split. Therefore,
+        the best found split may vary, even with the same training data,
+        ``max_features=n_features`` and ``bootstrap=False``, if the improvement
+        of the criterion is identical for several splits enumerated during the
+        search of the best split. To obtain a deterministic behaviour during
+        fitting, ``random_state`` has to be fixed.
         See :term:`Glossary <random_state>` for details.
 
     verbose : int, optional (default=0)
@@ -1080,13 +1086,6 @@ class labels (multi-output problem).
     reduce memory consumption, the complexity and size of the trees should be
     controlled by setting those parameter values.
 
-    The features are always randomly permuted at each split. Therefore,
-    the best found split may vary, even with the same training data,
-    ``max_features=n_features`` and ``bootstrap=False``, if the improvement
-    of the criterion is identical for several splits enumerated during the
-    search of the best split. To obtain a deterministic behaviour during
-    fitting, ``random_state`` has to be fixed.
-
     References
     ----------
 

From 8ff9ed872e5172af9ee2a39da34cb2e80498efee Mon Sep 17 00:00:00 2001
From: MDouriez <marie.douriez@gmail.com>
Date: Sat, 2 Nov 2019 16:13:39 -0700
Subject: [PATCH 03/11] same for RandomForestRegressor

---
 sklearn/ensemble/_forest.py | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index 677f485fdfdc4..5f12e58bb8acb 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -961,15 +961,15 @@ class RandomForestClassifier(ForestClassifier):
 
     random_state : int, RandomState instance or None, optional (default=None)
         Controls both the randomness of the bootstrapping of the samples used
-        when building trees (if `bootstrap=True``) and the sampling of the
+        when building trees (if ``bootstrap=True``) and the sampling of the
         features to consider when looking for the best split at each node
         (if ``max_features < n_features``).
-        The features are always randomly permuted at each split. Therefore,
-        the best found split may vary, even with the same training data,
-        ``max_features=n_features`` and ``bootstrap=False``, if the improvement
-        of the criterion is identical for several splits enumerated during the
-        search of the best split. To obtain a deterministic behaviour during
-        fitting, ``random_state`` has to be fixed.
+        Also note that the features are always randomly permuted at each split.
+        Therefore, the best found split may vary, even with the same training
+        data, ``max_features=n_features`` and ``bootstrap=False``, if the
+        improvement of the criterion is identical for several splits enumerated
+        during the search of the best split. To obtain a deterministic
+        behaviour during fitting, ``random_state`` has to be fixed.
         See :term:`Glossary <random_state>` for details.
 
     verbose : int, optional (default=0)
@@ -1276,10 +1276,17 @@ class RandomForestRegressor(ForestRegressor):
         <n_jobs>` for more details.
 
     random_state : int, RandomState instance or None, optional (default=None)
-        If int, random_state is the seed used by the random number generator;
-        If RandomState instance, random_state is the random number generator;
-        If None, the random number generator is the RandomState instance used
-        by `np.random`.
+        Controls both the randomness of the bootstrapping of the samples used
+        when building trees (if ``bootstrap=True``) and the sampling of the
+        features to consider when looking for the best split at each node
+        (if ``max_features < n_features``).
+        Also note that the features are always randomly permuted at each split.
+        Therefore, the best found split may vary, even with the same training
+        data, ``max_features=n_features`` and ``bootstrap=False``, if the
+        improvement of the criterion is identical for several splits enumerated
+        during the search of the best split. To obtain a deterministic
+        behaviour during fitting, ``random_state`` has to be fixed.
+        See :term:`Glossary <random_state>` for details.
 
     verbose : int, optional (default=0)
         Controls the verbosity when fitting and predicting.
@@ -1357,13 +1364,6 @@ class RandomForestRegressor(ForestRegressor):
     reduce memory consumption, the complexity and size of the trees should be
     controlled by setting those parameter values.
 
-    The features are always randomly permuted at each split. Therefore,
-    the best found split may vary, even with the same training data,
-    ``max_features=n_features`` and ``bootstrap=False``, if the improvement
-    of the criterion is identical for several splits enumerated during the
-    search of the best split. To obtain a deterministic behaviour during
-    fitting, ``random_state`` has to be fixed.
-
     The default value ``max_features="auto"`` uses ``n_features``
     rather than ``n_features / 3``. The latter was originally suggested in
     [1], whereas the former was more recently justified empirically in [2].

From 48ed17a8084b5275e479788270722b9b587f67e5 Mon Sep 17 00:00:00 2001
From: MDouriez <marie.douriez@gmail.com>
Date: Sat, 2 Nov 2019 16:41:09 -0700
Subject: [PATCH 04/11] add doc for ExtraTreesRegressor and
 ExtraTreesClassifier

---
 sklearn/ensemble/_forest.py | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index 5f12e58bb8acb..899511dba6225 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -1538,7 +1538,7 @@ class ExtraTreesClassifier(ForestClassifier):
 
     bootstrap : boolean, optional (default=False)
         Whether bootstrap samples are used when building trees. If False, the
-        whole datset is used to build each tree.
+        whole dataset is used to build each tree.
 
     oob_score : bool, optional (default=False)
         Whether to use out-of-bag samples to estimate
@@ -1552,10 +1552,13 @@ class ExtraTreesClassifier(ForestClassifier):
         <n_jobs>` for more details.
 
     random_state : int, RandomState instance or None, optional (default=None)
-        If int, random_state is the seed used by the random number generator;
-        If RandomState instance, random_state is the random number generator;
-        If None, the random number generator is the RandomState instance used
-        by `np.random`.
+        Controls 3 sources of randomness:
+        - the bootstrapping of the samples used when building trees
+          (if ``bootstrap=True``)
+        - the sampling of the features to consider when looking for the best
+          split at each node (if ``max_features < n_features``)
+        - the draw of the splits for each of the ``max_features``
+        See :term:`Glossary <random_state>` for details.
 
     verbose : int, optional (default=0)
         Controls the verbosity when fitting and predicting.
@@ -1842,7 +1845,7 @@ class ExtraTreesRegressor(ForestRegressor):
 
     bootstrap : boolean, optional (default=False)
         Whether bootstrap samples are used when building trees. If False, the
-        whole datset is used to build each tree.
+        whole dataset is used to build each tree.
 
     oob_score : bool, optional (default=False)
         Whether to use out-of-bag samples to estimate the R^2 on unseen data.
@@ -1855,10 +1858,13 @@ class ExtraTreesRegressor(ForestRegressor):
         <n_jobs>` for more details.
 
     random_state : int, RandomState instance or None, optional (default=None)
-        If int, random_state is the seed used by the random number generator;
-        If RandomState instance, random_state is the random number generator;
-        If None, the random number generator is the RandomState instance used
-        by `np.random`.
+        Controls 3 sources of randomness:
+        - the bootstrapping of the samples used when building trees
+          (if ``bootstrap=True``)
+        - the sampling of the features to consider when looking for the best
+          split at each node (if ``max_features < n_features``)
+        - the draw of the splits for each of the ``max_features``
+        See :term:`Glossary <random_state>` for details.
 
     verbose : int, optional (default=0)
         Controls the verbosity when fitting and predicting.

From 07204b3ada5f4dea607b69e01d81201fb8ccd204 Mon Sep 17 00:00:00 2001
From: Marie Douriez <mdouriez@lyft.com>
Date: Mon, 18 Nov 2019 09:18:00 -0800
Subject: [PATCH 05/11] add blank line before bullet list in random_state doc

---
 sklearn/ensemble/_forest.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index 899511dba6225..e2ee3b52e17f7 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -1553,6 +1553,7 @@ class ExtraTreesClassifier(ForestClassifier):
 
     random_state : int, RandomState instance or None, optional (default=None)
         Controls 3 sources of randomness:
+        
         - the bootstrapping of the samples used when building trees
           (if ``bootstrap=True``)
         - the sampling of the features to consider when looking for the best

From 07ae312271879b9012e8264907916d7888a9c7e1 Mon Sep 17 00:00:00 2001
From: Marie Douriez <mdouriez@lyft.com>
Date: Mon, 18 Nov 2019 13:53:06 -0800
Subject: [PATCH 06/11] lint: remove trailing whitespace on blank line

---
 sklearn/ensemble/_forest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index e2ee3b52e17f7..63ef66387200a 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -1553,7 +1553,7 @@ class ExtraTreesClassifier(ForestClassifier):
 
     random_state : int, RandomState instance or None, optional (default=None)
         Controls 3 sources of randomness:
-        
+
         - the bootstrapping of the samples used when building trees
           (if ``bootstrap=True``)
         - the sampling of the features to consider when looking for the best

From 3d810f93139b90fb374c492eefeeb63fc9fed5d0 Mon Sep 17 00:00:00 2001
From: MDouriez <marie.douriez@gmail.com>
Date: Mon, 18 Nov 2019 19:34:05 -0800
Subject: [PATCH 07/11] move note back to where it was

---
 sklearn/ensemble/_forest.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index 63ef66387200a..24c00cd896473 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -964,12 +964,6 @@ class RandomForestClassifier(ForestClassifier):
         when building trees (if ``bootstrap=True``) and the sampling of the
         features to consider when looking for the best split at each node
         (if ``max_features < n_features``).
-        Also note that the features are always randomly permuted at each split.
-        Therefore, the best found split may vary, even with the same training
-        data, ``max_features=n_features`` and ``bootstrap=False``, if the
-        improvement of the criterion is identical for several splits enumerated
-        during the search of the best split. To obtain a deterministic
-        behaviour during fitting, ``random_state`` has to be fixed.
         See :term:`Glossary <random_state>` for details.
 
     verbose : int, optional (default=0)
@@ -1086,6 +1080,13 @@ class labels (multi-output problem).
     reduce memory consumption, the complexity and size of the trees should be
     controlled by setting those parameter values.
 
+    The features are always randomly permuted at each split. Therefore,
+    the best found split may vary, even with the same training data,
+    ``max_features=n_features`` and ``bootstrap=False``, if the improvement
+    of the criterion is identical for several splits enumerated during the
+    search for the best split. To obtain deterministic behaviour during
+    fitting, ``random_state`` has to be fixed.
+
     References
     ----------
 
@@ -1280,13 +1281,6 @@ class RandomForestRegressor(ForestRegressor):
         when building trees (if ``bootstrap=True``) and the sampling of the
         features to consider when looking for the best split at each node
         (if ``max_features < n_features``).
-        Also note that the features are always randomly permuted at each split.
-        Therefore, the best found split may vary, even with the same training
-        data, ``max_features=n_features`` and ``bootstrap=False``, if the
-        improvement of the criterion is identical for several splits enumerated
-        during the search of the best split. To obtain a deterministic
-        behaviour during fitting, ``random_state`` has to be fixed.
-        See :term:`Glossary <random_state>` for details.
 
     verbose : int, optional (default=0)
         Controls the verbosity when fitting and predicting.
@@ -1364,6 +1358,13 @@ class RandomForestRegressor(ForestRegressor):
     reduce memory consumption, the complexity and size of the trees should be
     controlled by setting those parameter values.
 
+    The features are always randomly permuted at each split. Therefore,
+    the best found split may vary, even with the same training data,
+    ``max_features=n_features`` and ``bootstrap=False``, if the improvement
+    of the criterion is identical for several splits enumerated during the
+    search for the best split. To obtain deterministic behaviour during
+    fitting, ``random_state`` has to be fixed.
+
     The default value ``max_features="auto"`` uses ``n_features``
     rather than ``n_features / 3``. The latter was originally suggested in
     [1], whereas the former was more recently justified empirically in [2].

From 27e5b62b5740ea036513bed6f919d26f48ed4ae9 Mon Sep 17 00:00:00 2001
From: MDouriez <marie.douriez@gmail.com>
Date: Mon, 18 Nov 2019 19:39:11 -0800
Subject: [PATCH 08/11] add Glossary in RandomForestRegressor

---
 sklearn/ensemble/_forest.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index 24c00cd896473..37634096418a4 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -1281,6 +1281,7 @@ class RandomForestRegressor(ForestRegressor):
         when building trees (if ``bootstrap=True``) and the sampling of the
         features to consider when looking for the best split at each node
         (if ``max_features < n_features``).
+        See :term:`Glossary <random_state>` for details.
 
     verbose : int, optional (default=0)
         Controls the verbosity when fitting and predicting.

From c8fd2354f058ad2fdc68456041d3215c311b6038 Mon Sep 17 00:00:00 2001
From: MDouriez <marie.douriez@gmail.com>
Date: Mon, 18 Nov 2019 19:50:01 -0800
Subject: [PATCH 09/11] adding description for RandomTreesEmbedding

---
 sklearn/ensemble/_forest.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index 37634096418a4..ab475758e80fa 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -1862,6 +1862,7 @@ class ExtraTreesRegressor(ForestRegressor):
 
     random_state : int, RandomState instance or None, optional (default=None)
         Controls 3 sources of randomness:
+
         - the bootstrapping of the samples used when building trees
           (if ``bootstrap=True``)
         - the sampling of the features to consider when looking for the best
@@ -2094,10 +2095,14 @@ class RandomTreesEmbedding(BaseForest):
         <n_jobs>` for more details.
 
     random_state : int, RandomState instance or None, optional (default=None)
-        If int, random_state is the seed used by the random number generator;
-        If RandomState instance, random_state is the random number generator;
-        If None, the random number generator is the RandomState instance used
-        by `np.random`.
+        Controls 3 sources of randomness:
+
+        - the bootstrapping of the samples used when building trees
+          (if ``bootstrap=True``)
+        - the sampling of the features to consider when looking for the best
+          split at each node (if ``max_features < n_features``)
+        - the draw of the splits for each of the `max_features`
+        See :term:`Glossary <random_state>` for details.
 
     verbose : int, optional (default=0)
         Controls the verbosity when fitting and predicting.

From bcdd5de7c821789fe450dda032d14bf95f444448 Mon Sep 17 00:00:00 2001
From: MDouriez <marie.douriez@gmail.com>
Date: Mon, 18 Nov 2019 20:24:33 -0800
Subject: [PATCH 10/11] reword feature sampling bullet in RandomTreesEmbedding

---
 sklearn/ensemble/_forest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index 20c449d363709..f07baf6dc596a 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -2102,7 +2102,7 @@ class RandomTreesEmbedding(BaseForest):
 
         - the bootstrapping of the samples used when building trees
           (if ``bootstrap=True``)
-        - the sampling of the features to consider when looking for the best
+        - the sampling of the features to consider when looking for a
           split at each node (if ``max_features < n_features``)
         - the draw of the splits for each of the `max_features`
         See :term:`Glossary <random_state>` for details.

From 5c8c0982b509fb1c4d630288da8279739179d90b Mon Sep 17 00:00:00 2001
From: MDouriez <marie.douriez@gmail.com>
Date: Mon, 18 Nov 2019 21:47:29 -0800
Subject: [PATCH 11/11] correct description for RandomTreesEmbedding

---
 sklearn/ensemble/_forest.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index f07baf6dc596a..e7a0b8e56dde8 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -2098,13 +2098,8 @@ class RandomTreesEmbedding(BaseForest):
         <n_jobs>` for more details.
 
     random_state : int, RandomState instance or None, optional (default=None)
-        Controls 3 sources of randomness:
-
-        - the bootstrapping of the samples used when building trees
-          (if ``bootstrap=True``)
-        - the sampling of the features to consider when looking for a
-          split at each node (if ``max_features < n_features``)
-        - the draw of the splits for each of the `max_features`
+        Controls the generation of the random `y` used to fit the trees
+        and the draw of the splits for each feature at the trees' nodes.
         See :term:`Glossary <random_state>` for details.
 
     verbose : int, optional (default=0)