From d5b638332975bfbf59ca8c728556aa35e16acb82 Mon Sep 17 00:00:00 2001
From: genvalen
Date: Fri, 7 Jan 2022 23:53:20 -0500
Subject: [PATCH 1/8] Update gbc docs to include accepted range of scalar params

---
 sklearn/ensemble/_gb.py | 39 +++++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py
index f4cffd3665716..65b126238060d 100644
--- a/sklearn/ensemble/_gb.py
+++ b/sklearn/ensemble/_gb.py
@@ -920,11 +920,13 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     learning_rate : float, default=0.1
         Learning rate shrinks the contribution of each tree by `learning_rate`.
         There is a trade-off between learning_rate and n_estimators.
+        Values must be in the range `(0.0, inf)`.
 
     n_estimators : int, default=100
         The number of boosting stages to perform. Gradient boosting
         is fairly robust to over-fitting so a large number usually
         results in better performance.
+        Values must be in the range `[1, inf)`.
 
     subsample : float, default=1.0
         The fraction of samples to be used for fitting the individual base
@@ -932,6 +934,7 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         Boosting. `subsample` interacts with the parameter `n_estimators`.
         Choosing `subsample < 1.0` leads to a reduction of variance
         and an increase in bias.
+        Values must be in the range `(0, 1]`.
 
     criterion : {'friedman_mse', 'squared_error', 'mse', 'mae'}, \
             default='friedman_mse'
@@ -957,10 +960,9 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     min_samples_split : int or float, default=2
         The minimum number of samples required to split an internal node:
 
-        - If int, then consider `min_samples_split` as the minimum number.
-        - If float, then `min_samples_split` is a fraction and
-          `ceil(min_samples_split * n_samples)` are the minimum
-          number of samples for each split.
+        - If int, values must be in the range `[2, n_samples]`.
+        - If float, values must be in the range `(0, 1]` and `min_samples_split`
+          will be `ceil(min_samples_split * n_samples)`.
 
         .. versionchanged:: 0.18
            Added float values for fractions.
@@ -972,10 +974,9 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         right branches. This may have the effect of smoothing the model,
         especially in regression.
 
-        - If int, then consider `min_samples_leaf` as the minimum number.
-        - If float, then `min_samples_leaf` is a fraction and
-          `ceil(min_samples_leaf * n_samples)` are the minimum
-          number of samples for each node.
+        - If int, values must be in the range `[1, n_samples]`.
+        - If float, values must be in the range `(0, 0.5]` and `min_samples_leaf`
+          will be `ceil(min_samples_leaf * n_samples)`.
 
         .. versionchanged:: 0.18
            Added float values for fractions.
@@ -984,16 +985,18 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         The minimum weighted fraction of the sum total of weights (of all
         the input samples) required to be at a leaf node. Samples have
         equal weight when sample_weight is not provided.
+        Values must be in the range `[0, 0.5]`.
 
     max_depth : int, default=3
         The maximum depth of the individual regression estimators. The maximum
         depth limits the number of nodes in the tree. Tune this parameter
         for best performance; the best value depends on the interaction
         of the input variables.
+        Values must be in the range `[1, inf)`.
 
     min_impurity_decrease : float, default=0.0
         A node will be split if this split induces a decrease of the impurity
-        greater than or equal to this value.
+        greater than or equal to this value. Values must be in the range `[0.0, inf)`.
 
         The weighted impurity decrease equation is the following::
@@ -1028,10 +1031,9 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     max_features : {'auto', 'sqrt', 'log2'}, int or float, default=None
         The number of features to consider when looking for the best split:
 
-        - If int, then consider `max_features` features at each split.
-        - If float, then `max_features` is a fraction and
-          `int(max_features * n_features)` features are considered at each
-          split.
+        - If int, values must be in the range `[1, number of features]`.
+        - If float, values must be in the range `(0, 1]` and the features
+          considered at each split will be `int(max_features * n_features)`.
         - If 'auto', then `max_features=sqrt(n_features)`.
         - If 'sqrt', then `max_features=sqrt(n_features)`.
         - If 'log2', then `max_features=log2(n_features)`.
@@ -1048,11 +1050,13 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         Enable verbose output. If 1 then it prints progress and performance
         once in a while (the more trees the lower the frequency). If greater
         than 1 then it prints progress and performance for every tree.
+        Values must be in the range `[0, inf)`.
 
     max_leaf_nodes : int, default=None
         Grow trees with ``max_leaf_nodes`` in best-first fashion.
         Best nodes are defined as relative reduction in impurity.
-        If None then unlimited number of leaf nodes.
+        Values must be in the range `[2, inf)`.
+        If None, then unlimited number of leaf nodes.
 
     warm_start : bool, default=False
         When set to ``True``, reuse the solution of the previous call to fit
@@ -1074,6 +1078,7 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         data as validation and terminate training when validation score is not
         improving in all of the previous ``n_iter_no_change`` numbers of
         iterations. The split is stratified.
+        Values must be in the range `[1, inf)`.
 
         .. versionadded:: 0.20
 
@@ -1081,14 +1086,16 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         Tolerance for the early stopping. When the loss is not improving
         by at least tol for ``n_iter_no_change`` iterations (if set to a
         number), the training stops.
+        Values must be in the range `(0, inf)`.
 
         .. versionadded:: 0.20
 
     ccp_alpha : non-negative float, default=0.0
         Complexity parameter used for Minimal Cost-Complexity Pruning. The
         subtree with the largest cost complexity that is smaller than
-        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See
-        :ref:`minimal_cost_complexity_pruning` for details.
+        ``ccp_alpha`` will be chosen. By default, no pruning is performed.
+        Values must be in the range `[0, inf)`.
+        See :ref:`minimal_cost_complexity_pruning` for details.
 
         .. versionadded:: 0.22

From 236c8eabca385a59822cd3350408f32ed7bb988b Mon Sep 17 00:00:00 2001
From: genvalen
Date: Sat, 8 Jan 2022 19:38:28 -0500
Subject: [PATCH 2/8] Update gbr docs to include accepted range of scalar params

---
 sklearn/ensemble/_gb.py | 46 +++++++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py
index 65b126238060d..e6596ad011a2c 100644
--- a/sklearn/ensemble/_gb.py
+++ b/sklearn/ensemble/_gb.py
@@ -996,7 +996,8 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     min_impurity_decrease : float, default=0.0
         A node will be split if this split induces a decrease of the impurity
-        greater than or equal to this value. Values must be in the range `[0.0, inf)`.
+        greater than or equal to this value.
+        Values must be in the range `[0.0, inf)`.
 
         The weighted impurity decrease equation is the following::
@@ -1065,7 +1066,7 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
 
     validation_fraction : float, default=0.1
         The proportion of training data to set aside as validation set for
-        early stopping. Must be between 0 and 1.
+        early stopping. Values must be in the range `(0, 1)`.
         Only used if ``n_iter_no_change`` is set to an integer.
 
         .. versionadded:: 0.20
@@ -1504,11 +1505,13 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     learning_rate : float, default=0.1
         Learning rate shrinks the contribution of each tree by `learning_rate`.
         There is a trade-off between learning_rate and n_estimators.
+        Values must be in the range `(0.0, inf)`.
 
     n_estimators : int, default=100
         The number of boosting stages to perform. Gradient boosting
         is fairly robust to over-fitting so a large number usually
         results in better performance.
+        Values must be in the range `[1, inf)`.
 
     subsample : float, default=1.0
         The fraction of samples to be used for fitting the individual base
@@ -1516,6 +1519,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         Boosting. `subsample` interacts with the parameter `n_estimators`.
         Choosing `subsample < 1.0` leads to a reduction of variance
         and an increase in bias.
+        Values must be in the range `(0, 1]`.
 
     criterion : {'friedman_mse', 'squared_error', 'mse', 'mae'}, \
             default='friedman_mse'
@@ -1540,10 +1544,9 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     min_samples_split : int or float, default=2
         The minimum number of samples required to split an internal node:
 
-        - If int, then consider `min_samples_split` as the minimum number.
-        - If float, then `min_samples_split` is a fraction and
-          `ceil(min_samples_split * n_samples)` are the minimum
-          number of samples for each split.
+        - If int, values must be in the range `[2, n_samples]`.
+        - If float, values must be in the range `(0, 1]` and `min_samples_split`
+          will be `ceil(min_samples_split * n_samples)`.
 
         .. versionchanged:: 0.18
            Added float values for fractions.
@@ -1555,10 +1558,9 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         right branches. This may have the effect of smoothing the model,
         especially in regression.
 
-        - If int, then consider `min_samples_leaf` as the minimum number.
-        - If float, then `min_samples_leaf` is a fraction and
-          `ceil(min_samples_leaf * n_samples)` are the minimum
-          number of samples for each node.
+        - If int, values must be in the range `[1, n_samples]`.
+        - If float, values must be in the range `(0, 0.5]` and `min_samples_leaf`
+          will be `ceil(min_samples_leaf * n_samples)`.
 
         .. versionchanged:: 0.18
            Added float values for fractions.
@@ -1567,16 +1569,19 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         The minimum weighted fraction of the sum total of weights (of all
         the input samples) required to be at a leaf node. Samples have
         equal weight when sample_weight is not provided.
+        Values must be in the range `[0, 0.5]`.
 
     max_depth : int, default=3
         Maximum depth of the individual regression estimators. The maximum
         depth limits the number of nodes in the tree. Tune this parameter
        for best performance; the best value depends on the interaction
         of the input variables.
+        Values must be in the range `[1, inf)`.
 
     min_impurity_decrease : float, default=0.0
         A node will be split if this split induces a decrease of the impurity
         greater than or equal to this value.
+        Values must be in the range `[0.0, inf)`.
 
         The weighted impurity decrease equation is the following::
@@ -1612,10 +1617,9 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     max_features : {'auto', 'sqrt', 'log2'}, int or float, default=None
         The number of features to consider when looking for the best split:
 
-        - If int, then consider `max_features` features at each split.
-        - If float, then `max_features` is a fraction and
-          `int(max_features * n_features)` features are considered at each
-          split.
+        - If int, values must be in the range `[1, number of features]`.
+        - If float, values must be in the range `(0, 1]` and the features
+          considered at each split will be `int(max_features * n_features)`.
         - If "auto", then `max_features=n_features`.
         - If "sqrt", then `max_features=sqrt(n_features)`.
         - If "log2", then `max_features=log2(n_features)`.
@@ -1631,16 +1635,19 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     alpha : float, default=0.9
         The alpha-quantile of the huber loss function and the quantile
         loss function. Only if ``loss='huber'`` or ``loss='quantile'``.
+        Values mus be in the range `(0, 1)`.
 
     verbose : int, default=0
         Enable verbose output. If 1 then it prints progress and performance
         once in a while (the more trees the lower the frequency). If greater
         than 1 then it prints progress and performance for every tree.
+        Values must be in the range `[0, inf)`.
 
     max_leaf_nodes : int, default=None
         Grow trees with ``max_leaf_nodes`` in best-first fashion.
         Best nodes are defined as relative reduction in impurity.
-        If None then unlimited number of leaf nodes.
+        Values must be in the range `[2, inf)`.
+        If None, then unlimited number of leaf nodes.
 
     warm_start : bool, default=False
         When set to ``True``, reuse the solution of the previous call to fit
@@ -1649,7 +1656,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
 
     validation_fraction : float, default=0.1
         The proportion of training data to set aside as validation set for
-        early stopping. Must be between 0 and 1.
+        early stopping. Values must be in the range `(0, 1)`.
         Only used if ``n_iter_no_change`` is set to an integer.
 
         .. versionadded:: 0.20
@@ -1662,6 +1669,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         data as validation and terminate training when validation score is not
         improving in all of the previous ``n_iter_no_change`` numbers of
         iterations.
+        Values must be in the range `[1, inf)`.
 
         .. versionadded:: 0.20
 
@@ -1669,14 +1677,16 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         Tolerance for the early stopping. When the loss is not improving
         by at least tol for ``n_iter_no_change`` iterations (if set to a
         number), the training stops.
+        Values must be in the range `(0, inf)`.
 
         .. versionadded:: 0.20
 
     ccp_alpha : non-negative float, default=0.0
         Complexity parameter used for Minimal Cost-Complexity Pruning. The
         subtree with the largest cost complexity that is smaller than
-        ``ccp_alpha`` will be chosen. By default, no pruning is performed. See
-        :ref:`minimal_cost_complexity_pruning` for details.
+        ``ccp_alpha`` will be chosen. By default, no pruning is performed.
+        Values must be in the range `[0, inf)`.
+        See :ref:`minimal_cost_complexity_pruning` for details.
 
         .. versionadded:: 0.22
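
With patches 1 and 2 applied, both estimators' docstrings state an explicit interval for every scalar hyperparameter. As a quick illustration of the regressor's documented ranges in use, here is a minimal sketch; the dataset and the specific values are made up for illustration, not recommendations:

```python
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor

X, y = make_regression(n_samples=200, n_features=10, random_state=0)

# Each value sits inside the interval the updated docstrings name:
# learning_rate in (0.0, inf), n_estimators in [1, inf),
# subsample in (0, 1], and alpha in (0, 1) (used with loss='quantile').
reg = GradientBoostingRegressor(
    loss="quantile",
    alpha=0.9,
    learning_rate=0.1,
    n_estimators=100,
    subsample=0.8,
    random_state=0,
).fit(X, y)
print(reg.predict(X[:3]))
```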
From ad783a69c26e36ca1fe2d33a8b23951553362c41 Mon Sep 17 00:00:00 2001
From: genvalen
Date: Mon, 10 Jan 2022 22:46:06 -0500
Subject: [PATCH 3/8] update docs for warm_start

---
 sklearn/ensemble/_gb.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py
index e6596ad011a2c..b01b33597636d 100644
--- a/sklearn/ensemble/_gb.py
+++ b/sklearn/ensemble/_gb.py
@@ -1062,7 +1062,8 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     warm_start : bool, default=False
         When set to ``True``, reuse the solution of the previous call to fit
         and add more estimators to the ensemble, otherwise, just erase the
-        previous solution. See :term:`the Glossary <warm_start>`.
+        previous solution. Values must be `True` or `False`.
+        See :term:`the Glossary <warm_start>`.
 
     validation_fraction : float, default=0.1
         The proportion of training data to set aside as validation set for
@@ -1652,7 +1653,8 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     warm_start : bool, default=False
         When set to ``True``, reuse the solution of the previous call to fit
         and add more estimators to the ensemble, otherwise, just erase the
-        previous solution. See :term:`the Glossary <warm_start>`.
+        previous solution. Values must be `True` or `False`.
+        See :term:`the Glossary <warm_start>`.
 
     validation_fraction : float, default=0.1
         The proportion of training data to set aside as validation set for

From 69eb0c221a70224a5fe9bd61dd2b3a66d3fdfafa Mon Sep 17 00:00:00 2001
From: genvalen
Date: Mon, 24 Jan 2022 14:11:50 -0500
Subject: [PATCH 4/8] min_samples_leaf: update upper bound to inf

---
 sklearn/ensemble/_gb.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py
index b01b33597636d..27d88889b0af7 100644
--- a/sklearn/ensemble/_gb.py
+++ b/sklearn/ensemble/_gb.py
@@ -974,8 +974,8 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         right branches. This may have the effect of smoothing the model,
         especially in regression.
 
-        - If int, values must be in the range `[1, n_samples]`.
-        - If float, values must be in the range `(0, 0.5]` and `min_samples_leaf`
-          will be `ceil(min_samples_leaf * n_samples)`.
+        - If int, values must be in the range `[1, inf)`.
+        - If float, values must be in the range `(0, inf)` and `min_samples_leaf`
+          will be `ceil(min_samples_leaf * n_samples)`.
 
         .. versionchanged:: 0.18
@@ -1559,8 +1559,8 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         right branches. This may have the effect of smoothing the model,
         especially in regression.
 
-        - If int, values must be in the range `[1, n_samples]`.
-        - If float, values must be in the range `(0, 0.5]` and `min_samples_leaf`
-          will be `ceil(min_samples_leaf * n_samples)`.
+        - If int, values must be in the range `[1, inf)`.
+        - If float, values must be in the range `(0, inf)` and `min_samples_leaf`
+          will be `ceil(min_samples_leaf * n_samples)`.
 
         .. versionchanged:: 0.18
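
Patch 4 above widens the `min_samples_leaf` bounds (patch 5 below then tightens the float form again). In either revision, the float form is converted to a per-leaf sample count via `ceil(min_samples_leaf * n_samples)`, exactly as the docstrings say; a minimal sketch of that conversion, with `n_samples` made up for illustration:

```python
import math

def effective_min_samples_leaf(min_samples_leaf, n_samples):
    # Int form: used directly; the docstring requires at least 1.
    if isinstance(min_samples_leaf, int):
        return min_samples_leaf
    # Float form: a fraction of n_samples, rounded up.
    return math.ceil(min_samples_leaf * n_samples)

print(effective_min_samples_leaf(10, 200))    # -> 10
print(effective_min_samples_leaf(0.05, 200))  # -> ceil(0.05 * 200) = 10
```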
From 99864f0faf7886de9f0aa7c477d3c582df9d6d74 Mon Sep 17 00:00:00 2001
From: genvalen
Date: Mon, 31 Jan 2022 00:47:42 -0500
Subject: [PATCH 5/8] Update bounds for min_samples_split, min_samples_leaf,
 and slight edit alpha

---
 sklearn/ensemble/_gb.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py
index 27d88889b0af7..519e6cf6c7bb3 100644
--- a/sklearn/ensemble/_gb.py
+++ b/sklearn/ensemble/_gb.py
@@ -960,7 +960,7 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     min_samples_split : int or float, default=2
         The minimum number of samples required to split an internal node:
 
-        - If int, values must be in the range `[2, n_samples]`.
+        - If int, values must be in the range `[2, inf)`.
         - If float, values must be in the range `(0, 1]` and `min_samples_split`
           will be `ceil(min_samples_split * n_samples)`.
@@ -975,7 +975,7 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         especially in regression.
 
         - If int, values must be in the range `[1, inf)`.
-        - If float, values must be in the range `(0, inf)` and `min_samples_leaf`
+        - If float, values must be in the range `(0, 1]` and `min_samples_leaf`
          will be `ceil(min_samples_leaf * n_samples)`.
 
         .. versionchanged:: 0.18
@@ -1545,7 +1545,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     min_samples_split : int or float, default=2
         The minimum number of samples required to split an internal node:
 
-        - If int, values must be in the range `[2, n_samples]`.
+        - If int, values must be in the range `[2, inf)`.
         - If float, values must be in the range `(0, 1]` and `min_samples_split`
           will be `ceil(min_samples_split * n_samples)`.
@@ -1560,7 +1560,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         especially in regression.
 
         - If int, values must be in the range `[1, inf)`.
-        - If float, values must be in the range `(0, inf)` and `min_samples_leaf`
+        - If float, values must be in the range `(0, 1]` and `min_samples_leaf`
          will be `ceil(min_samples_leaf * n_samples)`.
 
         .. versionchanged:: 0.18
@@ -1636,7 +1636,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     alpha : float, default=0.9
         The alpha-quantile of the huber loss function and the quantile
         loss function. Only if ``loss='huber'`` or ``loss='quantile'``.
-        Values mus be in the range `(0, 1)`.
+        Values must be in the range `(0, 1)`.
 
     verbose : int, default=0
         Enable verbose output. If 1 then it prints progress and performance

From 22ea6b192e6be84a4f9e3d49ca4d171649cfa5b9 Mon Sep 17 00:00:00 2001
From: genvalen
Date: Mon, 7 Feb 2022 14:56:39 -0500
Subject: [PATCH 6/8] Update sklearn/ensemble/_gb.py

Co-authored-by: Thomas J. Fan
---
 sklearn/ensemble/_gb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py
index 519e6cf6c7bb3..26aabf937611b 100644
--- a/sklearn/ensemble/_gb.py
+++ b/sklearn/ensemble/_gb.py
@@ -934,7 +934,7 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         Boosting. `subsample` interacts with the parameter `n_estimators`.
         Choosing `subsample < 1.0` leads to a reduction of variance
         and an increase in bias.
-        Values must be in the range `(0, 1]`.
+        Values must be in the range `(0.0, 1.0]`.
 
     criterion : {'friedman_mse', 'squared_error', 'mse', 'mae'}, \
             default='friedman_mse'

From 4ffebd2a4aafbe4d6cd7950f636d6bd9529e23e7 Mon Sep 17 00:00:00 2001
From: genvalen
Date: Mon, 7 Feb 2022 14:56:48 -0500
Subject: [PATCH 7/8] Update sklearn/ensemble/_gb.py

Co-authored-by: Thomas J. Fan
---
 sklearn/ensemble/_gb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py
index 26aabf937611b..631c6694a0e42 100644
--- a/sklearn/ensemble/_gb.py
+++ b/sklearn/ensemble/_gb.py
@@ -975,7 +975,7 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         especially in regression.
 
        - If int, values must be in the range `[1, inf)`.
-        - If float, values must be in the range `(0, 1]` and `min_samples_leaf`
+        - If float, values must be in the range `(0.0, 1.0]` and `min_samples_leaf`
          will be `ceil(min_samples_leaf * n_samples)`.
 
         .. versionchanged:: 0.18

From 51b53afcc7f9791336fc94482a1ac71f34826716 Mon Sep 17 00:00:00 2001
From: genvalen
Date: Mon, 7 Feb 2022 15:04:20 -0500
Subject: [PATCH 8/8] Apply suggestions from code review

Co-authored-by: Thomas J. Fan
---
 sklearn/ensemble/_gb.py | 42 ++++++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py
index 631c6694a0e42..0546754099f5e 100644
--- a/sklearn/ensemble/_gb.py
+++ b/sklearn/ensemble/_gb.py
@@ -961,7 +961,7 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         The minimum number of samples required to split an internal node:
 
         - If int, values must be in the range `[2, inf)`.
-        - If float, values must be in the range `(0, 1]` and `min_samples_split`
+        - If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`
          will be `ceil(min_samples_split * n_samples)`.
 
         .. versionchanged:: 0.18
@@ -985,7 +985,7 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         The minimum weighted fraction of the sum total of weights (of all
         the input samples) required to be at a leaf node. Samples have
         equal weight when sample_weight is not provided.
-        Values must be in the range `[0, 0.5]`.
+        Values must be in the range `[0.0, 0.5]`.
 
     max_depth : int, default=3
         The maximum depth of the individual regression estimators. The maximum
@@ -1032,8 +1032,8 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     max_features : {'auto', 'sqrt', 'log2'}, int or float, default=None
         The number of features to consider when looking for the best split:
 
-        - If int, values must be in the range `[1, number of features]`.
-        - If float, values must be in the range `(0, 1]` and the features
+        - If int, values must be in the range `[1, inf)`.
+        - If float, values must be in the range `(0.0, 1.0]` and the features
          considered at each split will be `int(max_features * n_features)`.
         - If 'auto', then `max_features=sqrt(n_features)`.
         - If 'sqrt', then `max_features=sqrt(n_features)`.
@@ -1057,17 +1057,16 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         Grow trees with ``max_leaf_nodes`` in best-first fashion.
         Best nodes are defined as relative reduction in impurity.
         Values must be in the range `[2, inf)`.
-        If None, then unlimited number of leaf nodes.
+        If `None`, then unlimited number of leaf nodes.
 
     warm_start : bool, default=False
         When set to ``True``, reuse the solution of the previous call to fit
         and add more estimators to the ensemble, otherwise, just erase the
-        previous solution. Values must be `True` or `False`.
-        See :term:`the Glossary <warm_start>`.
+        previous solution. See :term:`the Glossary <warm_start>`.
 
     validation_fraction : float, default=0.1
         The proportion of training data to set aside as validation set for
-        early stopping. Values must be in the range `(0, 1)`.
+        early stopping. Values must be in the range `(0.0, 1.0)`.
         Only used if ``n_iter_no_change`` is set to an integer.
 
         .. versionadded:: 0.20
@@ -1088,7 +1087,7 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         Tolerance for the early stopping. When the loss is not improving
         by at least tol for ``n_iter_no_change`` iterations (if set to a
         number), the training stops.
-        Values must be in the range `(0, inf)`.
+        Values must be in the range `(0.0, inf)`.
 
         .. versionadded:: 0.20
@@ -1096,7 +1095,7 @@ class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
         Complexity parameter used for Minimal Cost-Complexity Pruning. The
         subtree with the largest cost complexity that is smaller than
         ``ccp_alpha`` will be chosen. By default, no pruning is performed.
-        Values must be in the range `[0, inf)`.
+        Values must be in the range `[0.0, inf)`.
         See :ref:`minimal_cost_complexity_pruning` for details.
 
         .. versionadded:: 0.22
@@ -1520,7 +1519,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         Boosting. `subsample` interacts with the parameter `n_estimators`.
         Choosing `subsample < 1.0` leads to a reduction of variance
         and an increase in bias.
-        Values must be in the range `(0, 1]`.
+        Values must be in the range `(0.0, 1.0]`.
 
     criterion : {'friedman_mse', 'squared_error', 'mse', 'mae'}, \
             default='friedman_mse'
@@ -1546,7 +1545,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         The minimum number of samples required to split an internal node:
 
         - If int, values must be in the range `[2, inf)`.
-        - If float, values must be in the range `(0, 1]` and `min_samples_split`
+        - If float, values must be in the range `(0.0, 1.0]` and `min_samples_split`
          will be `ceil(min_samples_split * n_samples)`.
 
         .. versionchanged:: 0.18
@@ -1560,7 +1559,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         especially in regression.
 
         - If int, values must be in the range `[1, inf)`.
-        - If float, values must be in the range `(0, 1]` and `min_samples_leaf`
+        - If float, values must be in the range `(0.0, 1.0]` and `min_samples_leaf`
          will be `ceil(min_samples_leaf * n_samples)`.
 
         .. versionchanged:: 0.18
@@ -1570,7 +1569,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         The minimum weighted fraction of the sum total of weights (of all
         the input samples) required to be at a leaf node. Samples have
         equal weight when sample_weight is not provided.
-        Values must be in the range `[0, 0.5]`.
+        Values must be in the range `[0.0, 0.5]`.
 
     max_depth : int, default=3
         Maximum depth of the individual regression estimators. The maximum
@@ -1618,8 +1617,8 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     max_features : {'auto', 'sqrt', 'log2'}, int or float, default=None
         The number of features to consider when looking for the best split:
 
-        - If int, values must be in the range `[1, number of features]`.
-        - If float, values must be in the range `(0, 1]` and the features
+        - If int, values must be in the range `[1, inf)`.
+        - If float, values must be in the range `(0.0, 1.0]` and the features
          considered at each split will be `int(max_features * n_features)`.
         - If "auto", then `max_features=n_features`.
         - If "sqrt", then `max_features=sqrt(n_features)`.
@@ -1636,7 +1635,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     alpha : float, default=0.9
         The alpha-quantile of the huber loss function and the quantile
         loss function. Only if ``loss='huber'`` or ``loss='quantile'``.
-        Values must be in the range `(0, 1)`.
+        Values must be in the range `(0.0, 1.0)`.
 
     verbose : int, default=0
         Enable verbose output. If 1 then it prints progress and performance
@@ -1653,12 +1652,11 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     warm_start : bool, default=False
         When set to ``True``, reuse the solution of the previous call to fit
         and add more estimators to the ensemble, otherwise, just erase the
-        previous solution. Values must be `True` or `False`.
-        See :term:`the Glossary <warm_start>`.
+        previous solution. See :term:`the Glossary <warm_start>`.
 
     validation_fraction : float, default=0.1
         The proportion of training data to set aside as validation set for
-        early stopping. Values must be in the range `(0, 1)`.
+        early stopping. Values must be in the range `(0.0, 1.0)`.
         Only used if ``n_iter_no_change`` is set to an integer.
 
         .. versionadded:: 0.20
@@ -1679,7 +1677,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         Tolerance for the early stopping. When the loss is not improving
         by at least tol for ``n_iter_no_change`` iterations (if set to a
         number), the training stops.
-        Values must be in the range `(0, inf)`.
+        Values must be in the range `(0.0, inf)`.
 
         .. versionadded:: 0.20
@@ -1687,7 +1685,7 @@ class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
         Complexity parameter used for Minimal Cost-Complexity Pruning. The
         subtree with the largest cost complexity that is smaller than
         ``ccp_alpha`` will be chosen. By default, no pruning is performed.
-        Values must be in the range `[0, inf)`.
+        Values must be in the range `[0.0, inf)`.
         See :ref:`minimal_cost_complexity_pruning` for details.
 
         .. versionadded:: 0.22
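
After patch 8, every interval is written with explicit `0.0`/`1.0` float endpoints. A closing sketch of the classifier with each scalar inside its finalized documented range; the dataset and settings are illustrative, not tuned:

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

X, y = make_classification(n_samples=200, random_state=0)

clf = GradientBoostingClassifier(
    learning_rate=0.1,      # (0.0, inf)
    n_estimators=100,       # [1, inf)
    subsample=0.8,          # (0.0, 1.0]
    min_samples_split=2,    # [2, inf) for the int form
    min_samples_leaf=0.05,  # (0.0, 1.0] for the float form
    max_depth=3,            # [1, inf)
    tol=1e-4,               # (0.0, inf)
    ccp_alpha=0.0,          # [0.0, inf)
    random_state=0,
).fit(X, y)
print(clf.score(X, y))

# Out-of-range values are rejected at fit time; for example,
# subsample=0.0 (outside (0.0, 1.0]) raises a ValueError.
```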