From 7227be4799cce5366283dad9951a68e2eb3b7f9a Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 17 Oct 2018 16:48:27 -0400 Subject: [PATCH 1/3] Added ChangedBehaviorWarning to decision tree about the min_impurity_split --- sklearn/tree/tree.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index 2357ce3f508e4..eab1f53e220e6 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -36,6 +36,7 @@ from ..utils import compute_sample_weight from ..utils.multiclass import check_classification_targets from ..utils.validation import check_is_fitted +from ..exceptions import ChangedBehaviorWarning from ._criterion import Criterion from ._splitter import Splitter @@ -291,15 +292,23 @@ def fit(self, X, y, sample_weight=None, check_input=True, np.sum(sample_weight)) if self.min_impurity_split is not None: - warnings.warn("The min_impurity_split parameter is deprecated and" - " will be removed in version 0.21. " - "Use the min_impurity_decrease parameter instead.", - DeprecationWarning) + if self.min_impurity_split != float('-inf'): + warnings.warn( + "The min_impurity_split parameter is deprecated and" + " will be removed in version 0.21. " + "Use the min_impurity_decrease parameter instead.", + DeprecationWarning) min_impurity_split = self.min_impurity_split else: + warnings.warn("The min_impurity_split parameter is deprecated and " + "will be removed in version 0.21. However it still " + "defaults to 1e-7 if it is not set. To silence this " + "warning and get the future behavior from 0.21, " + "set it to float('-inf').", + ChangedBehaviorWarning) min_impurity_split = 1e-7 - if min_impurity_split < 0.: + if min_impurity_split < 0. and min_impurity_split != float('-inf'): raise ValueError("min_impurity_split must be greater than " "or equal to 0") From 66cf284104d9c2509ca841d89a21eea357f6d917 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 23 Oct 2018 14:16:19 -0400 Subject: [PATCH 2/3] Removed ChangedBehaviorWarning and changed DeprecationWarning message + docstring updates --- sklearn/ensemble/forest.py | 35 ++++++++++--------- sklearn/ensemble/gradient_boosting.py | 14 ++++---- sklearn/tree/tree.py | 48 ++++++++++++--------------- 3 files changed, 50 insertions(+), 47 deletions(-) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index 6c3bb93e2c071..054b2a45a018c 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -850,14 +850,15 @@ class RandomForestClassifier(ForestClassifier): .. versionadded:: 0.19 - min_impurity_split : float, + min_impurity_split : float, (default=1e-7) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. - Use ``min_impurity_decrease`` instead. + ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. + Use ``min_impurity_decrease`` instead. Also, the default value + will change from 1e-7 to 0 in 0.23. bootstrap : boolean, optional (default=True) Whether bootstrap samples are used when building trees. @@ -1139,14 +1140,15 @@ class RandomForestRegressor(ForestRegressor): .. versionadded:: 0.19 - min_impurity_split : float, + min_impurity_split : float, (default=1e-7) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. - Use ``min_impurity_decrease`` instead. + ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. + Use ``min_impurity_decrease`` instead. Also, the default value + will change from 1e-7 to 0 in 0.23. bootstrap : boolean, optional (default=True) Whether bootstrap samples are used when building trees. @@ -1388,14 +1390,15 @@ class ExtraTreesClassifier(ForestClassifier): .. versionadded:: 0.19 - min_impurity_split : float, + min_impurity_split : float, (default=1e-7) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. - Use ``min_impurity_decrease`` instead. + ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. + Use ``min_impurity_decrease`` instead. Also, the default value + will change from 1e-7 to 0 in 0.23. bootstrap : boolean, optional (default=False) Whether bootstrap samples are used when building trees. @@ -1648,14 +1651,15 @@ class ExtraTreesRegressor(ForestRegressor): .. versionadded:: 0.19 - min_impurity_split : float, + min_impurity_split : float, (default=1e-7) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. - Use ``min_impurity_decrease`` instead. + ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. + Use ``min_impurity_decrease`` instead. Also, the default value + will change from 1e-7 to 0 in 0.23. bootstrap : boolean, optional (default=False) Whether bootstrap samples are used when building trees. @@ -1848,14 +1852,15 @@ class RandomTreesEmbedding(BaseForest): .. versionadded:: 0.19 - min_impurity_split : float, + min_impurity_split : float, (default=1e-7) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. - Use ``min_impurity_decrease`` instead. + ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. + Use ``min_impurity_decrease`` instead. Also, the default value + will change from 1e-7 to 0 in 0.23. sparse_output : bool, optional (default=True) Whether or not to return a sparse CSR matrix, as default behavior, diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index 39da7de21a166..ad239ab2fc4bd 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -1782,14 +1782,15 @@ class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin): .. versionadded:: 0.19 - min_impurity_split : float, + min_impurity_split : float, (default=1e-7) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. - Use ``min_impurity_decrease`` instead. + ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. + Use ``min_impurity_decrease`` instead. Also, the default value + will change from 1e-7 to 0 in 0.23. init : estimator, optional An estimator object that is used to compute the initial @@ -2241,14 +2242,15 @@ class GradientBoostingRegressor(BaseGradientBoosting, RegressorMixin): .. versionadded:: 0.19 - min_impurity_split : float, + min_impurity_split : float, (default=1e-7) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. - Use ``min_impurity_decrease`` instead. + ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. + Use ``min_impurity_decrease`` instead. Also, the default value + will change from 1e-7 to 0 in 0.23. init : estimator, optional (default=None) An estimator object that is used to compute the initial diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index eab1f53e220e6..23bb1793e7a54 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -36,7 +36,6 @@ from ..utils import compute_sample_weight from ..utils.multiclass import check_classification_targets from ..utils.validation import check_is_fitted -from ..exceptions import ChangedBehaviorWarning from ._criterion import Criterion from ._splitter import Splitter @@ -292,23 +291,16 @@ def fit(self, X, y, sample_weight=None, check_input=True, np.sum(sample_weight)) if self.min_impurity_split is not None: - if self.min_impurity_split != float('-inf'): - warnings.warn( - "The min_impurity_split parameter is deprecated and" - " will be removed in version 0.21. " - "Use the min_impurity_decrease parameter instead.", - DeprecationWarning) + warnings.warn("The min_impurity_split parameter is deprecated. " + "Its default value will change from 1e-7 to 0 in " + "version 0.23, and will be removed in 0.25. " + "Use the min_impurity_decrease parameter instead.", + DeprecationWarning) min_impurity_split = self.min_impurity_split else: - warnings.warn("The min_impurity_split parameter is deprecated and " - "will be removed in version 0.21. However it still " - "defaults to 1e-7 if it is not set. To silence this " - "warning and get the future behavior from 0.21, " - "set it to float('-inf').", - ChangedBehaviorWarning) min_impurity_split = 1e-7 - if min_impurity_split < 0. and min_impurity_split != float('-inf'): + if min_impurity_split < 0.: raise ValueError("min_impurity_split must be greater than " "or equal to 0") @@ -636,14 +628,15 @@ class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin): .. versionadded:: 0.19 - min_impurity_split : float, + min_impurity_split : float, (default=1e-7) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. - Use ``min_impurity_decrease`` instead. + ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. + Use ``min_impurity_decrease`` instead. Also, the default value + will change from 1e-7 to 0 in 0.23. class_weight : dict, list of dicts, "balanced" or None, default=None Weights associated with classes in the form ``{class_label: weight}``. @@ -1008,14 +1001,15 @@ class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin): .. versionadded:: 0.19 - min_impurity_split : float, + min_impurity_split : float, (default=1e-7) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. - Use ``min_impurity_decrease`` instead. + ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. + Use ``min_impurity_decrease`` instead. Also, the default value + will change from 1e-7 to 0 in 0.23. presort : bool, optional (default=False) Whether to presort the data to speed up the finding of best splits in @@ -1270,14 +1264,15 @@ class ExtraTreeClassifier(DecisionTreeClassifier): .. versionadded:: 0.19 - min_impurity_split : float, + min_impurity_split : float, (default=1e-7) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. - Use ``min_impurity_decrease`` instead. + ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. + Use ``min_impurity_decrease`` instead. Also, the default value + will change from 1e-7 to 0 in 0.23. class_weight : dict, list of dicts, "balanced" or None, default=None Weights associated with classes in the form ``{class_label: weight}``. @@ -1453,14 +1448,15 @@ class ExtraTreeRegressor(DecisionTreeRegressor): .. versionadded:: 0.19 - min_impurity_split : float, + min_impurity_split : float, (default=1e-7) Threshold for early stopping in tree growth. A node will split if its impurity is above the threshold, otherwise it is a leaf. .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.21. - Use ``min_impurity_decrease`` instead. + ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. + Use ``min_impurity_decrease`` instead. Also, the default value + will change from 1e-7 to 0 in 0.23. max_leaf_nodes : int or None, optional (default=None) Grow a tree with ``max_leaf_nodes`` in best-first fashion. From 5882bafd9c981e5f6757c544e93bf8e5dde605fd Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Wed, 24 Oct 2018 14:35:22 -0400 Subject: [PATCH 3/3] Changed wording --- sklearn/ensemble/forest.py | 31 ++++++++++++++------------- sklearn/ensemble/gradient_boosting.py | 12 +++++------ sklearn/tree/tree.py | 26 +++++++++++----------- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index 054b2a45a018c..9c05fab050164 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -856,9 +856,10 @@ class RandomForestClassifier(ForestClassifier): .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. - Use ``min_impurity_decrease`` instead. Also, the default value - will change from 1e-7 to 0 in 0.23. + ``min_impurity_decrease`` in 0.19. The default value of + ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + will be removed in 0.25. Use ``min_impurity_decrease`` instead. + bootstrap : boolean, optional (default=True) Whether bootstrap samples are used when building trees. @@ -1146,9 +1147,9 @@ class RandomForestRegressor(ForestRegressor): .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. - Use ``min_impurity_decrease`` instead. Also, the default value - will change from 1e-7 to 0 in 0.23. + ``min_impurity_decrease`` in 0.19. The default value of + ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + will be removed in 0.25. Use ``min_impurity_decrease`` instead. bootstrap : boolean, optional (default=True) Whether bootstrap samples are used when building trees. @@ -1396,9 +1397,9 @@ class ExtraTreesClassifier(ForestClassifier): .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. - Use ``min_impurity_decrease`` instead. Also, the default value - will change from 1e-7 to 0 in 0.23. + ``min_impurity_decrease`` in 0.19. The default value of + ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + will be removed in 0.25. Use ``min_impurity_decrease`` instead. bootstrap : boolean, optional (default=False) Whether bootstrap samples are used when building trees. @@ -1657,9 +1658,9 @@ class ExtraTreesRegressor(ForestRegressor): .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. - Use ``min_impurity_decrease`` instead. Also, the default value - will change from 1e-7 to 0 in 0.23. + ``min_impurity_decrease`` in 0.19. The default value of + ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + will be removed in 0.25. Use ``min_impurity_decrease`` instead. bootstrap : boolean, optional (default=False) Whether bootstrap samples are used when building trees. @@ -1858,9 +1859,9 @@ class RandomTreesEmbedding(BaseForest): .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. - Use ``min_impurity_decrease`` instead. Also, the default value - will change from 1e-7 to 0 in 0.23. + ``min_impurity_decrease`` in 0.19. The default value of + ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + will be removed in 0.25. Use ``min_impurity_decrease`` instead. sparse_output : bool, optional (default=True) Whether or not to return a sparse CSR matrix, as default behavior, diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index ad239ab2fc4bd..b15ed82833fd6 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -1788,9 +1788,9 @@ class GradientBoostingClassifier(BaseGradientBoosting, ClassifierMixin): .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. - Use ``min_impurity_decrease`` instead. Also, the default value - will change from 1e-7 to 0 in 0.23. + ``min_impurity_decrease`` in 0.19. The default value of + ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + will be removed in 0.25. Use ``min_impurity_decrease`` instead. init : estimator, optional An estimator object that is used to compute the initial @@ -2248,9 +2248,9 @@ class GradientBoostingRegressor(BaseGradientBoosting, RegressorMixin): .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. - Use ``min_impurity_decrease`` instead. Also, the default value - will change from 1e-7 to 0 in 0.23. + ``min_impurity_decrease`` in 0.19. The default value of + ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + will be removed in 0.25. Use ``min_impurity_decrease`` instead. init : estimator, optional (default=None) An estimator object that is used to compute the initial diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index 23bb1793e7a54..faa83efbb7703 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -293,7 +293,7 @@ def fit(self, X, y, sample_weight=None, check_input=True, if self.min_impurity_split is not None: warnings.warn("The min_impurity_split parameter is deprecated. " "Its default value will change from 1e-7 to 0 in " - "version 0.23, and will be removed in 0.25. " + "version 0.23, and it will be removed in 0.25. " "Use the min_impurity_decrease parameter instead.", DeprecationWarning) min_impurity_split = self.min_impurity_split @@ -634,9 +634,9 @@ class DecisionTreeClassifier(BaseDecisionTree, ClassifierMixin): .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. - Use ``min_impurity_decrease`` instead. Also, the default value - will change from 1e-7 to 0 in 0.23. + ``min_impurity_decrease`` in 0.19. The default value of + ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + will be removed in 0.25. Use ``min_impurity_decrease`` instead. class_weight : dict, list of dicts, "balanced" or None, default=None Weights associated with classes in the form ``{class_label: weight}``. @@ -1007,9 +1007,9 @@ class DecisionTreeRegressor(BaseDecisionTree, RegressorMixin): .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. - Use ``min_impurity_decrease`` instead. Also, the default value - will change from 1e-7 to 0 in 0.23. + ``min_impurity_decrease`` in 0.19. The default value of + ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + will be removed in 0.25. Use ``min_impurity_decrease`` instead. presort : bool, optional (default=False) Whether to presort the data to speed up the finding of best splits in @@ -1270,9 +1270,9 @@ class ExtraTreeClassifier(DecisionTreeClassifier): .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. - Use ``min_impurity_decrease`` instead. Also, the default value - will change from 1e-7 to 0 in 0.23. + ``min_impurity_decrease`` in 0.19. The default value of + ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + will be removed in 0.25. Use ``min_impurity_decrease`` instead. class_weight : dict, list of dicts, "balanced" or None, default=None Weights associated with classes in the form ``{class_label: weight}``. @@ -1454,9 +1454,9 @@ class ExtraTreeRegressor(DecisionTreeRegressor): .. deprecated:: 0.19 ``min_impurity_split`` has been deprecated in favor of - ``min_impurity_decrease`` in 0.19 and will be removed in 0.25. - Use ``min_impurity_decrease`` instead. Also, the default value - will change from 1e-7 to 0 in 0.23. + ``min_impurity_decrease`` in 0.19. The default value of + ``min_impurity_split`` will change from 1e-7 to 0 in 0.23 and it + will be removed in 0.25. Use ``min_impurity_decrease`` instead. max_leaf_nodes : int or None, optional (default=None) Grow a tree with ``max_leaf_nodes`` in best-first fashion.