From b1bc2deb8119ce4f2314ad5b88658bd9b09a1507 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Tue, 30 Jul 2019 09:05:31 +0200 Subject: [PATCH 01/33] Update docstrings and change default value --- .../_hist_gradient_boosting/gradient_boosting.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index dc040ed1fa409..5df5102392319 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -622,7 +622,9 @@ class HistGradientBoostingRegressor(BaseHistGradientBoosting, RegressorMixin): for big datasets (n_samples >= 10 000). The input data ``X`` is pre-binned into integer-valued bins, which considerably reduces the number of splitting points to consider, and allows the algorithm to leverage - integer-based data structures. For small sample sizes, + integer-based data structures. Early stopping is the default behavior, as + it usually makes the fitting process much faster without a substantial + difference in terms of predictive performance. For small sample sizes, :class:`GradientBoostingRegressor` might be preferred since binning may lead to split points that are too approximate in this setting. @@ -690,7 +692,7 @@ class HistGradientBoostingRegressor(BaseHistGradientBoosting, RegressorMixin): Proportion (or absolute size) of training data to set aside as validation data for early stopping. If None, early stopping is done on the training data. Only used if ``n_iter_no_change`` is not None. - n_iter_no_change : int or None, optional (default=None) + n_iter_no_change : int or None, optional (default=10) Used to determine when to "early stop". The fitting process is stopped when none of the last ``n_iter_no_change`` scores are better than the ``n_iter_no_change - 1`` -th-to-last one, up to some @@ -747,7 +749,7 @@ def __init__(self, loss='least_squares', learning_rate=0.1, max_iter=100, max_leaf_nodes=31, max_depth=None, min_samples_leaf=20, l2_regularization=0., max_bins=256, warm_start=False, scoring=None, validation_fraction=0.1, - n_iter_no_change=None, tol=1e-7, verbose=0, + n_iter_no_change=10, tol=1e-7, verbose=0, random_state=None): super(HistGradientBoostingRegressor, self).__init__( loss=loss, learning_rate=learning_rate, max_iter=max_iter, @@ -795,7 +797,9 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, for big datasets (n_samples >= 10 000). The input data ``X`` is pre-binned into integer-valued bins, which considerably reduces the number of splitting points to consider, and allows the algorithm to leverage - integer-based data structures. For small sample sizes, + integer-based data structures. Early stopping is the default behavior, as + it usually makes the fitting process much faster without a substantial + difference in terms of predictive performance. For small sample sizes, :class:`GradientBoostingClassifier` might be preferred since binning may lead to split points that are too approximate in this setting. @@ -865,7 +869,7 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, Proportion (or absolute size) of training data to set aside as validation data for early stopping. If None, early stopping is done on the training data. - n_iter_no_change : int or None, optional (default=None) + n_iter_no_change : int or None, optional (default=10) Used to determine when to "early stop". 
The fitting process is stopped when none of the last ``n_iter_no_change`` scores are better than the ``n_iter_no_change - 1`` -th-to-last one, up to some @@ -923,7 +927,7 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, def __init__(self, loss='auto', learning_rate=0.1, max_iter=100, max_leaf_nodes=31, max_depth=None, min_samples_leaf=20, l2_regularization=0., max_bins=256, warm_start=False, - scoring=None, validation_fraction=0.1, n_iter_no_change=None, + scoring=None, validation_fraction=0.1, n_iter_no_change=10, tol=1e-7, verbose=0, random_state=None): super(HistGradientBoostingClassifier, self).__init__( loss=loss, learning_rate=learning_rate, max_iter=max_iter, From c30e4d99e7b810e4bfe8a30f5eee9f6b00ba3907 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Tue, 30 Jul 2019 09:39:37 +0200 Subject: [PATCH 02/33] Disable early stopping for some tests --- .../_hist_gradient_boosting/tests/test_warm_start.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py index 806ad94ccee98..b1ef4a292c4b6 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py @@ -37,7 +37,8 @@ def test_max_iter_with_warm_start_validation(GradientBoosting, X, y): # is smaller than the number of iterations from the previous fit when warm # start is True. - estimator = GradientBoosting(max_iter=50, warm_start=True) + estimator = GradientBoosting(max_iter=50, warm_start=True, + n_iter_no_change=None) estimator.fit(X, y) estimator.set_params(max_iter=25) err_msg = ('max_iter=25 must be larger than or equal to n_iter_=50 ' @@ -76,7 +77,7 @@ def test_warm_start_yields_identical_results(GradientBoosting, X, y): def test_warm_start_max_depth(GradientBoosting, X, y): # Test if possible to fit trees of different depth in ensemble. 
gb = GradientBoosting(max_iter=100, min_samples_leaf=1, - warm_start=True, max_depth=2) + warm_start=True, max_depth=2, n_iter_no_change=None) gb.fit(X, y) gb.set_params(max_iter=110, max_depth=3) gb.fit(X, y) @@ -115,11 +116,12 @@ def test_warm_start_early_stopping(GradientBoosting, X, y): ]) def test_warm_start_equal_n_estimators(GradientBoosting, X, y): # Test if warm start with equal n_estimators does nothing - gb_1 = GradientBoosting(max_depth=2) + gb_1 = GradientBoosting(max_depth=2, n_iter_no_change=None) gb_1.fit(X, y) gb_2 = clone(gb_1) - gb_2.set_params(max_iter=gb_1.max_iter, warm_start=True) + gb_2.set_params(max_iter=gb_1.max_iter, warm_start=True, + n_iter_no_change=None) gb_2.fit(X, y) # Check that both predictors are equal From a196c5ea0fa07866be7f37cb10f96ccc78698456 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Tue, 30 Jul 2019 09:40:21 +0200 Subject: [PATCH 03/33] Check that early stopping is enabled by default --- .../tests/test_gradient_boosting.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index be7e424a844bc..b69f6743cd178 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -44,7 +44,8 @@ def test_init_parameters_validation(GradientBoosting, X, y, params, err_msg): def test_invalid_classification_loss(): - binary_clf = HistGradientBoostingClassifier(loss="binary_crossentropy") + binary_clf = HistGradientBoostingClassifier( + loss="binary_crossentropy", n_iter_no_change=None) err_msg = ("loss='binary_crossentropy' is not defined for multiclass " "classification with n_classes=3, use " "loss='categorical_crossentropy' instead") @@ -227,6 +228,18 @@ def test_infinite_values(): X = np.array([-np.inf, 0, 1, np.inf]).reshape(-1, 1) y = np.array([0, 0, 1, 1]) - gbdt = HistGradientBoostingRegressor(min_samples_leaf=1) + gbdt = HistGradientBoostingRegressor(min_samples_leaf=1, + n_iter_no_change=None) gbdt.fit(X, y) np.testing.assert_allclose(gbdt.predict(X), y, atol=1e-4) + + +@pytest.mark.parametrize('GradientBoosting, X, y', [ + (HistGradientBoostingClassifier, X_classification, y_classification), + (HistGradientBoostingRegressor, X_regression, y_regression) +]) +def test_early_stopping_default(GradientBoosting, X, y): + # Test that early stopping is enabled by default + gb = GradientBoosting(max_iter=200) + gb.fit(X, y) + assert gb.n_iter_ < gb.max_iter From b84c7e2bdb425083837636ecea45bdd843a0759a Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Tue, 30 Jul 2019 09:40:33 +0200 Subject: [PATCH 04/33] Fix the random state in the examples --- .../ensemble/_hist_gradient_boosting/gradient_boosting.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 5df5102392319..a5d2ab1ffcfbf 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -738,9 +738,9 @@ class HistGradientBoostingRegressor(BaseHistGradientBoosting, RegressorMixin): >>> from sklearn.ensemble import HistGradientBoostingRegressor >>> from sklearn.datasets import load_boston >>> X, y = load_boston(return_X_y=True) - >>> est = HistGradientBoostingRegressor().fit(X, 
y) + >>> est = HistGradientBoostingRegressor(random_state=42).fit(X, y) >>> est.score(X, y) - 0.98... + 0.95... """ _VALID_LOSSES = ('least_squares',) @@ -916,9 +916,9 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, >>> from sklearn.ensemble import HistGradientBoostingRegressor >>> from sklearn.datasets import load_iris >>> X, y = load_iris(return_X_y=True) - >>> clf = HistGradientBoostingClassifier().fit(X, y) + >>> clf = HistGradientBoostingClassifier(random_state=42).fit(X, y) >>> clf.score(X, y) - 1.0 + 0.98... """ _VALID_LOSSES = ('binary_crossentropy', 'categorical_crossentropy', From c02faeaa6a767e3db677b0db204c418a62aa723d Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Wed, 31 Jul 2019 10:01:38 +0200 Subject: [PATCH 05/33] Move sentence at the end of the paragraph --- .../_hist_gradient_boosting/gradient_boosting.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index a5d2ab1ffcfbf..09a638cf4e834 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -622,12 +622,12 @@ class HistGradientBoostingRegressor(BaseHistGradientBoosting, RegressorMixin): for big datasets (n_samples >= 10 000). The input data ``X`` is pre-binned into integer-valued bins, which considerably reduces the number of splitting points to consider, and allows the algorithm to leverage - integer-based data structures. Early stopping is the default behavior, as - it usually makes the fitting process much faster without a substantial - difference in terms of predictive performance. For small sample sizes, + integer-based data structures. For small sample sizes, :class:`GradientBoostingRegressor` might be preferred since binning may lead to split points that are too - approximate in this setting. + approximate in this setting. Early stopping is the default behavior, as + it usually makes the fitting process much faster without a substantial + difference in terms of predictive performance. This implementation is inspired by `LightGBM `_. @@ -797,12 +797,12 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, for big datasets (n_samples >= 10 000). The input data ``X`` is pre-binned into integer-valued bins, which considerably reduces the number of splitting points to consider, and allows the algorithm to leverage - integer-based data structures. Early stopping is the default behavior, as - it usually makes the fitting process much faster without a substantial - difference in terms of predictive performance. For small sample sizes, + integer-based data structures. For small sample sizes, :class:`GradientBoostingClassifier` might be preferred since binning may lead to split points that are too - approximate in this setting. + approximate in this setting. Early stopping is the default behavior, as + it usually makes the fitting process much faster without a substantial + difference in terms of predictive performance. This implementation is inspired by `LightGBM `_. 
From de41ab38c72ef41c2747518fb7fd707a378687d7 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Wed, 31 Jul 2019 10:12:23 +0200 Subject: [PATCH 06/33] Disable early stopping in test_estimators --- sklearn/utils/estimator_checks.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index c8a82bc8e623f..bf930af2f9307 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -384,6 +384,9 @@ def set_checking_parameters(estimator): # The default min_samples_leaf (20) isn't appropriate for small # datasets (only very shallow trees are built) that the checks use. estimator.set_params(min_samples_leaf=5) + # Early stopping is not appropriate for some tests in test_estimators + # because the actual training set is smaller than the given data + estimator.set_params(n_iter_no_change=None) # Speed-up by reducing the number of CV or splits for CV estimators loo_cv = ['RidgeCV'] From 9e83463c4f24e632db2a91e5886611abd9a2bc01 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Wed, 31 Jul 2019 10:39:49 +0200 Subject: [PATCH 07/33] Disable early stopping in partial dependence tests --- sklearn/inspection/tests/test_partial_dependence.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 19399224e07ba..26681b2579947 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -158,8 +158,10 @@ def test_grid_from_X_error(grid_resolution, percentiles, err_msg): (LinearRegression(), 'brute'), (GradientBoostingRegressor(random_state=0), 'brute'), (GradientBoostingRegressor(random_state=0), 'recursion'), - (HistGradientBoostingRegressor(random_state=0), 'brute'), - (HistGradientBoostingRegressor(random_state=0), 'recursion')] + (HistGradientBoostingRegressor(random_state=0, n_iter_no_change=None), + 'brute'), + (HistGradientBoostingRegressor(random_state=0, n_iter_no_change=None), + 'recursion')] ) def test_partial_dependence_helpers(est, method, target_feature): # Check that what is returned by _partial_dependence_brute or From 6f332d0fd6d705259c86f9a45ba7d912f064b5f9 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Wed, 31 Jul 2019 16:34:58 +0200 Subject: [PATCH 08/33] Move the new test next to the others tests regarding ES --- .../tests/test_gradient_boosting.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index b69f6743cd178..327b0dbd9147a 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -128,6 +128,17 @@ def test_early_stopping_classification(data, scoring, validation_fraction, assert gb.n_iter_ == max_iter +@pytest.mark.parametrize('GradientBoosting, X, y', [ + (HistGradientBoostingClassifier, X_classification, y_classification), + (HistGradientBoostingRegressor, X_regression, y_regression) +]) +def test_early_stopping_default(GradientBoosting, X, y): + # Test that early stopping is enabled by default + gb = GradientBoosting(max_iter=200) + gb.fit(X, y) + assert gb.n_iter_ < gb.max_iter + + @pytest.mark.parametrize( 'scores, n_iter_no_change, tol, stopping', [ @@ -232,14 +243,3 @@ def 
test_infinite_values(): n_iter_no_change=None) gbdt.fit(X, y) np.testing.assert_allclose(gbdt.predict(X), y, atol=1e-4) - - -@pytest.mark.parametrize('GradientBoosting, X, y', [ - (HistGradientBoostingClassifier, X_classification, y_classification), - (HistGradientBoostingRegressor, X_regression, y_regression) -]) -def test_early_stopping_default(GradientBoosting, X, y): - # Test that early stopping is enabled by default - gb = GradientBoosting(max_iter=200) - gb.fit(X, y) - assert gb.n_iter_ < gb.max_iter From 5764facd4dd1f69d83b95b38d9dd7e2313a45ee4 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Tue, 6 Aug 2019 15:36:31 +0200 Subject: [PATCH 09/33] Update the docstrings and the init for both classes --- .../gradient_boosting.py | 66 +++++++------------ 1 file changed, 25 insertions(+), 41 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 9fc5ed28049f8..e09de478e2ea5 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -619,19 +619,7 @@ class HistGradientBoostingRegressor(BaseHistGradientBoosting, RegressorMixin): This estimator is much faster than :class:`GradientBoostingRegressor` -<<<<<<< HEAD - for big datasets (n_samples >= 10 000). The input data ``X`` is pre-binned - into integer-valued bins, which considerably reduces the number of - splitting points to consider, and allows the algorithm to leverage - integer-based data structures. For small sample sizes, - :class:`GradientBoostingRegressor` - might be preferred since binning may lead to split points that are too - approximate in this setting. Early stopping is the default behavior, as - it usually makes the fitting process much faster without a substantial - difference in terms of predictive performance. -======= for big datasets (n_samples >= 10 000). ->>>>>>> c64ee34a01ded919fc7fe3ad800260029624433b This implementation is inspired by `LightGBM `_. @@ -682,6 +670,10 @@ class HistGradientBoostingRegressor(BaseHistGradientBoosting, RegressorMixin): allows for a much faster training stage. Features with a small number of unique values may use less than ``max_bins`` bins. Must be no larger than 256. + early_stopping : 'auto' or bool (default='auto') + If 'auto', early stopping is enabled if the sample size is larger than + 1000. If True, early stopping is enabled, otherwise early stopping is + disabled. warm_start : bool, optional (default=False) When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble. For results to be valid, the @@ -697,11 +689,11 @@ class HistGradientBoostingRegressor(BaseHistGradientBoosting, RegressorMixin): Proportion (or absolute size) of training data to set aside as validation data for early stopping. If None, early stopping is done on the training data. Only used if ``n_iter_no_change`` is not None. - n_iter_no_change : int or None, optional (default=10) + n_iter_no_change : int, optional (default=10) Used to determine when to "early stop". The fitting process is stopped when none of the last ``n_iter_no_change`` scores are better than the ``n_iter_no_change - 1`` -th-to-last one, up to some - tolerance. If None or 0, no early-stopping is done. + tolerance. Ignored if ``early_stopping`` is False. tol : float or None, optional (default=1e-7) The absolute tolerance to use when comparing scores during early stopping. 
The higher the tolerance, the more likely we are to early @@ -743,9 +735,9 @@ class HistGradientBoostingRegressor(BaseHistGradientBoosting, RegressorMixin): >>> from sklearn.ensemble import HistGradientBoostingRegressor >>> from sklearn.datasets import load_boston >>> X, y = load_boston(return_X_y=True) - >>> est = HistGradientBoostingRegressor(random_state=42).fit(X, y) + >>> est = HistGradientBoostingRegressor().fit(X, y) >>> est.score(X, y) - 0.95... + 0.98... """ _VALID_LOSSES = ('least_squares',) @@ -753,16 +745,16 @@ class HistGradientBoostingRegressor(BaseHistGradientBoosting, RegressorMixin): def __init__(self, loss='least_squares', learning_rate=0.1, max_iter=100, max_leaf_nodes=31, max_depth=None, min_samples_leaf=20, l2_regularization=0., max_bins=256, - warm_start=False, scoring=None, validation_fraction=0.1, - n_iter_no_change=10, tol=1e-7, verbose=0, - random_state=None): + warm_start=False, early_stopping='auto', scoring=None, + validation_fraction=0.1, n_iter_no_change=10, tol=1e-7, + verbose=0, random_state=None): super(HistGradientBoostingRegressor, self).__init__( loss=loss, learning_rate=learning_rate, max_iter=max_iter, max_leaf_nodes=max_leaf_nodes, max_depth=max_depth, min_samples_leaf=min_samples_leaf, l2_regularization=l2_regularization, max_bins=max_bins, - warm_start=warm_start, scoring=scoring, - validation_fraction=validation_fraction, + warm_start=warm_start, early_stopping=early_stopping, + scoring=scoring, validation_fraction=validation_fraction, n_iter_no_change=n_iter_no_change, tol=tol, verbose=verbose, random_state=random_state) @@ -799,19 +791,7 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, This estimator is much faster than :class:`GradientBoostingClassifier` -<<<<<<< HEAD - for big datasets (n_samples >= 10 000). The input data ``X`` is pre-binned - into integer-valued bins, which considerably reduces the number of - splitting points to consider, and allows the algorithm to leverage - integer-based data structures. For small sample sizes, - :class:`GradientBoostingClassifier` - might be preferred since binning may lead to split points that are too - approximate in this setting. Early stopping is the default behavior, as - it usually makes the fitting process much faster without a substantial - difference in terms of predictive performance. -======= for big datasets (n_samples >= 10 000). ->>>>>>> c64ee34a01ded919fc7fe3ad800260029624433b This implementation is inspired by `LightGBM `_. @@ -865,6 +845,10 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, allows for a much faster training stage. Features with a small number of unique values may use less than ``max_bins`` bins. Must be no larger than 256. + early_stopping : 'auto' or bool (default='auto') + If 'auto', early stopping is enabled if the sample size is larger than + 1000. If True, early stopping is enabled, otherwise early stopping is + disabled. warm_start : bool, optional (default=False) When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble. For results to be valid, the @@ -880,11 +864,11 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, Proportion (or absolute size) of training data to set aside as validation data for early stopping. If None, early stopping is done on the training data. - n_iter_no_change : int or None, optional (default=10) + n_iter_no_change : int, optional (default=10) Used to determine when to "early stop". 
The fitting process is stopped when none of the last ``n_iter_no_change`` scores are better than the ``n_iter_no_change - 1`` -th-to-last one, up to some - tolerance. If None or 0, no early-stopping is done. + tolerance. Ignored if ``early_stopping`` is False. tol : float or None, optional (default=1e-7) The absolute tolerance to use when comparing scores. The higher the tolerance, the more likely we are to early stop: higher tolerance @@ -927,9 +911,9 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, >>> from sklearn.ensemble import HistGradientBoostingRegressor >>> from sklearn.datasets import load_iris >>> X, y = load_iris(return_X_y=True) - >>> clf = HistGradientBoostingClassifier(random_state=42).fit(X, y) + >>> clf = HistGradientBoostingClassifier().fit(X, y) >>> clf.score(X, y) - 0.98... + 1.0 """ _VALID_LOSSES = ('binary_crossentropy', 'categorical_crossentropy', @@ -938,15 +922,15 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, def __init__(self, loss='auto', learning_rate=0.1, max_iter=100, max_leaf_nodes=31, max_depth=None, min_samples_leaf=20, l2_regularization=0., max_bins=256, warm_start=False, - scoring=None, validation_fraction=0.1, n_iter_no_change=10, - tol=1e-7, verbose=0, random_state=None): + early_stopping='auto', scoring=None, validation_fraction=0.1, + n_iter_no_change=10, tol=1e-7, verbose=0, random_state=None): super(HistGradientBoostingClassifier, self).__init__( loss=loss, learning_rate=learning_rate, max_iter=max_iter, max_leaf_nodes=max_leaf_nodes, max_depth=max_depth, min_samples_leaf=min_samples_leaf, l2_regularization=l2_regularization, max_bins=max_bins, - warm_start=warm_start, scoring=scoring, - validation_fraction=validation_fraction, + warm_start=warm_start, early_stopping=early_stopping, + scoring=scoring, validation_fraction=validation_fraction, n_iter_no_change=n_iter_no_change, tol=tol, verbose=verbose, random_state=random_state) From 42f3d954ebf68df2858ed008a5b9ebfdf6e47f74 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Tue, 6 Aug 2019 15:38:05 +0200 Subject: [PATCH 10/33] Swap warm_start and early_stopping --- .../_hist_gradient_boosting/gradient_boosting.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index e09de478e2ea5..8421aa29a8267 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -745,7 +745,7 @@ class HistGradientBoostingRegressor(BaseHistGradientBoosting, RegressorMixin): def __init__(self, loss='least_squares', learning_rate=0.1, max_iter=100, max_leaf_nodes=31, max_depth=None, min_samples_leaf=20, l2_regularization=0., max_bins=256, - warm_start=False, early_stopping='auto', scoring=None, + early_stopping='auto', warm_start=False, scoring=None, validation_fraction=0.1, n_iter_no_change=10, tol=1e-7, verbose=0, random_state=None): super(HistGradientBoostingRegressor, self).__init__( @@ -753,7 +753,7 @@ def __init__(self, loss='least_squares', learning_rate=0.1, max_leaf_nodes=max_leaf_nodes, max_depth=max_depth, min_samples_leaf=min_samples_leaf, l2_regularization=l2_regularization, max_bins=max_bins, - warm_start=warm_start, early_stopping=early_stopping, + early_stopping=early_stopping, warm_start=warm_start, scoring=scoring, validation_fraction=validation_fraction, n_iter_no_change=n_iter_no_change, tol=tol, verbose=verbose, 
random_state=random_state) @@ -921,15 +921,15 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, def __init__(self, loss='auto', learning_rate=0.1, max_iter=100, max_leaf_nodes=31, max_depth=None, min_samples_leaf=20, - l2_regularization=0., max_bins=256, warm_start=False, - early_stopping='auto', scoring=None, validation_fraction=0.1, + l2_regularization=0., max_bins=256, early_stopping='auto', + warm_start=False, scoring=None, validation_fraction=0.1, n_iter_no_change=10, tol=1e-7, verbose=0, random_state=None): super(HistGradientBoostingClassifier, self).__init__( loss=loss, learning_rate=learning_rate, max_iter=max_iter, max_leaf_nodes=max_leaf_nodes, max_depth=max_depth, min_samples_leaf=min_samples_leaf, l2_regularization=l2_regularization, max_bins=max_bins, - warm_start=warm_start, early_stopping=early_stopping, + early_stopping=early_stopping, warm_start=warm_start, scoring=scoring, validation_fraction=validation_fraction, n_iter_no_change=n_iter_no_change, tol=tol, verbose=verbose, random_state=random_state) From 16a83e83dd94018d26d2cf3e8ce4320dfe79cac1 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Tue, 6 Aug 2019 15:40:25 +0200 Subject: [PATCH 11/33] Update validation_fraction documentation --- .../_hist_gradient_boosting/gradient_boosting.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 8421aa29a8267..630732eedbaa4 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -27,8 +27,8 @@ class BaseHistGradientBoosting(BaseEstimator, ABC): @abstractmethod def __init__(self, loss, learning_rate, max_iter, max_leaf_nodes, max_depth, min_samples_leaf, l2_regularization, max_bins, - warm_start, scoring, validation_fraction, n_iter_no_change, - tol, verbose, random_state): + early_stopping, warm_start, scoring, validation_fraction, + n_iter_no_change, tol, verbose, random_state): self.loss = loss self.learning_rate = learning_rate self.max_iter = max_iter @@ -37,6 +37,7 @@ def __init__(self, loss, learning_rate, max_iter, max_leaf_nodes, self.min_samples_leaf = min_samples_leaf self.l2_regularization = l2_regularization self.max_bins = max_bins + self.early_stopping = early_stopping self.warm_start = warm_start self.scoring = scoring self.validation_fraction = validation_fraction @@ -121,9 +122,6 @@ def fit(self, X, y): self.loss_ = self._get_loss() - self.do_early_stopping_ = (self.n_iter_no_change is not None and - self.n_iter_no_change > 0) - # create validation data if needed self._use_validation_data = self.validation_fraction is not None if self.do_early_stopping_ and self._use_validation_data: @@ -688,7 +686,7 @@ class HistGradientBoostingRegressor(BaseHistGradientBoosting, RegressorMixin): validation_fraction : int or float or None, optional (default=0.1) Proportion (or absolute size) of training data to set aside as validation data for early stopping. If None, early stopping is done on - the training data. Only used if ``n_iter_no_change`` is not None. + the training data. Only used if ``early_stopping`` is True. n_iter_no_change : int, optional (default=10) Used to determine when to "early stop". 
The fitting process is stopped when none of the last ``n_iter_no_change`` scores are better @@ -863,7 +861,7 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, validation_fraction : int or float or None, optional (default=0.1) Proportion (or absolute size) of training data to set aside as validation data for early stopping. If None, early stopping is done on - the training data. + the training data. Only used if ``early_stopping`` is True. n_iter_no_change : int, optional (default=10) Used to determine when to "early stop". The fitting process is stopped when none of the last ``n_iter_no_change`` scores are better From 1e993d285b79cac142ffcd8d064c37f990d1edae Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Tue, 6 Aug 2019 15:45:58 +0200 Subject: [PATCH 12/33] Update docstrings with early stopping --- .../_hist_gradient_boosting/gradient_boosting.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 630732eedbaa4..424df34ace4c3 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -122,6 +122,9 @@ def fit(self, X, y): self.loss_ = self._get_loss() + self.do_early_stopping_ = (self.n_iter_no_change is not None and + self.n_iter_no_change > 0) + # create validation data if needed self._use_validation_data = self.validation_fraction is not None if self.do_early_stopping_ and self._use_validation_data: @@ -682,16 +685,16 @@ class HistGradientBoostingRegressor(BaseHistGradientBoosting, RegressorMixin): string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`). If None, the estimator's default scorer is used. If ``scoring='loss'``, early stopping is checked w.r.t the loss value. - Only used if ``n_iter_no_change`` is not None. + Only used if early stopping is performed. validation_fraction : int or float or None, optional (default=0.1) Proportion (or absolute size) of training data to set aside as validation data for early stopping. If None, early stopping is done on - the training data. Only used if ``early_stopping`` is True. + the training data. Only used if early stopping is performed. n_iter_no_change : int, optional (default=10) Used to determine when to "early stop". The fitting process is stopped when none of the last ``n_iter_no_change`` scores are better than the ``n_iter_no_change - 1`` -th-to-last one, up to some - tolerance. Ignored if ``early_stopping`` is False. + tolerance. Only used if early stopping is performed. tol : float or None, optional (default=1e-7) The absolute tolerance to use when comparing scores during early stopping. The higher the tolerance, the more likely we are to early @@ -857,16 +860,16 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`). If None, the estimator's default scorer is used. If ``scoring='loss'``, early stopping is checked - w.r.t the loss value. Only used if ``n_iter_no_change`` is not None. + w.r.t the loss value. Only used if early stopping is performed. validation_fraction : int or float or None, optional (default=0.1) Proportion (or absolute size) of training data to set aside as validation data for early stopping. If None, early stopping is done on - the training data. Only used if ``early_stopping`` is True. + the training data. Only used if early stopping is performed. 
n_iter_no_change : int, optional (default=10) Used to determine when to "early stop". The fitting process is stopped when none of the last ``n_iter_no_change`` scores are better than the ``n_iter_no_change - 1`` -th-to-last one, up to some - tolerance. Ignored if ``early_stopping`` is False. + tolerance. Only used if early stopping is performed. tol : float or None, optional (default=1e-7) The absolute tolerance to use when comparing scores. The higher the tolerance, the more likely we are to early stop: higher tolerance From e08dc0bb047ac03cf549cb99df0b3b34e7710278 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Tue, 6 Aug 2019 16:29:06 +0200 Subject: [PATCH 13/33] Remove n_iter_no_cjange in set_params --- sklearn/utils/estimator_checks.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 10f5f4d76dfa8..c17d8f44bf8c2 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -388,9 +388,6 @@ def set_checking_parameters(estimator): # The default min_samples_leaf (20) isn't appropriate for small # datasets (only very shallow trees are built) that the checks use. estimator.set_params(min_samples_leaf=5) - # Early stopping is not appropriate for some tests in test_estimators - # because the actual training set is smaller than the given data - estimator.set_params(n_iter_no_change=None) # Speed-up by reducing the number of CV or splits for CV estimators loo_cv = ['RidgeCV'] From 30c7139eff86ecc657f31e6be1a78b897da03125 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Tue, 6 Aug 2019 16:40:32 +0200 Subject: [PATCH 14/33] Update the tests in _hist... --- .../gradient_boosting.py | 11 ++-- .../tests/test_gradient_boosting.py | 66 +++++++++++-------- .../tests/test_warm_start.py | 35 +++++----- 3 files changed, 61 insertions(+), 51 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 424df34ace4c3..48646247b040a 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -64,7 +64,7 @@ def _validate_parameters(self): if self.max_iter < 1: raise ValueError('max_iter={} must not be smaller ' 'than 1.'.format(self.max_iter)) - if self.n_iter_no_change is not None and self.n_iter_no_change < 0: + if self.n_iter_no_change < 0: raise ValueError('n_iter_no_change={} must be ' 'positive.'.format(self.n_iter_no_change)) if (self.validation_fraction is not None and @@ -108,7 +108,7 @@ def fit(self, X, y): self._rng = rng self._validate_parameters() - self.n_features_ = X.shape[1] # used for validation in predict() + n_samples, self.n_features_ = X.shape # used for validation in predict # we need this stateful variable to tell raw_predict() that it was # called from fit() (this current method), and that the data it has @@ -121,9 +121,10 @@ def fit(self, X, y): self._in_fit = True self.loss_ = self._get_loss() - - self.do_early_stopping_ = (self.n_iter_no_change is not None and - self.n_iter_no_change > 0) + if self.early_stopping == 'auto': + self.do_early_stopping_ = n_samples > 1000 + else: + self.do_early_stopping_ = self.early_stopping # create validation data if needed self._use_validation_data = self.validation_fraction is not None diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 
327b0dbd9147a..8676483339f55 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -45,7 +45,7 @@ def test_init_parameters_validation(GradientBoosting, X, y, params, err_msg): def test_invalid_classification_loss(): binary_clf = HistGradientBoostingClassifier( - loss="binary_crossentropy", n_iter_no_change=None) + loss="binary_crossentropy") err_msg = ("loss='binary_crossentropy' is not defined for multiclass " "classification with n_classes=3, use " "loss='categorical_crossentropy' instead") @@ -54,27 +54,28 @@ def test_invalid_classification_loss(): @pytest.mark.parametrize( - 'scoring, validation_fraction, n_iter_no_change, tol', [ - ('neg_mean_squared_error', .1, 5, 1e-7), # use scorer - ('neg_mean_squared_error', None, 5, 1e-1), # use scorer on train data - (None, .1, 5, 1e-7), # same with default scorer - (None, None, 5, 1e-1), - ('loss', .1, 5, 1e-7), # use loss - ('loss', None, 5, 1e-1), # use loss on training data - (None, None, None, None), # no early stopping + 'scoring, validation_fraction, early_stopping, n_iter_no_change, tol', [ + ('neg_mean_squared_error', .1, True, 5, 1e-7), # use scorer + ('neg_mean_squared_error', None, True, 5, 1e-1), # use scorer on train + (None, .1, True, 5, 1e-7), # same with default scorer + (None, None, True, 5, 1e-1), + ('loss', .1, True, 5, 1e-7), # use loss + ('loss', None, True, 5, 1e-1), # use loss on training data + (None, None, False, 5, None), # no early stopping ]) def test_early_stopping_regression(scoring, validation_fraction, - n_iter_no_change, tol): + early_stopping, n_iter_no_change, tol): max_iter = 200 X, y = make_regression(n_samples=50, random_state=0) gb = HistGradientBoostingRegressor( - verbose=1, # just for coverage + verbose=0, # just for coverage min_samples_leaf=5, # easier to overfit fast scoring=scoring, tol=tol, + early_stopping=early_stopping, validation_fraction=validation_fraction, max_iter=max_iter, n_iter_no_change=n_iter_no_change, @@ -82,7 +83,7 @@ def test_early_stopping_regression(scoring, validation_fraction, ) gb.fit(X, y) - if n_iter_no_change is not None: + if early_stopping is True: assert n_iter_no_change <= gb.n_iter_ < max_iter else: assert gb.n_iter_ == max_iter @@ -94,27 +95,28 @@ def test_early_stopping_regression(scoring, validation_fraction, random_state=0) )) @pytest.mark.parametrize( - 'scoring, validation_fraction, n_iter_no_change, tol', [ - ('accuracy', .1, 5, 1e-7), # use scorer - ('accuracy', None, 5, 1e-1), # use scorer on training data - (None, .1, 5, 1e-7), # same with default scorerscor - (None, None, 5, 1e-1), - ('loss', .1, 5, 1e-7), # use loss - ('loss', None, 5, 1e-1), # use loss on training data - (None, None, None, None), # no early stopping + 'scoring, validation_fraction, early_stopping, n_iter_no_change, tol', [ + ('accuracy', .1, True, 5, 1e-7), # use scorer + ('accuracy', None, True, 5, 1e-1), # use scorer on training data + (None, .1, True, 5, 1e-7), # same with default scorer + (None, None, True, 5, 1e-1), + ('loss', .1, True, 5, 1e-7), # use loss + ('loss', None, True, 5, 1e-1), # use loss on training data + (None, None, False, 5, None), # no early stopping ]) def test_early_stopping_classification(data, scoring, validation_fraction, - n_iter_no_change, tol): + early_stopping, n_iter_no_change, tol): max_iter = 50 X, y = data gb = HistGradientBoostingClassifier( - verbose=1, # just for coverage + verbose=0, # just for coverage min_samples_leaf=5, # easier to 
overfit fast scoring=scoring, tol=tol, + early_stopping=early_stopping, validation_fraction=validation_fraction, max_iter=max_iter, n_iter_no_change=n_iter_no_change, @@ -122,7 +124,7 @@ def test_early_stopping_classification(data, scoring, validation_fraction, ) gb.fit(X, y) - if n_iter_no_change is not None: + if early_stopping is True: assert n_iter_no_change <= gb.n_iter_ < max_iter else: assert gb.n_iter_ == max_iter @@ -130,13 +132,19 @@ def test_early_stopping_classification(data, scoring, validation_fraction, @pytest.mark.parametrize('GradientBoosting, X, y', [ (HistGradientBoostingClassifier, X_classification, y_classification), - (HistGradientBoostingRegressor, X_regression, y_regression) + (HistGradientBoostingClassifier, *make_classification(n_samples=1001)), + (HistGradientBoostingRegressor, X_regression, y_regression), + (HistGradientBoostingRegressor, *make_regression(n_samples=1001)) ]) def test_early_stopping_default(GradientBoosting, X, y): - # Test that early stopping is enabled by default + # Test that early stopping is enabled by default if and only if there + # are more than 1000 samples gb = GradientBoosting(max_iter=200) gb.fit(X, y) - assert gb.n_iter_ < gb.max_iter + if X.shape[0] > 1000: + assert gb.n_iter_ < gb.max_iter + else: + assert gb.n_iter_ == gb.max_iter @pytest.mark.parametrize( @@ -169,7 +177,7 @@ def test_binning_train_validation_are_separated(): rng = np.random.RandomState(0) validation_fraction = .2 gb = HistGradientBoostingClassifier( - n_iter_no_change=5, + early_stopping=True, validation_fraction=validation_fraction, random_state=rng ) @@ -215,7 +223,7 @@ def test_small_trainset(): y = [[class_] * int(prop * n_samples) for (class_, prop) in original_distrib.items()] y = shuffle(np.concatenate(y)) - gb = HistGradientBoostingClassifier() + gb = HistGradientBoostingClassifier(early_stopping=False) # Compute the small training set X_small, y_small = gb._get_small_trainset(X, y, seed=42) @@ -240,6 +248,6 @@ def test_infinite_values(): y = np.array([0, 0, 1, 1]) gbdt = HistGradientBoostingRegressor(min_samples_leaf=1, - n_iter_no_change=None) + early_stopping=False) gbdt.fit(X, y) np.testing.assert_allclose(gbdt.predict(X), y, atol=1e-4) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py index b1ef4a292c4b6..0f3c04f4d6494 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py @@ -37,11 +37,11 @@ def test_max_iter_with_warm_start_validation(GradientBoosting, X, y): # is smaller than the number of iterations from the previous fit when warm # start is True. - estimator = GradientBoosting(max_iter=50, warm_start=True, - n_iter_no_change=None) + estimator = GradientBoosting(max_iter=10, early_stopping=False, + warm_start=True) estimator.fit(X, y) - estimator.set_params(max_iter=25) - err_msg = ('max_iter=25 must be larger than or equal to n_iter_=50 ' + estimator.set_params(max_iter=5) + err_msg = ('max_iter=5 must be larger than or equal to n_iter_=10 ' 'when warm_start==True') with pytest.raises(ValueError, match=err_msg): estimator.fit(X, y) @@ -76,14 +76,14 @@ def test_warm_start_yields_identical_results(GradientBoosting, X, y): ]) def test_warm_start_max_depth(GradientBoosting, X, y): # Test if possible to fit trees of different depth in ensemble. 
- gb = GradientBoosting(max_iter=100, min_samples_leaf=1, - warm_start=True, max_depth=2, n_iter_no_change=None) + gb = GradientBoosting(max_iter=20, min_samples_leaf=1, + warm_start=True, max_depth=2, early_stopping=False) gb.fit(X, y) - gb.set_params(max_iter=110, max_depth=3) + gb.set_params(max_iter=30, max_depth=3, n_iter_no_change=110) gb.fit(X, y) - # First 100 trees have max_depth == 2 - for i in range(100): + # First 20 trees have max_depth == 2 + for i in range(20): assert gb._predictors[i][0].get_max_depth() == 2 # Last 10 trees have max_depth == 3 for i in range(1, 11): @@ -100,8 +100,8 @@ def test_warm_start_early_stopping(GradientBoosting, X, y): n_iter_no_change = 5 gb = GradientBoosting( - n_iter_no_change=n_iter_no_change, max_iter=10000, - random_state=42, warm_start=True, tol=1e-3 + n_iter_no_change=n_iter_no_change, max_iter=200, + random_state=42, warm_start=True, tol=1e-3, early_stopping=False ) gb.fit(X, y) n_iter_first_fit = gb.n_iter_ @@ -116,12 +116,12 @@ def test_warm_start_early_stopping(GradientBoosting, X, y): ]) def test_warm_start_equal_n_estimators(GradientBoosting, X, y): # Test if warm start with equal n_estimators does nothing - gb_1 = GradientBoosting(max_depth=2, n_iter_no_change=None) + gb_1 = GradientBoosting(max_depth=2, n_iter_no_change=5) gb_1.fit(X, y) gb_2 = clone(gb_1) gb_2.set_params(max_iter=gb_1.max_iter, warm_start=True, - n_iter_no_change=None) + n_iter_no_change=5) gb_2.fit(X, y) # Check that both predictors are equal @@ -168,15 +168,16 @@ def _get_rng(rng_type): return np.random.RandomState(0) random_state = _get_rng(rng_type) - gb_1 = GradientBoosting(n_iter_no_change=5, max_iter=2, - random_state=random_state) + gb_1 = GradientBoosting(n_iter_no_change=5, early_stopping=True, + max_iter=2, random_state=random_state) gb_1.fit(X, y) train_val_seed_1 = gb_1._train_val_split_seed small_trainset_seed_1 = gb_1._small_trainset_seed random_state = _get_rng(rng_type) - gb_2 = GradientBoosting(n_iter_no_change=5, max_iter=2, - random_state=random_state, warm_start=True) + gb_2 = GradientBoosting(n_iter_no_change=5, early_stopping=True, + max_iter=2, random_state=random_state, + warm_start=True) gb_2.fit(X, y) # inits state train_val_seed_2 = gb_2._train_val_split_seed small_trainset_seed_2 = gb_2._small_trainset_seed From 36e9975cc3eeeebf01a572ac269422041a91ed99 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Tue, 6 Aug 2019 16:42:33 +0200 Subject: [PATCH 15/33] Replace n_iter_no_change with early_stopping in test_partial_dependence --- sklearn/inspection/tests/test_partial_dependence.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 26681b2579947..3c4dfcc3381e8 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -158,9 +158,9 @@ def test_grid_from_X_error(grid_resolution, percentiles, err_msg): (LinearRegression(), 'brute'), (GradientBoostingRegressor(random_state=0), 'brute'), (GradientBoostingRegressor(random_state=0), 'recursion'), - (HistGradientBoostingRegressor(random_state=0, n_iter_no_change=None), + (HistGradientBoostingRegressor(random_state=0, early_stopping=False), 'brute'), - (HistGradientBoostingRegressor(random_state=0, n_iter_no_change=None), + (HistGradientBoostingRegressor(random_state=0, early_stopping=False), 'recursion')] ) def test_partial_dependence_helpers(est, method, target_feature): From 
081ee188838ef9167e9034ddaca5695f2e2ae802 Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Tue, 6 Aug 2019 16:44:01 +0200 Subject: [PATCH 16/33] Remove early_stopping in partial_dependence --- sklearn/inspection/tests/test_partial_dependence.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 3c4dfcc3381e8..16ed23a790a8b 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -158,9 +158,9 @@ def test_grid_from_X_error(grid_resolution, percentiles, err_msg): (LinearRegression(), 'brute'), (GradientBoostingRegressor(random_state=0), 'brute'), (GradientBoostingRegressor(random_state=0), 'recursion'), - (HistGradientBoostingRegressor(random_state=0, early_stopping=False), + (HistGradientBoostingRegressor(random_state=0), 'brute'), - (HistGradientBoostingRegressor(random_state=0, early_stopping=False), + (HistGradientBoostingRegressor(random_state=0), 'recursion')] ) def test_partial_dependence_helpers(est, method, target_feature): From 72681fdaf86e067007ab598985bc3b772248a18c Mon Sep 17 00:00:00 2001 From: "johann.faouzi" Date: Tue, 6 Aug 2019 16:45:13 +0200 Subject: [PATCH 17/33] One line is enough --- sklearn/inspection/tests/test_partial_dependence.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 16ed23a790a8b..19399224e07ba 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -158,10 +158,8 @@ def test_grid_from_X_error(grid_resolution, percentiles, err_msg): (LinearRegression(), 'brute'), (GradientBoostingRegressor(random_state=0), 'brute'), (GradientBoostingRegressor(random_state=0), 'recursion'), - (HistGradientBoostingRegressor(random_state=0), - 'brute'), - (HistGradientBoostingRegressor(random_state=0), - 'recursion')] + (HistGradientBoostingRegressor(random_state=0), 'brute'), + (HistGradientBoostingRegressor(random_state=0), 'recursion')] ) def test_partial_dependence_helpers(est, method, target_feature): # Check that what is returned by _partial_dependence_brute or From eda29c8a9821bdce0551034bc472281ec967ef4c Mon Sep 17 00:00:00 2001 From: Johann Faouzi Date: Thu, 22 Aug 2019 13:59:20 +0200 Subject: [PATCH 18/33] Use 10k sample threshold for auto early stopping and resolve conflicts --- .../gradient_boosting.py | 32 ++++++++++--------- .../tests/test_gradient_boosting.py | 8 ++--- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 48646247b040a..fcc0c54cc3208 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -122,7 +122,7 @@ def fit(self, X, y): self.loss_ = self._get_loss() if self.early_stopping == 'auto': - self.do_early_stopping_ = n_samples > 1000 + self.do_early_stopping_ = n_samples > 10000 else: self.do_early_stopping_ = self.early_stopping @@ -666,12 +666,13 @@ class HistGradientBoostingRegressor(BaseHistGradientBoosting, RegressorMixin): l2_regularization : float, optional (default=0) The L2 regularization parameter. Use ``0`` for no regularization (default). 
- max_bins : int, optional (default=256) - The maximum number of bins to use. Before training, each feature of - the input array ``X`` is binned into at most ``max_bins`` bins, which - allows for a much faster training stage. Features with a small - number of unique values may use less than ``max_bins`` bins. Must be no - larger than 256. + max_bins : int, optional (default=255) + The maximum number of bins to use for non-missing values. Before + training, each feature of the input array `X` is binned into + integer-valued bins, which allows for a much faster training stage. + Features with a small number of unique values may use less than + ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin + is always reserved for missing values. Must be no larger than 255. early_stopping : 'auto' or bool (default='auto') If 'auto', early stopping is enabled if the sample size is larger than 1000. If True, early stopping is enabled, otherwise early stopping is @@ -746,7 +747,7 @@ class HistGradientBoostingRegressor(BaseHistGradientBoosting, RegressorMixin): def __init__(self, loss='least_squares', learning_rate=0.1, max_iter=100, max_leaf_nodes=31, max_depth=None, - min_samples_leaf=20, l2_regularization=0., max_bins=256, + min_samples_leaf=20, l2_regularization=0., max_bins=255, early_stopping='auto', warm_start=False, scoring=None, validation_fraction=0.1, n_iter_no_change=10, tol=1e-7, verbose=0, random_state=None): @@ -841,12 +842,13 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, since only very shallow trees would be built. l2_regularization : float, optional (default=0) The L2 regularization parameter. Use 0 for no regularization. - max_bins : int, optional (default=256) - The maximum number of bins to use. Before training, each feature of - the input array ``X`` is binned into at most ``max_bins`` bins, which - allows for a much faster training stage. Features with a small - number of unique values may use less than ``max_bins`` bins. Must be no - larger than 256. + max_bins : int, optional (default=255) + The maximum number of bins to use for non-missing values. Before + training, each feature of the input array `X` is binned into + integer-valued bins, which allows for a much faster training stage. + Features with a small number of unique values may use less than + ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin + is always reserved for missing values. Must be no larger than 255. early_stopping : 'auto' or bool (default='auto') If 'auto', early stopping is enabled if the sample size is larger than 1000. 
If True, early stopping is enabled, otherwise early stopping is @@ -923,7 +925,7 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, def __init__(self, loss='auto', learning_rate=0.1, max_iter=100, max_leaf_nodes=31, max_depth=None, min_samples_leaf=20, - l2_regularization=0., max_bins=256, early_stopping='auto', + l2_regularization=0., max_bins=255, early_stopping='auto', warm_start=False, scoring=None, validation_fraction=0.1, n_iter_no_change=10, tol=1e-7, verbose=0, random_state=None): super(HistGradientBoostingClassifier, self).__init__( diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 8676483339f55..c89a0d2079beb 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -132,16 +132,16 @@ def test_early_stopping_classification(data, scoring, validation_fraction, @pytest.mark.parametrize('GradientBoosting, X, y', [ (HistGradientBoostingClassifier, X_classification, y_classification), - (HistGradientBoostingClassifier, *make_classification(n_samples=1001)), + (HistGradientBoostingClassifier, *make_classification(n_samples=10001)), (HistGradientBoostingRegressor, X_regression, y_regression), - (HistGradientBoostingRegressor, *make_regression(n_samples=1001)) + (HistGradientBoostingRegressor, *make_regression(n_samples=10001)) ]) def test_early_stopping_default(GradientBoosting, X, y): # Test that early stopping is enabled by default if and only if there - # are more than 1000 samples + # are more than 10000 samples gb = GradientBoosting(max_iter=200) gb.fit(X, y) - if X.shape[0] > 1000: + if X.shape[0] > 10000: assert gb.n_iter_ < gb.max_iter else: assert gb.n_iter_ == gb.max_iter From 3c4cfeaca743915ee95ee5aa19dab339c86bf513 Mon Sep 17 00:00:00 2001 From: Johann Faouzi Date: Thu, 22 Aug 2019 14:51:37 +0200 Subject: [PATCH 19/33] Increase the maximum number of iterations to check early stopping --- .../_hist_gradient_boosting/tests/test_gradient_boosting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index c6a45d96303e2..a9f9e8eaf1081 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -144,7 +144,7 @@ def test_early_stopping_classification(data, scoring, validation_fraction, def test_early_stopping_default(GradientBoosting, X, y): # Test that early stopping is enabled by default if and only if there # are more than 10000 samples - gb = GradientBoosting(max_iter=200) + gb = GradientBoosting(min_samples_leaf=50, max_iter=1000) gb.fit(X, y) if X.shape[0] > 10000: assert gb.n_iter_ < gb.max_iter From 73f6756221e4cb1b3d63cdb527ea00e21aa560b0 Mon Sep 17 00:00:00 2001 From: Johann Faouzi Date: Thu, 22 Aug 2019 15:30:41 +0200 Subject: [PATCH 20/33] Fix min number of samples for early stopping in docstrings --- sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 161e3dd4311b4..b6bf86115a316 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ 
b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -704,7 +704,7 @@ class HistGradientBoostingRegressor(BaseHistGradientBoosting, RegressorMixin): is always reserved for missing values. Must be no larger than 255. early_stopping : 'auto' or bool (default='auto') If 'auto', early stopping is enabled if the sample size is larger than - 1000. If True, early stopping is enabled, otherwise early stopping is + 10000. If True, early stopping is enabled, otherwise early stopping is disabled. warm_start : bool, optional (default=False) When set to ``True``, reuse the solution of the previous call to fit @@ -888,7 +888,7 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, is always reserved for missing values. Must be no larger than 255. early_stopping : 'auto' or bool (default='auto') If 'auto', early stopping is enabled if the sample size is larger than - 1000. If True, early stopping is enabled, otherwise early stopping is + 10000. If True, early stopping is enabled, otherwise early stopping is disabled. warm_start : bool, optional (default=False) When set to ``True``, reuse the solution of the previous call to fit From df72eef5d8742a2a64eb5037279119c3134b0d1a Mon Sep 17 00:00:00 2001 From: Johann Faouzi Date: Thu, 22 Aug 2019 16:51:24 +0200 Subject: [PATCH 21/33] Update ensemble.rst with new early stopping behavior --- doc/modules/ensemble.rst | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index fde8f40db6c8c..4d61a73e5a203 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -888,11 +888,13 @@ generally recommended to use as many bins as possible, which is the default. The ``l2_regularization`` parameter is a regularizer on the loss function and corresponds to :math:`\lambda` in equation (2) of [XGBoost]_. -Note that **early-stopping is enabled by default**. The early-stopping -behaviour is controlled via the ``scoring``, ``validation_fraction``, +Note that **early-stopping is enabled by default if the number of samples is +larger than 10,000**. The early-stopping behaviour is controlled via the +``early-stopping``, ``scoring``, ``validation_fraction``, ``n_iter_no_change``, and ``tol`` parameters. It is possible to early-stop using an arbitrary :term:`scorer`, or just the training or validation loss. By -default, early-stopping is performed using the default :term:`scorer` of +default, early-stopping is performed if there are at least 10,000 samples in +the training set, using the default :term:`scorer` of the estimator on a validation set. Missing values support @@ -1179,7 +1181,7 @@ The following example shows how to fit the VotingRegressor:: >>> # Loading some example data >>> X, y = load_boston(return_X_y=True) - + >>> # Training classifiers >>> reg1 = GradientBoostingRegressor(random_state=1, n_estimators=10) >>> reg2 = RandomForestRegressor(random_state=1, n_estimators=10) From 99a830f270709f2570868f1687a9f103ec9e736e Mon Sep 17 00:00:00 2001 From: Johann Faouzi Date: Tue, 3 Sep 2019 16:36:51 +0200 Subject: [PATCH 22/33] Update the user guide with the new default scoring --- doc/modules/ensemble.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index 4d61a73e5a203..4783c613175c1 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -894,8 +894,7 @@ larger than 10,000**. The early-stopping behaviour is controlled via the ``n_iter_no_change``, and ``tol`` parameters. 
It is possible to early-stop using an arbitrary :term:`scorer`, or just the training or validation loss. By default, early-stopping is performed if there are at least 10,000 samples in -the training set, using the default :term:`scorer` of -the estimator on a validation set. +the training set, using the validation loss. Missing values support ---------------------- From f019d47fec92e3bb4ac93a13f439816c586fa325 Mon Sep 17 00:00:00 2001 From: Johann Faouzi Date: Tue, 3 Sep 2019 16:39:39 +0200 Subject: [PATCH 23/33] Update code after reviews --- .../gradient_boosting.py | 37 ++++++++++--------- .../tests/test_gradient_boosting.py | 30 +++++++++------ .../tests/test_warm_start.py | 14 ++++--- 3 files changed, 45 insertions(+), 36 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index b6bf86115a316..e803dd5a00031 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -28,7 +28,7 @@ class BaseHistGradientBoosting(BaseEstimator, ABC): @abstractmethod def __init__(self, loss, learning_rate, max_iter, max_leaf_nodes, max_depth, min_samples_leaf, l2_regularization, max_bins, - early_stopping, warm_start, scoring, validation_fraction, + warm_start, early_stopping, scoring, validation_fraction, n_iter_no_change, tol, verbose, random_state): self.loss = loss self.learning_rate = learning_rate @@ -38,8 +38,8 @@ def __init__(self, loss, learning_rate, max_iter, max_leaf_nodes, self.min_samples_leaf = min_samples_leaf self.l2_regularization = l2_regularization self.max_bins = max_bins - self.early_stopping = early_stopping self.warm_start = warm_start + self.early_stopping = early_stopping self.scoring = scoring self.validation_fraction = validation_fraction self.n_iter_no_change = n_iter_no_change @@ -702,16 +702,16 @@ class HistGradientBoostingRegressor(BaseHistGradientBoosting, RegressorMixin): Features with a small number of unique values may use less than ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin is always reserved for missing values. Must be no larger than 255. - early_stopping : 'auto' or bool (default='auto') - If 'auto', early stopping is enabled if the sample size is larger than - 10000. If True, early stopping is enabled, otherwise early stopping is - disabled. warm_start : bool, optional (default=False) When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble. For results to be valid, the estimator should be re-trained on the same data only. See :term:`the Glossary `. - scoring : str or callable or None, optional (default=None) + early_stopping : 'auto' or bool (default='auto') + If 'auto', early stopping is enabled if the sample size is larger than + 10000. If True, early stopping is enabled, otherwise early stopping is + disabled. + scoring : str or callable or None, optional (default='loss') Scoring parameter to use for early stopping. It can be a single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`). If None, the estimator's default scorer is used. 
If @@ -777,7 +777,7 @@ class HistGradientBoostingRegressor(BaseHistGradientBoosting, RegressorMixin): def __init__(self, loss='least_squares', learning_rate=0.1, max_iter=100, max_leaf_nodes=31, max_depth=None, min_samples_leaf=20, l2_regularization=0., max_bins=255, - early_stopping='auto', warm_start=False, scoring=None, + warm_start=False, early_stopping='auto', scoring='loss', validation_fraction=0.1, n_iter_no_change=10, tol=1e-7, verbose=0, random_state=None): super(HistGradientBoostingRegressor, self).__init__( @@ -785,7 +785,7 @@ def __init__(self, loss='least_squares', learning_rate=0.1, max_leaf_nodes=max_leaf_nodes, max_depth=max_depth, min_samples_leaf=min_samples_leaf, l2_regularization=l2_regularization, max_bins=max_bins, - early_stopping=early_stopping, warm_start=warm_start, + warm_start=warm_start, early_stopping=early_stopping, scoring=scoring, validation_fraction=validation_fraction, n_iter_no_change=n_iter_no_change, tol=tol, verbose=verbose, random_state=random_state) @@ -886,16 +886,16 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, Features with a small number of unique values may use less than ``max_bins`` bins. In addition to the ``max_bins`` bins, one more bin is always reserved for missing values. Must be no larger than 255. - early_stopping : 'auto' or bool (default='auto') - If 'auto', early stopping is enabled if the sample size is larger than - 10000. If True, early stopping is enabled, otherwise early stopping is - disabled. warm_start : bool, optional (default=False) When set to ``True``, reuse the solution of the previous call to fit and add more estimators to the ensemble. For results to be valid, the estimator should be re-trained on the same data only. See :term:`the Glossary `. - scoring : str or callable or None, optional (default=None) + early_stopping : 'auto' or bool (default='auto') + If 'auto', early stopping is enabled if the sample size is larger than + 10000. If True, early stopping is enabled, otherwise early stopping is + disabled. + scoring : str or callable or None, optional (default='loss') Scoring parameter to use for early stopping. It can be a single string (see :ref:`scoring_parameter`) or a callable (see :ref:`scoring`). 
If None, the estimator's default scorer @@ -962,15 +962,16 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, def __init__(self, loss='auto', learning_rate=0.1, max_iter=100, max_leaf_nodes=31, max_depth=None, min_samples_leaf=20, - l2_regularization=0., max_bins=255, early_stopping='auto', - warm_start=False, scoring=None, validation_fraction=0.1, - n_iter_no_change=10, tol=1e-7, verbose=0, random_state=None): + l2_regularization=0., max_bins=255, warm_start=False, + early_stopping='auto', scoring='loss', + validation_fraction=0.1, n_iter_no_change=10, tol=1e-7, + verbose=0, random_state=None): super(HistGradientBoostingClassifier, self).__init__( loss=loss, learning_rate=learning_rate, max_iter=max_iter, max_leaf_nodes=max_leaf_nodes, max_depth=max_depth, min_samples_leaf=min_samples_leaf, l2_regularization=l2_regularization, max_bins=max_bins, - early_stopping=early_stopping, warm_start=warm_start, + warm_start=warm_start, early_stopping=early_stopping, scoring=scoring, validation_fraction=validation_fraction, n_iter_no_change=n_iter_no_change, tol=tol, verbose=verbose, random_state=random_state) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index a9f9e8eaf1081..2ed0f93228343 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -19,6 +19,14 @@ X_regression, y_regression = make_regression(random_state=0) +def _make_dumb_dataset(n_samples): + """Make a dumb dataset to test early stopping.""" + rng = np.random.RandomState(42) + X_dumb = rng.randn(n_samples, 1) + y_dumb = (X_dumb[:, 0] > 0).astype('int64') + return X_dumb, y_dumb + + @pytest.mark.parametrize('GradientBoosting, X, y', [ (HistGradientBoostingClassifier, X_classification, y_classification), (HistGradientBoostingRegressor, X_regression, y_regression) @@ -49,8 +57,7 @@ def test_init_parameters_validation(GradientBoosting, X, y, params, err_msg): def test_invalid_classification_loss(): - binary_clf = HistGradientBoostingClassifier( - loss="binary_crossentropy") + binary_clf = HistGradientBoostingClassifier(loss="binary_crossentropy") err_msg = ("loss='binary_crossentropy' is not defined for multiclass " "classification with n_classes=3, use " "loss='categorical_crossentropy' instead") @@ -76,7 +83,7 @@ def test_early_stopping_regression(scoring, validation_fraction, X, y = make_regression(n_samples=50, random_state=0) gb = HistGradientBoostingRegressor( - verbose=0, # just for coverage + verbose=1, # just for coverage min_samples_leaf=5, # easier to overfit fast scoring=scoring, tol=tol, @@ -88,7 +95,7 @@ def test_early_stopping_regression(scoring, validation_fraction, ) gb.fit(X, y) - if early_stopping is True: + if early_stopping: assert n_iter_no_change <= gb.n_iter_ < max_iter else: assert gb.n_iter_ == max_iter @@ -117,7 +124,7 @@ def test_early_stopping_classification(data, scoring, validation_fraction, X, y = data gb = HistGradientBoostingClassifier( - verbose=0, # just for coverage + verbose=1, # just for coverage min_samples_leaf=5, # easier to overfit fast scoring=scoring, tol=tol, @@ -136,15 +143,15 @@ def test_early_stopping_classification(data, scoring, validation_fraction, @pytest.mark.parametrize('GradientBoosting, X, y', [ - (HistGradientBoostingClassifier, X_classification, y_classification), - (HistGradientBoostingClassifier, *make_classification(n_samples=10001)), - 
(HistGradientBoostingRegressor, X_regression, y_regression), - (HistGradientBoostingRegressor, *make_regression(n_samples=10001)) + (HistGradientBoostingClassifier, *_make_dumb_dataset(10000)), + (HistGradientBoostingClassifier, *_make_dumb_dataset(10001)), + (HistGradientBoostingRegressor, *_make_dumb_dataset(10000)), + (HistGradientBoostingRegressor, *_make_dumb_dataset(10001)) ]) def test_early_stopping_default(GradientBoosting, X, y): # Test that early stopping is enabled by default if and only if there # are more than 10000 samples - gb = GradientBoosting(min_samples_leaf=50, max_iter=1000) + gb = GradientBoosting(max_iter=10, n_iter_no_change=2, tol=1e-1) gb.fit(X, y) if X.shape[0] > 10000: assert gb.n_iter_ < gb.max_iter @@ -415,8 +422,7 @@ def test_infinite_values(): X = np.array([-np.inf, 0, 1, np.inf]).reshape(-1, 1) y = np.array([0, 0, 1, 1]) - gbdt = HistGradientBoostingRegressor(min_samples_leaf=1, - early_stopping=False) + gbdt = HistGradientBoostingRegressor(min_samples_leaf=1) gbdt.fit(X, y) np.testing.assert_allclose(gbdt.predict(X), y, atol=1e-4) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py index 0f3c04f4d6494..fbb20fcba1aef 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py @@ -11,6 +11,7 @@ from sklearn.experimental import enable_hist_gradient_boosting # noqa from sklearn.ensemble import HistGradientBoostingRegressor from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.metrics import check_scoring X_classification, y_classification = make_classification(random_state=0) @@ -116,7 +117,7 @@ def test_warm_start_early_stopping(GradientBoosting, X, y): ]) def test_warm_start_equal_n_estimators(GradientBoosting, X, y): # Test if warm start with equal n_estimators does nothing - gb_1 = GradientBoosting(max_depth=2, n_iter_no_change=5) + gb_1 = GradientBoosting(max_depth=2, early_stopping=False) gb_1.fit(X, y) gb_2 = clone(gb_1) @@ -168,16 +169,17 @@ def _get_rng(rng_type): return np.random.RandomState(0) random_state = _get_rng(rng_type) - gb_1 = GradientBoosting(n_iter_no_change=5, early_stopping=True, - max_iter=2, random_state=random_state) + gb_1 = GradientBoosting(early_stopping=True, max_iter=2, + random_state=random_state) + gb_1.set_params(scoring=check_scoring(gb_1)) gb_1.fit(X, y) train_val_seed_1 = gb_1._train_val_split_seed small_trainset_seed_1 = gb_1._small_trainset_seed random_state = _get_rng(rng_type) - gb_2 = GradientBoosting(n_iter_no_change=5, early_stopping=True, - max_iter=2, random_state=random_state, - warm_start=True) + gb_2 = GradientBoosting(early_stopping=True, max_iter=2, + random_state=random_state, warm_start=True) + gb_2.set_params(scoring=check_scoring(gb_2)) gb_2.fit(X, y) # inits state train_val_seed_2 = gb_2._train_val_split_seed small_trainset_seed_2 = gb_2._small_trainset_seed From 847ed81b7539d2f99e640929dff90c75c86b02a8 Mon Sep 17 00:00:00 2001 From: Johann Faouzi Date: Thu, 14 Nov 2019 09:35:12 +0100 Subject: [PATCH 24/33] Fix issues in tests --- .../_hist_gradient_boosting/tests/test_gradient_boosting.py | 2 +- .../ensemble/_hist_gradient_boosting/tests/test_warm_start.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 
2ed0f93228343..767381cb8ab29 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -290,7 +290,7 @@ def test_small_trainset(): y = [[class_] * int(prop * n_samples) for (class_, prop) in original_distrib.items()] y = shuffle(np.concatenate(y)) - gb = HistGradientBoostingClassifier(early_stopping=False) + gb = HistGradientBoostingClassifier() # Compute the small training set X_small, y_small = gb._get_small_trainset(X, y, seed=42) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py index fbb20fcba1aef..0025080aa274d 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py @@ -102,7 +102,7 @@ def test_warm_start_early_stopping(GradientBoosting, X, y): n_iter_no_change = 5 gb = GradientBoosting( n_iter_no_change=n_iter_no_change, max_iter=200, - random_state=42, warm_start=True, tol=1e-3, early_stopping=False + random_state=42, warm_start=True, tol=1e-3, early_stopping=True ) gb.fit(X, y) n_iter_first_fit = gb.n_iter_ From 39a69db43f665a18acd0a0d46beaef91183fc24a Mon Sep 17 00:00:00 2001 From: Johann Faouzi Date: Thu, 14 Nov 2019 09:44:08 +0100 Subject: [PATCH 25/33] Update what's new --- doc/whats_new/v0.22.rst | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 38d8cb2e3285d..4755e1257733f 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -127,6 +127,10 @@ Changelog training loss or score is now monitored on a class-wise stratified subsample to preserve the class balance of the original training set. :pr:`14194` by :user:`Johann Faouzi `. + - |Feature| Early stopping is now determined with a new `early_stopping` + parameter instead of `n_iter_no_change`. Default value is 'auto', which + enables early stopping if there are at least 10,000 samples in the + training set. :pr:`14516` by :user:`Johann Faouzi `. - |Feature| :func:`inspection.partial_dependence` and :func:`inspection.plot_partial_dependence` now support the fast 'recursion' method for both estimators. :pr:`13769` by `Nicolas Hug`_. @@ -298,19 +302,19 @@ Changelog - |Enhancement| SVM now throws more specific error when fit on non-square data and kernel = precomputed. :class:`svm.BaseLibSVM` :pr:`14336` by :user:`Gregory Dexter `. - + :mod:`sklearn.tree` ................... - |Feature| Adds minimal cost complexity pruning, controlled by ``ccp_alpha``, to :class:`tree.DecisionTreeClassifier`, :class:`tree.DecisionTreeRegressor`, :class:`tree.ExtraTreeClassifier`, :class:`tree.ExtraTreeRegressor`, - :class:`ensemble.RandomForestClassifier`, + :class:`ensemble.RandomForestClassifier`, :class:`ensemble.RandomForestRegressor`, - :class:`ensemble.ExtraTreesClassifier`, + :class:`ensemble.ExtraTreesClassifier`, :class:`ensemble.ExtraTreesRegressor`, - :class:`ensemble.RandomTreesEmbedding`, - :class:`ensemble.GradientBoostingClassifier`, + :class:`ensemble.RandomTreesEmbedding`, + :class:`ensemble.GradientBoostingClassifier`, and :class:`ensemble.GradientBoostingRegressor`. :pr:`12887` by `Thomas Fan`_. 
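The tests reworked above pin down the new default behaviour: with ``early_stopping='auto'``, early stopping is turned on only when the training set holds more than 10,000 samples. The following is a minimal sketch of that threshold, assuming an estimator built from this branch; the helper dataset simply mirrors the ``_make_dumb_dataset`` fixture added in the tests and is otherwise hypothetical::

    # Sketch only: assumes this branch, where early_stopping='auto' enables
    # early stopping only when there are more than 10,000 training samples.
    import numpy as np
    from sklearn.experimental import enable_hist_gradient_boosting  # noqa
    from sklearn.ensemble import HistGradientBoostingClassifier

    rng = np.random.RandomState(42)

    def make_dumb_dataset(n_samples):
        # One feature, trivially separable target, as in _make_dumb_dataset.
        X = rng.randn(n_samples, 1)
        y = (X[:, 0] > 0).astype('int64')
        return X, y

    for n_samples in (10000, 10001):
        X, y = make_dumb_dataset(n_samples)
        # early_stopping is left at its 'auto' default; the other parameters
        # match the ones used in test_early_stopping_default.
        gb = HistGradientBoostingClassifier(max_iter=10, n_iter_no_change=2,
                                            tol=1e-1)
        gb.fit(X, y)
        # 10,000 samples: early stopping stays off, so n_iter_ == max_iter.
        # 10,001 samples: early stopping is active and n_iter_ is expected
        # to be smaller on this easy dataset.
        print(n_samples, gb.n_iter_, gb.max_iter)

Passing ``early_stopping=True`` or ``early_stopping=False`` explicitly overrides this sample-size heuristic, as described in the updated docstrings.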
From 5b366d92aa378cab73796abdbf49f012b7e72f0d Mon Sep 17 00:00:00 2001 From: Johann Faouzi Date: Thu, 14 Nov 2019 10:52:57 +0100 Subject: [PATCH 26/33] Make raw_predictions and raw_predictions_val private attributes --- .../gradient_boosting.py | 31 ++++++++++--------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index e803dd5a00031..9a40aba76859d 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -186,11 +186,11 @@ def fit(self, X, y): self._baseline_prediction = self.loss_.get_baseline_prediction( y_train, self.n_trees_per_iteration_ ) - raw_predictions = np.zeros( + self._raw_predictions = np.zeros( shape=(self.n_trees_per_iteration_, n_samples), dtype=self._baseline_prediction.dtype ) - raw_predictions += self._baseline_prediction + self._raw_predictions += self._baseline_prediction # initialize gradients and hessians (empty arrays). # shape = (n_trees_per_iteration, n_samples). @@ -205,7 +205,8 @@ def fit(self, X, y): # Initialize structures and attributes related to early stopping self.scorer_ = None # set if scoring != loss - raw_predictions_val = None # set if scoring == loss and use val + # set if scoring == loss and use val + self._raw_predictions_val = None self.train_score_ = [] self.validation_score_ = [] @@ -224,16 +225,18 @@ def fit(self, X, y): # the validation data. if self._use_validation_data: - raw_predictions_val = np.zeros( + self._raw_predictions_val = np.zeros( shape=(self.n_trees_per_iteration_, X_binned_val.shape[0]), dtype=self._baseline_prediction.dtype ) - raw_predictions_val += self._baseline_prediction + self._raw_predictions_val += self._baseline_prediction - self._check_early_stopping_loss(raw_predictions, y_train, - raw_predictions_val, y_val) + self._check_early_stopping_loss( + self._raw_predictions, y_train, + self._raw_predictions_val, y_val + ) else: self.scorer_ = check_scoring(self, self.scoring) # scorer_ is a callable with signature (est, X, y) and @@ -273,7 +276,7 @@ def fit(self, X, y): self.validation_score_ = self.validation_score_.tolist() # Compute raw predictions - raw_predictions = self._raw_predict(X_binned_train) + self._raw_predictions = self._raw_predict(X_binned_train) if self.do_early_stopping_ and self.scoring != 'loss': # Compute the subsample set @@ -299,8 +302,8 @@ def fit(self, X, y): end='', flush=True) # Update gradients and hessians, inplace - self.loss_.update_gradients_and_hessians(gradients, hessians, - y_train, raw_predictions) + self.loss_.update_gradients_and_hessians( + gradients, hessians, y_train, self._raw_predictions) # Append a list since there may be more than 1 predictor per iter predictors.append([]) @@ -332,7 +335,7 @@ def fit(self, X, y): # Update raw_predictions with the predictions of the newly # created tree. 
tic_pred = time() - _update_raw_predictions(raw_predictions[k, :], grower) + _update_raw_predictions(self._raw_predictions[k, :], grower) toc_pred = time() acc_prediction_time += toc_pred - tic_pred @@ -342,7 +345,7 @@ def fit(self, X, y): # Update raw_predictions_val with the newest tree(s) if self._use_validation_data: for k, pred in enumerate(self._predictors[-1]): - raw_predictions_val[k, :] += ( + self._raw_predictions_val[k, :] += ( pred.predict_binned( X_binned_val, self.bin_mapper_.missing_values_bin_idx_ @@ -350,8 +353,8 @@ def fit(self, X, y): ) should_early_stop = self._check_early_stopping_loss( - raw_predictions, y_train, - raw_predictions_val, y_val + self._raw_predictions, y_train, + self._raw_predictions_val, y_val ) else: From 7f81df7b3f671ff6b34fb81046d09b3e454bbd71 Mon Sep 17 00:00:00 2001 From: Johann Faouzi Date: Fri, 15 Nov 2019 15:04:03 +0100 Subject: [PATCH 27/33] Remove private attributes for raw predictions --- .../gradient_boosting.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 8d7feef08f555..bfb48614d10ea 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -187,11 +187,11 @@ def fit(self, X, y): self._baseline_prediction = self.loss_.get_baseline_prediction( y_train, self.n_trees_per_iteration_ ) - self._raw_predictions = np.zeros( + raw_predictions = np.zeros( shape=(self.n_trees_per_iteration_, n_samples), dtype=self._baseline_prediction.dtype ) - self._raw_predictions += self._baseline_prediction + raw_predictions += self._baseline_prediction # initialize gradients and hessians (empty arrays). # shape = (n_trees_per_iteration, n_samples). @@ -207,7 +207,7 @@ def fit(self, X, y): # Initialize structures and attributes related to early stopping self.scorer_ = None # set if scoring != loss # set if scoring == loss and use val - self._raw_predictions_val = None + raw_predictions_val = None self.train_score_ = [] self.validation_score_ = [] @@ -226,17 +226,17 @@ def fit(self, X, y): # the validation data. if self._use_validation_data: - self._raw_predictions_val = np.zeros( + raw_predictions_val = np.zeros( shape=(self.n_trees_per_iteration_, X_binned_val.shape[0]), dtype=self._baseline_prediction.dtype ) - self._raw_predictions_val += self._baseline_prediction + raw_predictions_val += self._baseline_prediction self._check_early_stopping_loss( - self._raw_predictions, y_train, - self._raw_predictions_val, y_val + raw_predictions, y_train, + raw_predictions_val, y_val ) else: self.scorer_ = check_scoring(self, self.scoring) @@ -274,7 +274,7 @@ def fit(self, X, y): self.validation_score_ = self.validation_score_.tolist() # Compute raw predictions - self._raw_predictions = self._raw_predict(X_binned_train) + raw_predictions = self._raw_predict(X_binned_train) if self.do_early_stopping_ and self.scoring != 'loss': # Compute the subsample set @@ -301,7 +301,7 @@ def fit(self, X, y): # Update gradients and hessians, inplace self.loss_.update_gradients_and_hessians( - gradients, hessians, y_train, self._raw_predictions) + gradients, hessians, y_train, raw_predictions) # Append a list since there may be more than 1 predictor per iter predictors.append([]) @@ -337,7 +337,7 @@ def fit(self, X, y): # Update raw_predictions with the predictions of the newly # created tree. 
tic_pred = time() - _update_raw_predictions(self._raw_predictions[k, :], grower) + _update_raw_predictions(raw_predictions[k, :], grower) toc_pred = time() acc_prediction_time += toc_pred - tic_pred @@ -347,7 +347,7 @@ def fit(self, X, y): # Update raw_predictions_val with the newest tree(s) if self._use_validation_data: for k, pred in enumerate(self._predictors[-1]): - self._raw_predictions_val[k, :] += ( + raw_predictions_val[k, :] += ( pred.predict_binned( X_binned_val, self.bin_mapper_.missing_values_bin_idx_ @@ -355,8 +355,8 @@ def fit(self, X, y): ) should_early_stop = self._check_early_stopping_loss( - self._raw_predictions, y_train, - self._raw_predictions_val, y_val + raw_predictions, y_train, + raw_predictions_val, y_val ) else: From 58c9bceb2ff5580af62e9a94023bafd2f8345308 Mon Sep 17 00:00:00 2001 From: Johann Faouzi Date: Fri, 15 Nov 2019 15:06:52 +0100 Subject: [PATCH 28/33] Revert changes --- .../gradient_boosting.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index bfb48614d10ea..153b3a7d09e3c 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -206,8 +206,7 @@ def fit(self, X, y): # Initialize structures and attributes related to early stopping self.scorer_ = None # set if scoring != loss - # set if scoring == loss and use val - raw_predictions_val = None + raw_predictions_val = None # set if scoring == loss and use val self.train_score_ = [] self.validation_score_ = [] @@ -234,10 +233,8 @@ def fit(self, X, y): raw_predictions_val += self._baseline_prediction - self._check_early_stopping_loss( - raw_predictions, y_train, - raw_predictions_val, y_val - ) + self._check_early_stopping_loss(raw_predictions, y_train, + raw_predictions_val, y_val) else: self.scorer_ = check_scoring(self, self.scoring) # scorer_ is a callable with signature (est, X, y) and @@ -300,8 +297,8 @@ def fit(self, X, y): end='', flush=True) # Update gradients and hessians, inplace - self.loss_.update_gradients_and_hessians( - gradients, hessians, y_train, raw_predictions) + self.loss_.update_gradients_and_hessians(gradients, hessians, + y_train, raw_predictions) # Append a list since there may be more than 1 predictor per iter predictors.append([]) @@ -355,9 +352,7 @@ def fit(self, X, y): ) should_early_stop = self._check_early_stopping_loss( - raw_predictions, y_train, - raw_predictions_val, y_val - ) + raw_predictions, y_train, raw_predictions_val, y_val) else: should_early_stop = self._check_early_stopping_scorer( From 4e093ad9b257a6d22d058bfcb1c31b3baa8fb5c4 Mon Sep 17 00:00:00 2001 From: Johann Faouzi Date: Fri, 15 Nov 2019 15:09:17 +0100 Subject: [PATCH 29/33] Revert changes --- sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 153b3a7d09e3c..fd5711c767161 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -352,7 +352,9 @@ def fit(self, X, y): ) should_early_stop = self._check_early_stopping_loss( - raw_predictions, y_train, raw_predictions_val, y_val) + raw_predictions, y_train, + raw_predictions_val, y_val + ) else: should_early_stop = 
self._check_early_stopping_scorer( From 6ed073508e7b0fd37fa642402f5e01163902b2c2 Mon Sep 17 00:00:00 2001 From: Johann Faouzi Date: Fri, 15 Nov 2019 18:16:49 +0100 Subject: [PATCH 30/33] Add note about scorer --- doc/modules/ensemble.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index 5bec1a262200c..a16041e6ca659 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -899,9 +899,10 @@ Note that **early-stopping is enabled by default if the number of samples is larger than 10,000**. The early-stopping behaviour is controlled via the ``early-stopping``, ``scoring``, ``validation_fraction``, ``n_iter_no_change``, and ``tol`` parameters. It is possible to early-stop -using an arbitrary :term:`scorer`, or just the training or validation loss. By -default, early-stopping is performed if there are at least 10,000 samples in -the training set, using the validation loss. +using an arbitrary :term:`scorer`, or just the training or validation loss. +Note that for technical reasons, using a scorer is significantly slower than +using the loss. By default, early-stopping is performed if there are at least +10,000 samples in the training set, using the validation loss. Missing values support ---------------------- From 29a37224643f120ed9199dc9106713fd5722e6db Mon Sep 17 00:00:00 2001 From: Johann Faouzi Date: Fri, 15 Nov 2019 18:19:36 +0100 Subject: [PATCH 31/33] Fix test_warm_start_early_stopping --- .../ensemble/_hist_gradient_boosting/tests/test_warm_start.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py index ccd005be6daed..2417de4f6cc63 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py @@ -102,14 +102,14 @@ def test_warm_start_early_stopping(GradientBoosting, X, y, scoring): n_iter_no_change = 5 gb = GradientBoosting( - n_iter_no_change=n_iter_no_change, max_iter=10000, + n_iter_no_change=n_iter_no_change, max_iter=10000, early_stopping=True, random_state=42, warm_start=True, tol=1e-3, scoring=scoring, ) gb.fit(X, y) n_iter_first_fit = gb.n_iter_ gb.fit(X, y) n_iter_second_fit = gb.n_iter_ - assert n_iter_second_fit - n_iter_first_fit < n_iter_no_change + assert 0 < n_iter_second_fit - n_iter_first_fit < n_iter_no_change @pytest.mark.parametrize('GradientBoosting, X, y', [ From 82a379627bf442debe451186e92015fc4be797fd Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 3 Feb 2020 09:07:20 -0500 Subject: [PATCH 32/33] fixed bad merge --- doc/whats_new/v0.23.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/doc/whats_new/v0.23.rst b/doc/whats_new/v0.23.rst index 6bc9d23c2c637..26bfa0b599a42 100644 --- a/doc/whats_new/v0.23.rst +++ b/doc/whats_new/v0.23.rst @@ -118,10 +118,6 @@ Changelog samples in the training set. :pr:`14516` by :user:`Johann Faouzi `. -- |Feature| :func:`inspection.partial_dependence` and - :func:`inspection.plot_partial_dependence` now support the fast 'recursion' - method for both estimators. :pr:`13769` by `Nicolas Hug`_. - :mod:`sklearn.feature_extraction` ................................. 
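The user-guide note added above states that scorer-based early stopping is significantly slower than the default ``scoring='loss'``: the loss can be tracked from the raw predictions already maintained during ``fit``, whereas a scorer has to re-predict on a (sub)sampled training set at every check. A rough timing sketch, assuming an estimator built from this branch and an arbitrary synthetic dataset chosen only for illustration (actual timings are machine-dependent)::

    # Sketch only: contrasts loss-based and scorer-based early stopping.
    from time import time

    from sklearn.datasets import make_classification
    from sklearn.experimental import enable_hist_gradient_boosting  # noqa
    from sklearn.ensemble import HistGradientBoostingClassifier

    X, y = make_classification(n_samples=20000, random_state=0)

    for scoring in ('loss', 'accuracy'):
        gb = HistGradientBoostingClassifier(early_stopping=True,
                                            scoring=scoring, random_state=0)
        tic = time()
        gb.fit(X, y)
        print('scoring=%r: stopped after %d iterations in %.2fs'
              % (scoring, gb.n_iter_, time() - tic))

Both runs are expected to early-stop; only the wall-clock time should differ noticeably between the two ``scoring`` settings.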
From 6f2b70a4c5399919896be7fa45ebe97b4d402526 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Mon, 3 Feb 2020 09:51:57 -0500 Subject: [PATCH 33/33] Fixed LightGBM tests: properly deactive ES since parameters have changed --- .../ensemble/_hist_gradient_boosting/gradient_boosting.py | 8 ++++---- .../tests/test_compare_lightgbm.py | 6 +++--- sklearn/ensemble/_hist_gradient_boosting/utils.pyx | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index dacb1f428817e..e63e0285f553f 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -750,8 +750,8 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting): Attributes ---------- n_iter_ : int - The number of iterations as selected by early stopping (if - n_iter_no_change is not None). Otherwise it corresponds to max_iter. + The number of iterations as selected by early stopping, depending on + the `early_stopping` parameter. Otherwise it corresponds to max_iter. n_trees_per_iteration_ : int The number of tree that are built at each iteration. For regressors, this is always 1. @@ -940,8 +940,8 @@ class HistGradientBoostingClassifier(BaseHistGradientBoosting, classes_ : array, shape = (n_classes,) Class labels. n_iter_ : int - The number of estimators as selected by early stopping (if - n_iter_no_change is not None). Otherwise it corresponds to max_iter. + The number of iterations as selected by early stopping, depending on + the `early_stopping` parameter. Otherwise it corresponds to max_iter. n_trees_per_iteration_ : int The number of tree that are built at each iteration. 
This is equal to 1 for binary classification, and to ``n_classes`` for multiclass diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py index 32bb5dee4b197..6ac76a67d07ca 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py @@ -66,7 +66,7 @@ def test_same_predictions_regression(seed, min_samples_leaf, n_samples, max_iter=max_iter, max_bins=max_bins, learning_rate=1, - n_iter_no_change=None, + early_stopping=False, min_samples_leaf=min_samples_leaf, max_leaf_nodes=max_leaf_nodes) est_lightgbm = get_equivalent_estimator(est_sklearn, lib='lightgbm') @@ -119,7 +119,7 @@ def test_same_predictions_classification(seed, min_samples_leaf, n_samples, max_iter=max_iter, max_bins=max_bins, learning_rate=1, - n_iter_no_change=None, + early_stopping=False, min_samples_leaf=min_samples_leaf, max_leaf_nodes=max_leaf_nodes) est_lightgbm = get_equivalent_estimator(est_sklearn, lib='lightgbm') @@ -181,7 +181,7 @@ def test_same_predictions_multiclass_classification( max_iter=max_iter, max_bins=max_bins, learning_rate=lr, - n_iter_no_change=None, + early_stopping=False, min_samples_leaf=min_samples_leaf, max_leaf_nodes=max_leaf_nodes) est_lightgbm = get_equivalent_estimator(est_sklearn, lib='lightgbm') diff --git a/sklearn/ensemble/_hist_gradient_boosting/utils.pyx b/sklearn/ensemble/_hist_gradient_boosting/utils.pyx index 4b1188b87e69e..cf2c5a51c90dd 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/utils.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/utils.pyx @@ -38,7 +38,7 @@ def get_equivalent_estimator(estimator, lib='lightgbm'): if sklearn_params['loss'] == 'auto': raise ValueError('auto loss is not accepted. We need to know if ' 'the problem is binary or multiclass classification.') - if sklearn_params['n_iter_no_change'] is not None: + if sklearn_params['early_stopping']: raise NotImplementedError('Early stopping should be deactivated.') lightgbm_loss_mapping = {