From ee0586933be0766dc9cf655db3d6311b8056f2c3 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Sun, 29 Jan 2023 23:29:07 -0500 Subject: [PATCH 1/4] MNT Expose allow_nan tag in bagging --- sklearn/ensemble/_bagging.py | 16 ++++++++++++++++ sklearn/ensemble/tests/test_bagging.py | 14 ++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index 4586e55a59f97..d10f89102ea82 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -23,6 +23,7 @@ from ..utils.random import sample_without_replacement from ..utils._param_validation import Interval, HasMethods, StrOptions from ..utils.validation import has_fit_parameter, check_is_fitted, _check_sample_weight +from ..utils._tags import _safe_tags from ..utils.parallel import delayed, Parallel @@ -981,6 +982,14 @@ def decision_function(self, X): return decisions + def _more_tags(self): + if self.estimator is None: + estimator = DecisionTreeClassifier() + else: + estimator = self.estimator + + return {"allow_nan": _safe_tags(estimator, "allow_nan")} + class BaggingRegressor(RegressorMixin, BaseBagging): """A Bagging regressor. @@ -1261,3 +1270,10 @@ def _set_oob_score(self, X, y): self.oob_prediction_ = predictions self.oob_score_ = r2_score(y, predictions) + + def _more_tags(self): + if self.estimator is None: + estimator = DecisionTreeRegressor() + else: + estimator = self.estimator + return {"allow_nan": _safe_tags(estimator, "allow_nan")} diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index 330287cefef37..bc74cba1b9dc4 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -25,6 +25,8 @@ from sklearn.pipeline import make_pipeline from sklearn.feature_selection import SelectKBest from sklearn.model_selection import train_test_split +from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.ensemble import HistGradientBoostingRegressor from sklearn.datasets import load_diabetes, load_iris, make_hastie_10_2 from sklearn.utils import check_random_state from sklearn.preprocessing import FunctionTransformer, scale @@ -980,3 +982,15 @@ def test_deprecated_base_estimator_has_decision_function(): with pytest.warns(FutureWarning, match=warn_msg): y_decision = clf.fit(X, y).decision_function(X) assert y_decision.shape == (150, 3) + + +@pytest.mark.parametrize( + "bagging", + [ + BaggingClassifier(HistGradientBoostingClassifier(max_iter=1), n_estimators=1), + BaggingRegressor(HistGradientBoostingRegressor(max_iter=1), n_estimators=1), + ], +) +def test_bagging_allow_nan_tag(bagging): + """Check that bagging inherits allow_nan tag.""" + assert bagging._get_tags()["allow_nan"] From 0368d65d55651fca0a620082925df132e4262e65 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Mon, 30 Jan 2023 13:28:37 -0500 Subject: [PATCH 2/4] DOC Adds whats new --- doc/whats_new/v1.3.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index e4e965aca02d8..be0956261079a 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -130,6 +130,10 @@ Changelog scikit-learn 1.3: retraining with scikit-learn 1.3 is required. :pr:`25186` by :user:`Felipe Breve Siola `. +- |Enhancement| :class:`ensemble.BaggingClassifier` and + :class:`ensemble.BaggingRegressor` exposes the `allow_nan` tag from the + underlying estimator. :pr:`25506` by `Thomas Fan`_. + :mod:`sklearn.exception` ........................ - |Feature| Added :class:`exception.InconsistentVersionWarning` which is raised From d752f13bcda92bcb5b3d2f9d62c2ba3401d46e70 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Fri, 3 Feb 2023 12:57:46 -0500 Subject: [PATCH 3/4] Apply suggestions from code review Co-authored-by: Christian Lorentzen --- doc/whats_new/v1.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index be0956261079a..e1033d84302ad 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -131,7 +131,7 @@ Changelog :pr:`25186` by :user:`Felipe Breve Siola `. - |Enhancement| :class:`ensemble.BaggingClassifier` and - :class:`ensemble.BaggingRegressor` exposes the `allow_nan` tag from the + :class:`ensemble.BaggingRegressor` expose the `allow_nan` tag from the underlying estimator. :pr:`25506` by `Thomas Fan`_. :mod:`sklearn.exception` From 1d090e206e11042ff4596e980a8bf8e881b726e3 Mon Sep 17 00:00:00 2001 From: "Thomas J. Fan" Date: Fri, 3 Feb 2023 13:20:20 -0500 Subject: [PATCH 4/4] TST Adds more test coverage --- sklearn/ensemble/tests/test_bagging.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index bc74cba1b9dc4..ebe21a594e8eb 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -985,12 +985,14 @@ def test_deprecated_base_estimator_has_decision_function(): @pytest.mark.parametrize( - "bagging", + "bagging, expected_allow_nan", [ - BaggingClassifier(HistGradientBoostingClassifier(max_iter=1), n_estimators=1), - BaggingRegressor(HistGradientBoostingRegressor(max_iter=1), n_estimators=1), + (BaggingClassifier(HistGradientBoostingClassifier(max_iter=1)), True), + (BaggingRegressor(HistGradientBoostingRegressor(max_iter=1)), True), + (BaggingClassifier(LogisticRegression()), False), + (BaggingRegressor(SVR()), False), ], ) -def test_bagging_allow_nan_tag(bagging): +def test_bagging_allow_nan_tag(bagging, expected_allow_nan): """Check that bagging inherits allow_nan tag.""" - assert bagging._get_tags()["allow_nan"] + assert bagging._get_tags()["allow_nan"] == expected_allow_nan