From 076c65efdd59637898602695edc886fa8ac6372b Mon Sep 17 00:00:00 2001 From: Tim Head Date: Fri, 15 Aug 2025 11:51:37 +0200 Subject: [PATCH 1/9] Add option to use strict xfail mode in parametrize_with_checks In strict mode unexpectedly passing tests lead to a test failure. This helps keep the xfail list up to date. --- sklearn/utils/estimator_checks.py | 42 ++++++++++++++-- sklearn/utils/tests/test_estimator_checks.py | 53 ++++++++++++++++++++ 2 files changed, 92 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index a5fb530ce8c03..42681b5e8e960 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -424,6 +424,7 @@ def _maybe_mark( expected_failed_checks: dict[str, str] | None = None, mark: Literal["xfail", "skip", None] = None, pytest=None, + strict: bool | None = None, ): """Mark the test as xfail or skip if needed. @@ -442,6 +443,13 @@ def _maybe_mark( Pytest module to use to mark the check. This is only needed if ``mark`` is `"xfail"`. Note that one can run `check_estimator` without having `pytest` installed. This is used in combination with `parametrize_with_checks` only. + strict : bool, default=None + Whether to run checks in strict mode. This option is ignored unless + mark="xfail". If True, checks that are expected to fail but actually + pass will lead to a test failure. If False, unexpectedly passing tests + will be marked as xpass. If None, the default pytest behavior is used. + + .. versionadded:: 1.8 """ should_be_marked, reason = _should_be_skipped_or_marked( estimator, check, expected_failed_checks @@ -451,7 +459,13 @@ def _maybe_mark( estimator_name = estimator.__class__.__name__ if mark == "xfail": - return pytest.param(estimator, check, marks=pytest.mark.xfail(reason=reason)) + # With strict=None we want the value from pytest.ini to take precedence + # and that means not passing strict to the xfail mark at all. 
+ if strict is None: + mark = pytest.mark.xfail(reason=reason) + else: + mark = pytest.mark.xfail(reason=reason, strict=strict) + return pytest.param(estimator, check, marks=mark) else: @wraps(check) @@ -501,6 +515,7 @@ def estimator_checks_generator( legacy: bool = True, expected_failed_checks: dict[str, str] | None = None, mark: Literal["xfail", "skip", None] = None, + strict: bool | None = None, ): """Iteratively yield all check callables for an estimator. @@ -528,6 +543,13 @@ def estimator_checks_generator( xfail(`pytest.mark.xfail`) or skip. Marking a test as "skip" is done via wrapping the check in a function that raises a :class:`~sklearn.exceptions.SkipTest` exception. + strict : bool, default=None + Whether to run checks in strict mode. This option is ignored unless + mark="xfail". If True, checks that are expected to fail but actually + pass will lead to a test failure. If False, unexpectedly passing tests + will be marked as xpass. If None, the default pytest behavior is used. + + .. versionadded:: 1.8 Returns ------- @@ -552,6 +574,7 @@ def estimator_checks_generator( expected_failed_checks=expected_failed_checks, mark=mark, pytest=pytest, + strict=strict, ) @@ -560,6 +583,7 @@ def parametrize_with_checks( *, legacy: bool = True, expected_failed_checks: Callable | None = None, + strict: bool | None = None, ): """Pytest specific decorator for parametrizing estimator checks. @@ -605,9 +629,16 @@ def parametrize_with_checks( Where `"check_name"` is the name of the check, and `"my reason"` is why the check fails. These tests will be marked as xfail if the check fails. - .. versionadded:: 1.6 + strict : bool, default=None + Whether to run checks in strict mode. If True, checks that are + expected to fail but actually pass will lead to a test failure. If + False, unexpectedly passing tests will be marked as xpass. If None, + the default pytest behavior is used. + + .. 
versionadded:: 1.8 + Returns ------- decorator : `pytest.mark.parametrize` @@ -640,7 +671,12 @@ def parametrize_with_checks( def _checks_generator(estimators, legacy, expected_failed_checks): for estimator in estimators: - args = {"estimator": estimator, "legacy": legacy, "mark": "xfail"} + args = { + "estimator": estimator, + "legacy": legacy, + "mark": "xfail", + "strict": strict, + } if callable(expected_failed_checks): args["expected_failed_checks"] = expected_failed_checks(estimator) yield from estimator_checks_generator(**args) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 4fab82e17cc92..aa63fee9ae76b 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -1308,6 +1308,59 @@ def test_all_estimators_all_public(): run_tests_without_pytest() +def test_estimator_checks_generator_strict_none(): + est = next(_construct_instances(NuSVC)) + expected_to_fail = _get_expected_failed_checks(est) + # If we don't pass strict, it should not appear in the xfail mark either + # This way the behaviour configured in pytest.ini takes precedence. 
+ checks = estimator_checks_generator( + est, + legacy=True, + expected_failed_checks=expected_to_fail, + mark="xfail", + ) + # making sure we use a class that has expected failures + assert len(expected_to_fail) > 0 + # xfail'ed checks are wrapped in a ParameterSet, so below we extract + # the things we need via a bit of a crutch: len() + marked_checks = [c for c in checks if len(c) == 3] + + for parameter_set in marked_checks: + (_, check), marks, _ = parameter_set + mark = marks[0] + assert "strict" not in mark.kwargs + + +def test_estimator_checks_generator_strict_xfail_tests(): + # Make sure that the checks generator marks tests that are expected to fail + # as strict xfail + est = next(_construct_instances(NuSVC)) + expected_to_fail = _get_expected_failed_checks(est) + checks = estimator_checks_generator( + est, + legacy=True, + expected_failed_checks=expected_to_fail, + mark="xfail", + strict=True, + ) + # making sure we use a class that has expected failures + assert len(expected_to_fail) > 0 + strict_xfailed_checks = [] + + # xfail'ed checks are wrapped in a ParameterSet, so below we extract + # the things we need via a bit of a crutch: len() + marked_checks = [c for c in checks if len(c) == 3] + + for parameter_set in marked_checks: + (_, check), marks, _ = parameter_set + mark = marks[0] + if mark.kwargs["strict"]: + strict_xfailed_checks.append(_check_name(check)) + + # all checks expected to fail are marked as strict xfail + assert set(expected_to_fail.keys()) == set(strict_xfailed_checks) + + def test_estimator_checks_generator_skipping_tests(): # Make sure the checks generator skips tests that are expected to fail est = next(_construct_instances(NuSVC)) From 53d24b886d06fe27f320f161182db92cba4bbd5b Mon Sep 17 00:00:00 2001 From: Tim Head Date: Fri, 15 Aug 2025 12:04:28 +0200 Subject: [PATCH 2/9] what's new --- .../upcoming_changes/sklearn.utils/31951.enhancement.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 
doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst diff --git a/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst new file mode 100644 index 0000000000000..882ac399da886 --- /dev/null +++ b/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst @@ -0,0 +1,4 @@ +``sklearn.utils.estimator_checks.parametrize_with_checks`` now lets you configure +strict mode for xfailing checks. Tests that unexpectedly pass will lead to a test +failure. The default behaviour is unchanged. +By :user:`Tim Head <betatim>`. From 4a215e6a68d01a682e3432c7b5af33e2036545d3 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Fri, 29 Aug 2025 14:34:25 +0200 Subject: [PATCH 3/9] Fix changelog MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Loïc Estève --- .../upcoming_changes/sklearn.utils/31951.enhancement.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst index 882ac399da886..e86427a65c564 100644 --- a/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst +++ b/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst @@ -1,4 +1,4 @@ -``sklearn.utils.estimator_checks.parametrize_with_checks`` now lets you configure +- ``sklearn.utils.estimator_checks.parametrize_with_checks`` now lets you configure strict mode for xfailing checks. Tests that unexpectedly pass will lead to a test failure. The default behaviour is unchanged. By :user:`Tim Head <betatim>`. 
From 40f3c03ae70d1ea1cf1b3bef1d0696c952d1edba Mon Sep 17 00:00:00 2001 From: Tim Head Date: Fri, 29 Aug 2025 14:55:21 +0200 Subject: [PATCH 4/9] Improve test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Loïc Estève --- sklearn/utils/tests/test_estimator_checks.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 0ec084eb2e0de..f1f43261d138a 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -1337,14 +1337,13 @@ def test_estimator_checks_generator_strict_none(): ) # making sure we use a class that has expected failures assert len(expected_to_fail) > 0 - # xfail'ed checks are wrapped in a ParameterSet, so below we extract - # the things we need via a bit of a crutch: len() - marked_checks = [c for c in checks if len(c) == 3] + marked_checks = [c for c in checks if hasattr(c, "marks")] + # making sure we have some checks with marks + assert len(marked_checks) > 0 for parameter_set in marked_checks: - (_, check), marks, _ = parameter_set - mark = marks[0] - assert "strict" not in mark.kwargs + first_mark = parameter_set.marks[0] + assert "strict" not in first_mark.kwargs def test_estimator_checks_generator_strict_xfail_tests(): From 005ec1ae5f8b8080c43c7f02bf30ccdabff4afb7 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Fri, 29 Aug 2025 15:24:18 +0200 Subject: [PATCH 5/9] Switch to using xfail_strict as argument More consistent with the name of the pytest config option --- sklearn/utils/estimator_checks.py | 31 ++++++++++---------- sklearn/utils/tests/test_estimator_checks.py | 10 +++---- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index f0f2c1478e3ff..b51a6050f8dfd 100644 --- a/sklearn/utils/estimator_checks.py +++ 
b/sklearn/utils/estimator_checks.py @@ -424,7 +424,7 @@ def _maybe_mark( expected_failed_checks: dict[str, str] | None = None, mark: Literal["xfail", "skip", None] = None, pytest=None, - strict: bool | None = None, + xfail_strict: bool | None = None, ): """Mark the test as xfail or skip if needed. @@ -443,8 +443,8 @@ def _maybe_mark( Pytest module to use to mark the check. This is only needed if ``mark`` is `"xfail"`. Note that one can run `check_estimator` without having `pytest` installed. This is used in combination with `parametrize_with_checks` only. - strict : bool, default=None - Whether to run checks in strict mode. This option is ignored unless + xfail_strict : bool, default=None + Whether to run checks in xfail strict mode. This option is ignored unless mark="xfail". If True, checks that are expected to fail but actually pass will lead to a test failure. If False, unexpectedly passing tests will be marked as xpass. If None, the default pytest behavior is used. @@ -459,12 +459,13 @@ def _maybe_mark( estimator_name = estimator.__class__.__name__ if mark == "xfail": - # With strict=None we want the value from pytest.ini to take precedence - # and that means not passing strict to the xfail mark at all. - if strict is None: + # With xfail_strict=None we want the value from the pytest config to + # take precedence and that means not passing strict to the xfail + # mark at all. + if xfail_strict is None: mark = pytest.mark.xfail(reason=reason) else: - mark = pytest.mark.xfail(reason=reason, strict=strict) + mark = pytest.mark.xfail(reason=reason, strict=xfail_strict) return pytest.param(estimator, check, marks=mark) else: @@ -515,7 +516,7 @@ def estimator_checks_generator( legacy: bool = True, expected_failed_checks: dict[str, str] | None = None, mark: Literal["xfail", "skip", None] = None, - strict: bool | None = None, + xfail_strict: bool | None = None, ): """Iteratively yield all check callables for an estimator. 
@@ -543,8 +544,8 @@ def estimator_checks_generator( xfail(`pytest.mark.xfail`) or skip. Marking a test as "skip" is done via wrapping the check in a function that raises a :class:`~sklearn.exceptions.SkipTest` exception. - strict : bool, default=None - Whether to run checks in strict mode. This option is ignored unless + xfail_strict : bool, default=None + Whether to run checks in xfail strict mode. This option is ignored unless mark="xfail". If True, checks that are expected to fail but actually pass will lead to a test failure. If False, unexpectedly passing tests will be marked as xpass. If None, the default pytest behavior is used. @@ -574,7 +575,7 @@ def estimator_checks_generator( expected_failed_checks=expected_failed_checks, mark=mark, pytest=pytest, - strict=strict, + xfail_strict=xfail_strict, ) @@ -583,7 +584,7 @@ def parametrize_with_checks( *, legacy: bool = True, expected_failed_checks: Callable | None = None, - strict: bool | None = None, + xfail_strict: bool | None = None, ): """Pytest specific decorator for parametrizing estimator checks. @@ -631,8 +632,8 @@ def parametrize_with_checks( .. versionadded:: 1.6 - strict : bool, default=None - Whether to run checks in strict mode. If True, checks that are + xfail_strict : bool, default=None + Whether to run checks in xfail strict mode. If True, checks that are expected to fail but actually pass will lead to a test failure. If False, unexpectedly passing tests will be marked as xpass. If None, the default pytest behavior is used. 
@@ -675,7 +676,7 @@ def _checks_generator(estimators, legacy, expected_failed_checks): "estimator": estimator, "legacy": legacy, "mark": "xfail", - "strict": strict, + "xfail_strict": xfail_strict, } if callable(expected_failed_checks): args["expected_failed_checks"] = expected_failed_checks(estimator) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index f1f43261d138a..8b1acaa3a44ee 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -1335,10 +1335,10 @@ def test_estimator_checks_generator_strict_none(): expected_failed_checks=expected_to_fail, mark="xfail", ) - # making sure we use a class that has expected failures + # make sure we use a class that has expected failures assert len(expected_to_fail) > 0 marked_checks = [c for c in checks if hasattr(c, "marks")] - # making sure we have some checks with marks + # make sure we have some checks with marks assert len(marked_checks) > 0 for parameter_set in marked_checks: @@ -1356,15 +1356,15 @@ def test_estimator_checks_generator_strict_xfail_tests(): legacy=True, expected_failed_checks=expected_to_fail, mark="xfail", - strict=True, + xfail_strict=True, ) - # making sure we use a class that has expected failures + # make sure we use a class that has expected failures assert len(expected_to_fail) > 0 strict_xfailed_checks = [] # xfail'ed checks are wrapped in a ParameterSet, so below we extract # the things we need via a bit of a crutch: len() - marked_checks = [c for c in checks if len(c) == 3] + marked_checks = [c for c in checks if hasattr(c, "marks")] for parameter_set in marked_checks: (_, check), marks, _ = parameter_set From c02f85f1f5bde5643697454f86447eb709322dd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Mon, 1 Sep 2025 09:40:51 +0200 Subject: [PATCH 6/9] Update doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst --- 
.../upcoming_changes/sklearn.utils/31951.enhancement.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst index e86427a65c564..78df7fff40743 100644 --- a/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst +++ b/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst @@ -1,4 +1,4 @@ - ``sklearn.utils.estimator_checks.parametrize_with_checks`` now lets you configure -strict mode for xfailing checks. Tests that unexpectedly pass will lead to a test -failure. The default behaviour is unchanged. -By :user:`Tim Head <betatim>`. + strict mode for xfailing checks. Tests that unexpectedly pass will lead to a test + failure. The default behaviour is unchanged. + By :user:`Tim Head <betatim>`. From 178abcb88090862dfda1a9da754fa32420eeb02e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= Date: Mon, 1 Sep 2025 09:51:59 +0200 Subject: [PATCH 7/9] tweak [azure parallel] --- sklearn/utils/tests/test_estimator_checks.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 8b1acaa3a44ee..b126ead008f90 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -1365,11 +1365,13 @@ def test_estimator_checks_generator_strict_xfail_tests(): # xfail'ed checks are wrapped in a ParameterSet, so below we extract # the things we need via a bit of a crutch: len() marked_checks = [c for c in checks if hasattr(c, "marks")] + # make sure we use a class that has expected failures + assert len(expected_to_fail) > 0 for parameter_set in marked_checks: - (_, check), marks, _ = parameter_set - mark = marks[0] - if mark.kwargs["strict"]: + _, check = parameter_set.values + first_mark = parameter_set.marks[0] + if first_mark.kwargs["strict"]: 
strict_xfailed_checks.append(_check_name(check)) # all checks expected to fail are marked as strict xfail From 3ed0e4a8e037f8706fd96465b78c5f39d91f6731 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Mon, 1 Sep 2025 11:10:01 +0200 Subject: [PATCH 8/9] Add comment explaining purpose of test --- sklearn/utils/tests/test_estimator_checks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 8b1acaa3a44ee..9cccc35352a72 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -1325,6 +1325,7 @@ def test_all_estimators_all_public(): def test_estimator_checks_generator_strict_none(): + # Check that no "strict" mark is included in the generated checks est = next(_construct_instances(NuSVC)) expected_to_fail = _get_expected_failed_checks(est) # If we don't pass strict, it should not appear in the xfail mark either From fa808d2c77ccb90a3c7e6788cb5fd48161f64ad9 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Mon, 1 Sep 2025 11:12:31 +0200 Subject: [PATCH 9/9] Improve docstring formatting Co-authored-by: Adrin Jalali --- sklearn/utils/estimator_checks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index b51a6050f8dfd..d8cd13848a09d 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -445,7 +445,7 @@ def _maybe_mark( installed. This is used in combination with `parametrize_with_checks` only. xfail_strict : bool, default=None Whether to run checks in xfail strict mode. This option is ignored unless - mark="xfail". If True, checks that are expected to fail but actually + `mark="xfail"`. If True, checks that are expected to fail but actually pass will lead to a test failure. If False, unexpectedly passing tests will be marked as xpass. If None, the default pytest behavior is used. 
@@ -546,7 +546,7 @@ def estimator_checks_generator( :class:`~sklearn.exceptions.SkipTest` exception. xfail_strict : bool, default=None Whether to run checks in xfail strict mode. This option is ignored unless - mark="xfail". If True, checks that are expected to fail but actually + `mark="xfail"`. If True, checks that are expected to fail but actually pass will lead to a test failure. If False, unexpectedly passing tests will be marked as xpass. If None, the default pytest behavior is used.