From 076c65efdd59637898602695edc886fa8ac6372b Mon Sep 17 00:00:00 2001
From: Tim Head <betatim@gmail.com>
Date: Fri, 15 Aug 2025 11:51:37 +0200
Subject: [PATCH 1/9] Add option to use strict xfail mode in
 parametrize_with_checks

In strict mode, a check that is expected to fail but unexpectedly passes
leads to a test failure. This helps keep the xfail list up to date.
---
 sklearn/utils/estimator_checks.py            | 42 ++++++++++++++--
 sklearn/utils/tests/test_estimator_checks.py | 53 ++++++++++++++++++++
 2 files changed, 92 insertions(+), 3 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index a5fb530ce8c03..42681b5e8e960 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -424,6 +424,7 @@ def _maybe_mark(
     expected_failed_checks: dict[str, str] | None = None,
     mark: Literal["xfail", "skip", None] = None,
     pytest=None,
+    strict: bool | None = None,
 ):
     """Mark the test as xfail or skip if needed.
 
@@ -442,6 +443,13 @@ def _maybe_mark(
         Pytest module to use to mark the check. This is only needed if ``mark`` is
         `"xfail"`. Note that one can run `check_estimator` without having `pytest`
         installed. This is used in combination with `parametrize_with_checks` only.
+    strict : bool, default=None
+        Whether to run checks in strict mode. This option is ignored unless
+        mark="xfail". If True, checks that are expected to fail but actually
+        pass will lead to a test failure. If False, unexpectedly passing tests
+        will be marked as xpass. If None, the default pytest behavior is used.
+
+        .. versionadded:: 1.8
     """
     should_be_marked, reason = _should_be_skipped_or_marked(
         estimator, check, expected_failed_checks
@@ -451,7 +459,13 @@ def _maybe_mark(
 
     estimator_name = estimator.__class__.__name__
     if mark == "xfail":
-        return pytest.param(estimator, check, marks=pytest.mark.xfail(reason=reason))
+        # With strict=None we want the value from pytest.ini to take precedence
+        # and that means not passing strict to the xfail mark at all.
+        if strict is None:
+            mark = pytest.mark.xfail(reason=reason)
+        else:
+            mark = pytest.mark.xfail(reason=reason, strict=strict)
+        return pytest.param(estimator, check, marks=mark)
     else:
 
         @wraps(check)
@@ -501,6 +515,7 @@ def estimator_checks_generator(
     legacy: bool = True,
     expected_failed_checks: dict[str, str] | None = None,
     mark: Literal["xfail", "skip", None] = None,
+    strict: bool | None = None,
 ):
     """Iteratively yield all check callables for an estimator.
 
@@ -528,6 +543,13 @@ def estimator_checks_generator(
         xfail(`pytest.mark.xfail`) or skip. Marking a test as "skip" is done via
         wrapping the check in a function that raises a
         :class:`~sklearn.exceptions.SkipTest` exception.
+    strict : bool, default=None
+        Whether to run checks in strict mode. This option is ignored unless
+        mark="xfail". If True, checks that are expected to fail but actually
+        pass will lead to a test failure. If False, unexpectedly passing tests
+        will be marked as xpass. If None, the default pytest behavior is used.
+
+        .. versionadded:: 1.8
 
     Returns
     -------
@@ -552,6 +574,7 @@ def estimator_checks_generator(
                 expected_failed_checks=expected_failed_checks,
                 mark=mark,
                 pytest=pytest,
+                strict=strict,
             )
 
 
@@ -560,6 +583,7 @@ def parametrize_with_checks(
     *,
     legacy: bool = True,
     expected_failed_checks: Callable | None = None,
+    strict: bool | None = None,
 ):
     """Pytest specific decorator for parametrizing estimator checks.
 
@@ -605,9 +629,16 @@ def parametrize_with_checks(
         Where `"check_name"` is the name of the check, and `"my reason"` is why
         the check fails. These tests will be marked as xfail if the check fails.
 
-
         .. versionadded:: 1.6
 
+    strict : bool, default=None
+        Whether to run checks in strict mode. If True, checks that are
+        expected to fail but actually pass will lead to a test failure. If
+        False, unexpectedly passing tests will be marked as xpass. If None,
+        the default pytest behavior is used.
+
+        .. versionadded:: 1.8
+
     Returns
     -------
     decorator : `pytest.mark.parametrize`
@@ -640,7 +671,12 @@ def parametrize_with_checks(
 
     def _checks_generator(estimators, legacy, expected_failed_checks):
         for estimator in estimators:
-            args = {"estimator": estimator, "legacy": legacy, "mark": "xfail"}
+            args = {
+                "estimator": estimator,
+                "legacy": legacy,
+                "mark": "xfail",
+                "strict": strict,
+            }
             if callable(expected_failed_checks):
                 args["expected_failed_checks"] = expected_failed_checks(estimator)
             yield from estimator_checks_generator(**args)
diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 4fab82e17cc92..aa63fee9ae76b 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -1308,6 +1308,59 @@ def test_all_estimators_all_public():
     run_tests_without_pytest()
 
 
+def test_estimator_checks_generator_strict_none():
+    est = next(_construct_instances(NuSVC))
+    expected_to_fail = _get_expected_failed_checks(est)
+    # If we don't pass strict, it should not appear in the xfail mark either
+    # This way the behaviour configured in pytest.ini takes precedence.
+    checks = estimator_checks_generator(
+        est,
+        legacy=True,
+        expected_failed_checks=expected_to_fail,
+        mark="xfail",
+    )
+    # making sure we use a class that has expected failures
+    assert len(expected_to_fail) > 0
+    # xfail'ed checks are wrapped in a ParameterSet, so below we extract
+    # the things we need via a bit of a crutch: len()
+    marked_checks = [c for c in checks if len(c) == 3]
+
+    for parameter_set in marked_checks:
+        (_, check), marks, _ = parameter_set
+        mark = marks[0]
+        assert "strict" not in mark.kwargs
+
+
+def test_estimator_checks_generator_strict_xfail_tests():
+    # Make sure that the checks generator marks tests that are expected to fail
+    # as strict xfail
+    est = next(_construct_instances(NuSVC))
+    expected_to_fail = _get_expected_failed_checks(est)
+    checks = estimator_checks_generator(
+        est,
+        legacy=True,
+        expected_failed_checks=expected_to_fail,
+        mark="xfail",
+        strict=True,
+    )
+    # making sure we use a class that has expected failures
+    assert len(expected_to_fail) > 0
+    strict_xfailed_checks = []
+
+    # xfail'ed checks are wrapped in a ParameterSet, so below we extract
+    # the things we need via a bit of a crutch: len()
+    marked_checks = [c for c in checks if len(c) == 3]
+
+    for parameter_set in marked_checks:
+        (_, check), marks, _ = parameter_set
+        mark = marks[0]
+        if mark.kwargs["strict"]:
+            strict_xfailed_checks.append(_check_name(check))
+
+    # all checks expected to fail are marked as strict xfail
+    assert set(expected_to_fail.keys()) == set(strict_xfailed_checks)
+
+
 def test_estimator_checks_generator_skipping_tests():
     # Make sure the checks generator skips tests that are expected to fail
     est = next(_construct_instances(NuSVC))

From 53d24b886d06fe27f320f161182db92cba4bbd5b Mon Sep 17 00:00:00 2001
From: Tim Head <betatim@gmail.com>
Date: Fri, 15 Aug 2025 12:04:28 +0200
Subject: [PATCH 2/9] what's new

---
 .../upcoming_changes/sklearn.utils/31951.enhancement.rst      | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst

diff --git a/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst
new file mode 100644
index 0000000000000..882ac399da886
--- /dev/null
+++ b/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst
@@ -0,0 +1,4 @@
+``sklearn.utils.estimator_checks.parametrize_with_checks`` now lets you configure
+strict mode for xfailing checks. Tests that unexpectedly pass will lead to a test
+failure. The default behaviour is unchanged.
+By :user:`Tim Head <betatim>`.

From 4a215e6a68d01a682e3432c7b5af33e2036545d3 Mon Sep 17 00:00:00 2001
From: Tim Head <betatim@gmail.com>
Date: Fri, 29 Aug 2025 14:34:25 +0200
Subject: [PATCH 3/9] Fix changelog
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Loïc Estève <loic.esteve@ymail.com>
---
 .../upcoming_changes/sklearn.utils/31951.enhancement.rst        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst
index 882ac399da886..e86427a65c564 100644
--- a/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst
+++ b/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst
@@ -1,4 +1,4 @@
-``sklearn.utils.estimator_checks.parametrize_with_checks`` now lets you configure
+- ``sklearn.utils.estimator_checks.parametrize_with_checks`` now lets you configure
 strict mode for xfailing checks. Tests that unexpectedly pass will lead to a test
 failure. The default behaviour is unchanged.
 By :user:`Tim Head <betatim>`.

From 40f3c03ae70d1ea1cf1b3bef1d0696c952d1edba Mon Sep 17 00:00:00 2001
From: Tim Head <betatim@gmail.com>
Date: Fri, 29 Aug 2025 14:55:21 +0200
Subject: [PATCH 4/9] Improve test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Loïc Estève <loic.esteve@ymail.com>
---
 sklearn/utils/tests/test_estimator_checks.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 0ec084eb2e0de..f1f43261d138a 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -1337,14 +1337,13 @@ def test_estimator_checks_generator_strict_none():
     )
     # making sure we use a class that has expected failures
     assert len(expected_to_fail) > 0
-    # xfail'ed checks are wrapped in a ParameterSet, so below we extract
-    # the things we need via a bit of a crutch: len()
-    marked_checks = [c for c in checks if len(c) == 3]
+    marked_checks = [c for c in checks if hasattr(c, "marks")]
+    # making sure we have some checks with marks
+    assert len(marked_checks) > 0
 
     for parameter_set in marked_checks:
-        (_, check), marks, _ = parameter_set
-        mark = marks[0]
-        assert "strict" not in mark.kwargs
+        first_mark = parameter_set.marks[0]
+        assert "strict" not in first_mark.kwargs
 
 
 def test_estimator_checks_generator_strict_xfail_tests():

From 005ec1ae5f8b8080c43c7f02bf30ccdabff4afb7 Mon Sep 17 00:00:00 2001
From: Tim Head <betatim@gmail.com>
Date: Fri, 29 Aug 2025 15:24:18 +0200
Subject: [PATCH 5/9] Switch to using xfail_strict as argument

This is more consistent with the name of the pytest config option, `xfail_strict`.
---
 sklearn/utils/estimator_checks.py            | 31 ++++++++++----------
 sklearn/utils/tests/test_estimator_checks.py | 10 +++----
 2 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index f0f2c1478e3ff..b51a6050f8dfd 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -424,7 +424,7 @@ def _maybe_mark(
     expected_failed_checks: dict[str, str] | None = None,
     mark: Literal["xfail", "skip", None] = None,
     pytest=None,
-    strict: bool | None = None,
+    xfail_strict: bool | None = None,
 ):
     """Mark the test as xfail or skip if needed.
 
@@ -443,8 +443,8 @@ def _maybe_mark(
         Pytest module to use to mark the check. This is only needed if ``mark`` is
         `"xfail"`. Note that one can run `check_estimator` without having `pytest`
         installed. This is used in combination with `parametrize_with_checks` only.
-    strict : bool, default=None
-        Whether to run checks in strict mode. This option is ignored unless
+    xfail_strict : bool, default=None
+        Whether to run checks in xfail strict mode. This option is ignored unless
         mark="xfail". If True, checks that are expected to fail but actually
         pass will lead to a test failure. If False, unexpectedly passing tests
         will be marked as xpass. If None, the default pytest behavior is used.
@@ -459,12 +459,13 @@ def _maybe_mark(
 
     estimator_name = estimator.__class__.__name__
     if mark == "xfail":
-        # With strict=None we want the value from pytest.ini to take precedence
-        # and that means not passing strict to the xfail mark at all.
-        if strict is None:
+        # With xfail_strict=None we want the value from the pytest config to
+        # take precedence and that means not passing strict to the xfail
+        # mark at all.
+        if xfail_strict is None:
             mark = pytest.mark.xfail(reason=reason)
         else:
-            mark = pytest.mark.xfail(reason=reason, strict=strict)
+            mark = pytest.mark.xfail(reason=reason, strict=xfail_strict)
         return pytest.param(estimator, check, marks=mark)
     else:
 
@@ -515,7 +516,7 @@ def estimator_checks_generator(
     legacy: bool = True,
     expected_failed_checks: dict[str, str] | None = None,
     mark: Literal["xfail", "skip", None] = None,
-    strict: bool | None = None,
+    xfail_strict: bool | None = None,
 ):
     """Iteratively yield all check callables for an estimator.
 
@@ -543,8 +544,8 @@ def estimator_checks_generator(
         xfail(`pytest.mark.xfail`) or skip. Marking a test as "skip" is done via
         wrapping the check in a function that raises a
         :class:`~sklearn.exceptions.SkipTest` exception.
-    strict : bool, default=None
-        Whether to run checks in strict mode. This option is ignored unless
+    xfail_strict : bool, default=None
+        Whether to run checks in xfail strict mode. This option is ignored unless
         mark="xfail". If True, checks that are expected to fail but actually
         pass will lead to a test failure. If False, unexpectedly passing tests
         will be marked as xpass. If None, the default pytest behavior is used.
@@ -574,7 +575,7 @@ def estimator_checks_generator(
                 expected_failed_checks=expected_failed_checks,
                 mark=mark,
                 pytest=pytest,
-                strict=strict,
+                xfail_strict=xfail_strict,
             )
 
 
@@ -583,7 +584,7 @@ def parametrize_with_checks(
     *,
     legacy: bool = True,
     expected_failed_checks: Callable | None = None,
-    strict: bool | None = None,
+    xfail_strict: bool | None = None,
 ):
     """Pytest specific decorator for parametrizing estimator checks.
 
@@ -631,8 +632,8 @@ def parametrize_with_checks(
 
         .. versionadded:: 1.6
 
-    strict : bool, default=None
-        Whether to run checks in strict mode. If True, checks that are
+    xfail_strict : bool, default=None
+        Whether to run checks in xfail strict mode. If True, checks that are
         expected to fail but actually pass will lead to a test failure. If
         False, unexpectedly passing tests will be marked as xpass. If None,
         the default pytest behavior is used.
@@ -675,7 +676,7 @@ def _checks_generator(estimators, legacy, expected_failed_checks):
                 "estimator": estimator,
                 "legacy": legacy,
                 "mark": "xfail",
-                "strict": strict,
+                "xfail_strict": xfail_strict,
             }
             if callable(expected_failed_checks):
                 args["expected_failed_checks"] = expected_failed_checks(estimator)
diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index f1f43261d138a..8b1acaa3a44ee 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -1335,10 +1335,10 @@ def test_estimator_checks_generator_strict_none():
         expected_failed_checks=expected_to_fail,
         mark="xfail",
     )
-    # making sure we use a class that has expected failures
+    # make sure we use a class that has expected failures
     assert len(expected_to_fail) > 0
     marked_checks = [c for c in checks if hasattr(c, "marks")]
-    # making sure we have some checks with marks
+    # make sure we have some checks with marks
     assert len(marked_checks) > 0
 
     for parameter_set in marked_checks:
@@ -1356,15 +1356,15 @@ def test_estimator_checks_generator_strict_xfail_tests():
         legacy=True,
         expected_failed_checks=expected_to_fail,
         mark="xfail",
-        strict=True,
+        xfail_strict=True,
     )
-    # making sure we use a class that has expected failures
+    # make sure we use a class that has expected failures
     assert len(expected_to_fail) > 0
     strict_xfailed_checks = []
 
     # xfail'ed checks are wrapped in a ParameterSet, so below we extract
     # the things we need via a bit of a crutch: len()
-    marked_checks = [c for c in checks if len(c) == 3]
+    marked_checks = [c for c in checks if hasattr(c, "marks")]
 
     for parameter_set in marked_checks:
         (_, check), marks, _ = parameter_set

From c02f85f1f5bde5643697454f86447eb709322dd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= <loic.esteve@ymail.com>
Date: Mon, 1 Sep 2025 09:40:51 +0200
Subject: [PATCH 6/9] Update
 doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst

---
 .../upcoming_changes/sklearn.utils/31951.enhancement.rst    | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst
index e86427a65c564..78df7fff40743 100644
--- a/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst
+++ b/doc/whats_new/upcoming_changes/sklearn.utils/31951.enhancement.rst
@@ -1,4 +1,4 @@
 - ``sklearn.utils.estimator_checks.parametrize_with_checks`` now lets you configure
-strict mode for xfailing checks. Tests that unexpectedly pass will lead to a test
-failure. The default behaviour is unchanged.
-By :user:`Tim Head <betatim>`.
+  strict mode for xfailing checks via ``xfail_strict``. With ``xfail_strict=True``,
+  checks that unexpectedly pass lead to a test failure. The default is unchanged.
+  By :user:`Tim Head <betatim>`.

From 178abcb88090862dfda1a9da754fa32420eeb02e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= <loic.esteve@ymail.com>
Date: Mon, 1 Sep 2025 09:51:59 +0200
Subject: [PATCH 7/9] tweak [azure parallel]

---
 sklearn/utils/tests/test_estimator_checks.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 8b1acaa3a44ee..b126ead008f90 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -1365,11 +1365,13 @@ def test_estimator_checks_generator_strict_xfail_tests():
     # xfail'ed checks are wrapped in a ParameterSet, so below we extract
     # the things we need via a bit of a crutch: len()
     marked_checks = [c for c in checks if hasattr(c, "marks")]
+    # make sure we use a class that has expected failures
+    assert len(expected_to_fail) > 0
 
     for parameter_set in marked_checks:
-        (_, check), marks, _ = parameter_set
-        mark = marks[0]
-        if mark.kwargs["strict"]:
+        _, check = parameter_set.values
+        first_mark = parameter_set.marks[0]
+        if first_mark.kwargs["strict"]:
             strict_xfailed_checks.append(_check_name(check))
 
     # all checks expected to fail are marked as strict xfail

From 3ed0e4a8e037f8706fd96465b78c5f39d91f6731 Mon Sep 17 00:00:00 2001
From: Tim Head <betatim@gmail.com>
Date: Mon, 1 Sep 2025 11:10:01 +0200
Subject: [PATCH 8/9] Add comment explaining purpose of test

---
 sklearn/utils/tests/test_estimator_checks.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 8b1acaa3a44ee..9cccc35352a72 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -1325,6 +1325,7 @@ def test_all_estimators_all_public():
 
 
 def test_estimator_checks_generator_strict_none():
+    # Check that the xfail marks carry no "strict" kwarg when xfail_strict is None
     est = next(_construct_instances(NuSVC))
     expected_to_fail = _get_expected_failed_checks(est)
     # If we don't pass strict, it should not appear in the xfail mark either

From fa808d2c77ccb90a3c7e6788cb5fd48161f64ad9 Mon Sep 17 00:00:00 2001
From: Tim Head <betatim@gmail.com>
Date: Mon, 1 Sep 2025 11:12:31 +0200
Subject: [PATCH 9/9] Improve docstring formatting

Co-authored-by: Adrin Jalali <adrin.jalali@gmail.com>
---
 sklearn/utils/estimator_checks.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index b51a6050f8dfd..d8cd13848a09d 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -445,7 +445,7 @@ def _maybe_mark(
         installed. This is used in combination with `parametrize_with_checks` only.
     xfail_strict : bool, default=None
         Whether to run checks in xfail strict mode. This option is ignored unless
-        mark="xfail". If True, checks that are expected to fail but actually
+        `mark="xfail"`. If True, checks that are expected to fail but actually
         pass will lead to a test failure. If False, unexpectedly passing tests
         will be marked as xpass. If None, the default pytest behavior is used.
 
@@ -546,7 +546,7 @@ def estimator_checks_generator(
         :class:`~sklearn.exceptions.SkipTest` exception.
     xfail_strict : bool, default=None
         Whether to run checks in xfail strict mode. This option is ignored unless
-        mark="xfail". If True, checks that are expected to fail but actually
+        `mark="xfail"`. If True, checks that are expected to fail but actually
         pass will lead to a test failure. If False, unexpectedly passing tests
         will be marked as xpass. If None, the default pytest behavior is used.