From b5fcce880cd08bb6959738d8456e348fc6998464 Mon Sep 17 00:00:00 2001 From: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com> Date: Wed, 13 Dec 2023 11:15:43 +0800 Subject: [PATCH 1/3] combine all conftests into one --- conftest.py | 305 +++++++++++++++++++++++++++++++- sklearn/conftest.py | 280 ----------------------------- sklearn/externals/conftest.py | 7 - sklearn/utils/tests/conftest.py | 10 -- 4 files changed, 299 insertions(+), 303 deletions(-) delete mode 100644 sklearn/conftest.py delete mode 100644 sklearn/externals/conftest.py delete mode 100644 sklearn/utils/tests/conftest.py diff --git a/conftest.py b/conftest.py index e4e478d2d72d7..0a05372a23a9b 100644 --- a/conftest.py +++ b/conftest.py @@ -1,6 +1,299 @@ -# Even if empty this file is useful so that when running from the root folder -# ./sklearn is added to sys.path by pytest. See -# https://docs.pytest.org/en/latest/explanation/pythonpath.html for more -# details. For example, this allows to build extensions in place and run pytest -# doc/modules/clustering.rst and use sklearn from the local folder rather than -# the one from site-packages. 
+import builtins +import platform +import sys +from contextlib import suppress +from functools import wraps +from os import environ +from pathlib import Path +from unittest import SkipTest + +import joblib +import numpy as np +import pytest +from _pytest.doctest import DoctestItem +from threadpoolctl import threadpool_limits + +from sklearn import config_context, set_config +from sklearn._min_dependencies import PYTEST_MIN_VERSION +from sklearn.datasets import ( + fetch_20newsgroups, + fetch_20newsgroups_vectorized, + fetch_california_housing, + fetch_covtype, + fetch_kddcup99, + fetch_olivetti_faces, + fetch_rcv1, +) +from sklearn.tests import random_seed +from sklearn.utils import _IS_32BIT +from sklearn.utils.fixes import np_base_version, parse_version, sp_version + +if parse_version(pytest.__version__) < parse_version(PYTEST_MIN_VERSION): + raise ImportError( + f"Your version of pytest is too old. Got version {pytest.__version__}, you" + f" should have pytest >= {PYTEST_MIN_VERSION} installed." + ) + +scipy_datasets_require_network = sp_version >= parse_version("1.10") + + +def pytest_ignore_collect(path, config): + """ + Do not collect any tests in externals. 
This is more robust than using + --ignore because --ignore needs a path and it is not convenient to pass in + the externals path (very long install-dependent path in site-packages) when + using --pyargs + """ + return Path(path).match("*/sklearn/externals/*") + + +@pytest.fixture +def enable_slep006(): + """Enable SLEP006 for all tests.""" + with config_context(enable_metadata_routing=True): + yield + + +def raccoon_face_or_skip(): + # SciPy >= 1.10 requires network to access to get data + if scipy_datasets_require_network: + run_network_tests = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" + if not run_network_tests: + raise SkipTest("test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0") + + try: + import pooch # noqa + except ImportError: + raise SkipTest("test requires pooch to be installed") + + from scipy.datasets import face + else: + from scipy.misc import face + + return face(gray=True) + + +dataset_fetchers = { + "fetch_20newsgroups_fxt": fetch_20newsgroups, + "fetch_20newsgroups_vectorized_fxt": fetch_20newsgroups_vectorized, + "fetch_california_housing_fxt": fetch_california_housing, + "fetch_covtype_fxt": fetch_covtype, + "fetch_kddcup99_fxt": fetch_kddcup99, + "fetch_olivetti_faces_fxt": fetch_olivetti_faces, + "fetch_rcv1_fxt": fetch_rcv1, +} + +if scipy_datasets_require_network: + dataset_fetchers["raccoon_face_fxt"] = raccoon_face_or_skip + +_SKIP32_MARK = pytest.mark.skipif( + environ.get("SKLEARN_RUN_FLOAT32_TESTS", "0") != "1", + reason="Set SKLEARN_RUN_FLOAT32_TESTS=1 to run float32 dtype tests", +) + + +# Global fixtures +@pytest.fixture(params=[pytest.param(np.float32, marks=_SKIP32_MARK), np.float64]) +def global_dtype(request): + yield request.param + + +def _fetch_fixture(f): + """Fetch dataset (download if missing and requested by environment).""" + download_if_missing = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" + + @wraps(f) + def wrapped(*args, **kwargs): + kwargs["download_if_missing"] = download_if_missing + try: + 
return f(*args, **kwargs) + except OSError as e: + if str(e) != "Data not found and `download_if_missing` is False": + raise + pytest.skip("test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0") + + return pytest.fixture(lambda: wrapped) + + +# Adds fixtures for fetching data +fetch_20newsgroups_fxt = _fetch_fixture(fetch_20newsgroups) +fetch_20newsgroups_vectorized_fxt = _fetch_fixture(fetch_20newsgroups_vectorized) +fetch_california_housing_fxt = _fetch_fixture(fetch_california_housing) +fetch_covtype_fxt = _fetch_fixture(fetch_covtype) +fetch_kddcup99_fxt = _fetch_fixture(fetch_kddcup99) +fetch_olivetti_faces_fxt = _fetch_fixture(fetch_olivetti_faces) +fetch_rcv1_fxt = _fetch_fixture(fetch_rcv1) +raccoon_face_fxt = pytest.fixture(raccoon_face_or_skip) + + +def pytest_collection_modifyitems(config, items): + """Called after collect is completed. + + Parameters + ---------- + config : pytest config + items : list of collected items + """ + run_network_tests = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" + skip_network = pytest.mark.skip( + reason="test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0" + ) + + # download datasets during collection to avoid thread unsafe behavior + # when running pytest in parallel with pytest-xdist + dataset_features_set = set(dataset_fetchers) + datasets_to_download = set() + + for item in items: + if not hasattr(item, "fixturenames"): + continue + item_fixtures = set(item.fixturenames) + dataset_to_fetch = item_fixtures & dataset_features_set + if not dataset_to_fetch: + continue + + if run_network_tests: + datasets_to_download |= dataset_to_fetch + else: + # network tests are skipped + item.add_marker(skip_network) + + # Only download datasets on the first worker spawned by pytest-xdist + # to avoid thread unsafe behavior. If pytest-xdist is not used, we still + # download before tests run. 
+ worker_id = environ.get("PYTEST_XDIST_WORKER", "gw0") + if worker_id == "gw0" and run_network_tests: + for name in datasets_to_download: + with suppress(SkipTest): + dataset_fetchers[name]() + + for item in items: + # Known failure on with GradientBoostingClassifier on ARM64 + if ( + item.name.endswith("GradientBoostingClassifier") + and platform.machine() == "aarch64" + ): + marker = pytest.mark.xfail( + reason=( + "know failure. See " + "https://github.com/scikit-learn/scikit-learn/issues/17797" # noqa + ) + ) + item.add_marker(marker) + + skip_doctests = False + try: + import matplotlib # noqa + except ImportError: + skip_doctests = True + reason = "matplotlib is required to run the doctests" + + if _IS_32BIT: + reason = "doctest are only run when the default numpy int is 64 bits." + skip_doctests = True + elif sys.platform.startswith("win32"): + reason = ( + "doctests are not run for Windows because numpy arrays " + "repr is inconsistent across platforms." + ) + skip_doctests = True + + if np_base_version >= parse_version("2"): + reason = "Due to NEP 51 numpy scalar repr has changed in numpy 2" + skip_doctests = True + + # Normally doctest has the entire module's scope. Here we set globs to an empty dict + # to remove the module's scope: + # https://docs.python.org/3/library/doctest.html#what-s-the-execution-context + for item in items: + if isinstance(item, DoctestItem): + item.dtest.globs = {} + + if skip_doctests: + skip_marker = pytest.mark.skip(reason=reason) + + for item in items: + if isinstance(item, DoctestItem): + # work-around an internal error with pytest if adding a skip + # mark to a doctest in a contextmanager, see + # https://github.com/pytest-dev/pytest/issues/8796 for more + # details. 
+ if item.name != "sklearn._config.config_context": + item.add_marker(skip_marker) + try: + import PIL # noqa + + pillow_installed = True + except ImportError: + pillow_installed = False + + if not pillow_installed: + skip_marker = pytest.mark.skip(reason="pillow (or PIL) not installed!") + for item in items: + if item.name in [ + "sklearn.feature_extraction.image.PatchExtractor", + "sklearn.feature_extraction.image.extract_patches_2d", + ]: + item.add_marker(skip_marker) + + +@pytest.fixture(scope="function") +def pyplot(): + """Setup and teardown fixture for matplotlib. + + This fixture checks if we can import matplotlib. If not, the tests will be + skipped. Otherwise, we close the figures before and after running the + functions. + + Returns + ------- + pyplot : module + The ``matplotlib.pyplot`` module. + """ + pyplot = pytest.importorskip("matplotlib.pyplot") + pyplot.close("all") + yield pyplot + pyplot.close("all") + + +def pytest_configure(config): + # Use matplotlib agg backend during the tests including doctests + try: + import matplotlib + + matplotlib.use("agg") + except ImportError: + pass + + allowed_parallelism = joblib.cpu_count(only_physical_cores=True) + xdist_worker_count = environ.get("PYTEST_XDIST_WORKER_COUNT") + if xdist_worker_count is not None: + # Set the number of OpenMP and BLAS threads based on the number of workers + # xdist is using to prevent oversubscription. 
+ allowed_parallelism = max(allowed_parallelism // int(xdist_worker_count), 1) + threadpool_limits(allowed_parallelism) + + # Register global_random_seed plugin if it is not already registered + if not config.pluginmanager.hasplugin("sklearn.tests.random_seed"): + config.pluginmanager.register(random_seed) + + +@pytest.fixture +def hide_available_pandas(monkeypatch): + """Pretend pandas was not installed.""" + import_orig = builtins.__import__ + + def mocked_import(name, *args, **kwargs): + if name == "pandas": + raise ImportError() + return import_orig(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", mocked_import) + + +@pytest.fixture +def print_changed_only_false(): + """Set `print_changed_only` to False for the duration of the test.""" + set_config(print_changed_only=False) + yield + set_config(print_changed_only=True) # reset to default diff --git a/sklearn/conftest.py b/sklearn/conftest.py deleted file mode 100644 index d15f9fe2ec142..0000000000000 --- a/sklearn/conftest.py +++ /dev/null @@ -1,280 +0,0 @@ -import builtins -import platform -import sys -from contextlib import suppress -from functools import wraps -from os import environ -from unittest import SkipTest - -import joblib -import numpy as np -import pytest -from _pytest.doctest import DoctestItem -from threadpoolctl import threadpool_limits - -from sklearn import config_context -from sklearn._min_dependencies import PYTEST_MIN_VERSION -from sklearn.datasets import ( - fetch_20newsgroups, - fetch_20newsgroups_vectorized, - fetch_california_housing, - fetch_covtype, - fetch_kddcup99, - fetch_olivetti_faces, - fetch_rcv1, -) -from sklearn.tests import random_seed -from sklearn.utils import _IS_32BIT -from sklearn.utils.fixes import np_base_version, parse_version, sp_version - -if parse_version(pytest.__version__) < parse_version(PYTEST_MIN_VERSION): - raise ImportError( - f"Your version of pytest is too old. 
Got version {pytest.__version__}, you" - f" should have pytest >= {PYTEST_MIN_VERSION} installed." - ) - -scipy_datasets_require_network = sp_version >= parse_version("1.10") - - -@pytest.fixture -def enable_slep006(): - """Enable SLEP006 for all tests.""" - with config_context(enable_metadata_routing=True): - yield - - -def raccoon_face_or_skip(): - # SciPy >= 1.10 requires network to access to get data - if scipy_datasets_require_network: - run_network_tests = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" - if not run_network_tests: - raise SkipTest("test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0") - - try: - import pooch # noqa - except ImportError: - raise SkipTest("test requires pooch to be installed") - - from scipy.datasets import face - else: - from scipy.misc import face - - return face(gray=True) - - -dataset_fetchers = { - "fetch_20newsgroups_fxt": fetch_20newsgroups, - "fetch_20newsgroups_vectorized_fxt": fetch_20newsgroups_vectorized, - "fetch_california_housing_fxt": fetch_california_housing, - "fetch_covtype_fxt": fetch_covtype, - "fetch_kddcup99_fxt": fetch_kddcup99, - "fetch_olivetti_faces_fxt": fetch_olivetti_faces, - "fetch_rcv1_fxt": fetch_rcv1, -} - -if scipy_datasets_require_network: - dataset_fetchers["raccoon_face_fxt"] = raccoon_face_or_skip - -_SKIP32_MARK = pytest.mark.skipif( - environ.get("SKLEARN_RUN_FLOAT32_TESTS", "0") != "1", - reason="Set SKLEARN_RUN_FLOAT32_TESTS=1 to run float32 dtype tests", -) - - -# Global fixtures -@pytest.fixture(params=[pytest.param(np.float32, marks=_SKIP32_MARK), np.float64]) -def global_dtype(request): - yield request.param - - -def _fetch_fixture(f): - """Fetch dataset (download if missing and requested by environment).""" - download_if_missing = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" - - @wraps(f) - def wrapped(*args, **kwargs): - kwargs["download_if_missing"] = download_if_missing - try: - return f(*args, **kwargs) - except OSError as e: - if str(e) != "Data not found and 
`download_if_missing` is False": - raise - pytest.skip("test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0") - - return pytest.fixture(lambda: wrapped) - - -# Adds fixtures for fetching data -fetch_20newsgroups_fxt = _fetch_fixture(fetch_20newsgroups) -fetch_20newsgroups_vectorized_fxt = _fetch_fixture(fetch_20newsgroups_vectorized) -fetch_california_housing_fxt = _fetch_fixture(fetch_california_housing) -fetch_covtype_fxt = _fetch_fixture(fetch_covtype) -fetch_kddcup99_fxt = _fetch_fixture(fetch_kddcup99) -fetch_olivetti_faces_fxt = _fetch_fixture(fetch_olivetti_faces) -fetch_rcv1_fxt = _fetch_fixture(fetch_rcv1) -raccoon_face_fxt = pytest.fixture(raccoon_face_or_skip) - - -def pytest_collection_modifyitems(config, items): - """Called after collect is completed. - - Parameters - ---------- - config : pytest config - items : list of collected items - """ - run_network_tests = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" - skip_network = pytest.mark.skip( - reason="test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0" - ) - - # download datasets during collection to avoid thread unsafe behavior - # when running pytest in parallel with pytest-xdist - dataset_features_set = set(dataset_fetchers) - datasets_to_download = set() - - for item in items: - if not hasattr(item, "fixturenames"): - continue - item_fixtures = set(item.fixturenames) - dataset_to_fetch = item_fixtures & dataset_features_set - if not dataset_to_fetch: - continue - - if run_network_tests: - datasets_to_download |= dataset_to_fetch - else: - # network tests are skipped - item.add_marker(skip_network) - - # Only download datasets on the first worker spawned by pytest-xdist - # to avoid thread unsafe behavior. If pytest-xdist is not used, we still - # download before tests run. 
- worker_id = environ.get("PYTEST_XDIST_WORKER", "gw0") - if worker_id == "gw0" and run_network_tests: - for name in datasets_to_download: - with suppress(SkipTest): - dataset_fetchers[name]() - - for item in items: - # Known failure on with GradientBoostingClassifier on ARM64 - if ( - item.name.endswith("GradientBoostingClassifier") - and platform.machine() == "aarch64" - ): - marker = pytest.mark.xfail( - reason=( - "know failure. See " - "https://github.com/scikit-learn/scikit-learn/issues/17797" # noqa - ) - ) - item.add_marker(marker) - - skip_doctests = False - try: - import matplotlib # noqa - except ImportError: - skip_doctests = True - reason = "matplotlib is required to run the doctests" - - if _IS_32BIT: - reason = "doctest are only run when the default numpy int is 64 bits." - skip_doctests = True - elif sys.platform.startswith("win32"): - reason = ( - "doctests are not run for Windows because numpy arrays " - "repr is inconsistent across platforms." - ) - skip_doctests = True - - if np_base_version >= parse_version("2"): - reason = "Due to NEP 51 numpy scalar repr has changed in numpy 2" - skip_doctests = True - - # Normally doctest has the entire module's scope. Here we set globs to an empty dict - # to remove the module's scope: - # https://docs.python.org/3/library/doctest.html#what-s-the-execution-context - for item in items: - if isinstance(item, DoctestItem): - item.dtest.globs = {} - - if skip_doctests: - skip_marker = pytest.mark.skip(reason=reason) - - for item in items: - if isinstance(item, DoctestItem): - # work-around an internal error with pytest if adding a skip - # mark to a doctest in a contextmanager, see - # https://github.com/pytest-dev/pytest/issues/8796 for more - # details. 
- if item.name != "sklearn._config.config_context": - item.add_marker(skip_marker) - try: - import PIL # noqa - - pillow_installed = True - except ImportError: - pillow_installed = False - - if not pillow_installed: - skip_marker = pytest.mark.skip(reason="pillow (or PIL) not installed!") - for item in items: - if item.name in [ - "sklearn.feature_extraction.image.PatchExtractor", - "sklearn.feature_extraction.image.extract_patches_2d", - ]: - item.add_marker(skip_marker) - - -@pytest.fixture(scope="function") -def pyplot(): - """Setup and teardown fixture for matplotlib. - - This fixture checks if we can import matplotlib. If not, the tests will be - skipped. Otherwise, we close the figures before and after running the - functions. - - Returns - ------- - pyplot : module - The ``matplotlib.pyplot`` module. - """ - pyplot = pytest.importorskip("matplotlib.pyplot") - pyplot.close("all") - yield pyplot - pyplot.close("all") - - -def pytest_configure(config): - # Use matplotlib agg backend during the tests including doctests - try: - import matplotlib - - matplotlib.use("agg") - except ImportError: - pass - - allowed_parallelism = joblib.cpu_count(only_physical_cores=True) - xdist_worker_count = environ.get("PYTEST_XDIST_WORKER_COUNT") - if xdist_worker_count is not None: - # Set the number of OpenMP and BLAS threads based on the number of workers - # xdist is using to prevent oversubscription. 
- allowed_parallelism = max(allowed_parallelism // int(xdist_worker_count), 1) - threadpool_limits(allowed_parallelism) - - # Register global_random_seed plugin if it is not already registered - if not config.pluginmanager.hasplugin("sklearn.tests.random_seed"): - config.pluginmanager.register(random_seed) - - -@pytest.fixture -def hide_available_pandas(monkeypatch): - """Pretend pandas was not installed.""" - import_orig = builtins.__import__ - - def mocked_import(name, *args, **kwargs): - if name == "pandas": - raise ImportError() - return import_orig(name, *args, **kwargs) - - monkeypatch.setattr(builtins, "__import__", mocked_import) diff --git a/sklearn/externals/conftest.py b/sklearn/externals/conftest.py deleted file mode 100644 index c617107866b92..0000000000000 --- a/sklearn/externals/conftest.py +++ /dev/null @@ -1,7 +0,0 @@ -# Do not collect any tests in externals. This is more robust than using -# --ignore because --ignore needs a path and it is not convenient to pass in -# the externals path (very long install-dependent path in site-packages) when -# using --pyargs -def pytest_ignore_collect(path, config): - return True - diff --git a/sklearn/utils/tests/conftest.py b/sklearn/utils/tests/conftest.py deleted file mode 100644 index 148225a481f69..0000000000000 --- a/sklearn/utils/tests/conftest.py +++ /dev/null @@ -1,10 +0,0 @@ -import pytest - -import sklearn - - -@pytest.fixture -def print_changed_only_false(): - sklearn.set_config(print_changed_only=False) - yield - sklearn.set_config(print_changed_only=True) # reset to default From 7e09b8b43b3bf568559267a4d1b463b70979ca99 Mon Sep 17 00:00:00 2001 From: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com> Date: Wed, 13 Dec 2023 17:13:40 +0800 Subject: [PATCH 2/3] revert --- conftest.py | 305 +--------------------------------- sklearn/conftest.py | 288 ++++++++++++++++++++++++++++++++ sklearn/externals/conftest.py | 6 + 3 files changed, 300 insertions(+), 299 deletions(-) create mode 100644 
sklearn/conftest.py create mode 100644 sklearn/externals/conftest.py diff --git a/conftest.py b/conftest.py index 0a05372a23a9b..e4e478d2d72d7 100644 --- a/conftest.py +++ b/conftest.py @@ -1,299 +1,6 @@ -import builtins -import platform -import sys -from contextlib import suppress -from functools import wraps -from os import environ -from pathlib import Path -from unittest import SkipTest - -import joblib -import numpy as np -import pytest -from _pytest.doctest import DoctestItem -from threadpoolctl import threadpool_limits - -from sklearn import config_context, set_config -from sklearn._min_dependencies import PYTEST_MIN_VERSION -from sklearn.datasets import ( - fetch_20newsgroups, - fetch_20newsgroups_vectorized, - fetch_california_housing, - fetch_covtype, - fetch_kddcup99, - fetch_olivetti_faces, - fetch_rcv1, -) -from sklearn.tests import random_seed -from sklearn.utils import _IS_32BIT -from sklearn.utils.fixes import np_base_version, parse_version, sp_version - -if parse_version(pytest.__version__) < parse_version(PYTEST_MIN_VERSION): - raise ImportError( - f"Your version of pytest is too old. Got version {pytest.__version__}, you" - f" should have pytest >= {PYTEST_MIN_VERSION} installed." - ) - -scipy_datasets_require_network = sp_version >= parse_version("1.10") - - -def pytest_ignore_collect(path, config): - """ - Do not collect any tests in externals. 
This is more robust than using - --ignore because --ignore needs a path and it is not convenient to pass in - the externals path (very long install-dependent path in site-packages) when - using --pyargs - """ - return Path(path).match("*/sklearn/externals/*") - - -@pytest.fixture -def enable_slep006(): - """Enable SLEP006 for all tests.""" - with config_context(enable_metadata_routing=True): - yield - - -def raccoon_face_or_skip(): - # SciPy >= 1.10 requires network to access to get data - if scipy_datasets_require_network: - run_network_tests = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" - if not run_network_tests: - raise SkipTest("test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0") - - try: - import pooch # noqa - except ImportError: - raise SkipTest("test requires pooch to be installed") - - from scipy.datasets import face - else: - from scipy.misc import face - - return face(gray=True) - - -dataset_fetchers = { - "fetch_20newsgroups_fxt": fetch_20newsgroups, - "fetch_20newsgroups_vectorized_fxt": fetch_20newsgroups_vectorized, - "fetch_california_housing_fxt": fetch_california_housing, - "fetch_covtype_fxt": fetch_covtype, - "fetch_kddcup99_fxt": fetch_kddcup99, - "fetch_olivetti_faces_fxt": fetch_olivetti_faces, - "fetch_rcv1_fxt": fetch_rcv1, -} - -if scipy_datasets_require_network: - dataset_fetchers["raccoon_face_fxt"] = raccoon_face_or_skip - -_SKIP32_MARK = pytest.mark.skipif( - environ.get("SKLEARN_RUN_FLOAT32_TESTS", "0") != "1", - reason="Set SKLEARN_RUN_FLOAT32_TESTS=1 to run float32 dtype tests", -) - - -# Global fixtures -@pytest.fixture(params=[pytest.param(np.float32, marks=_SKIP32_MARK), np.float64]) -def global_dtype(request): - yield request.param - - -def _fetch_fixture(f): - """Fetch dataset (download if missing and requested by environment).""" - download_if_missing = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" - - @wraps(f) - def wrapped(*args, **kwargs): - kwargs["download_if_missing"] = download_if_missing - try: - 
return f(*args, **kwargs) - except OSError as e: - if str(e) != "Data not found and `download_if_missing` is False": - raise - pytest.skip("test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0") - - return pytest.fixture(lambda: wrapped) - - -# Adds fixtures for fetching data -fetch_20newsgroups_fxt = _fetch_fixture(fetch_20newsgroups) -fetch_20newsgroups_vectorized_fxt = _fetch_fixture(fetch_20newsgroups_vectorized) -fetch_california_housing_fxt = _fetch_fixture(fetch_california_housing) -fetch_covtype_fxt = _fetch_fixture(fetch_covtype) -fetch_kddcup99_fxt = _fetch_fixture(fetch_kddcup99) -fetch_olivetti_faces_fxt = _fetch_fixture(fetch_olivetti_faces) -fetch_rcv1_fxt = _fetch_fixture(fetch_rcv1) -raccoon_face_fxt = pytest.fixture(raccoon_face_or_skip) - - -def pytest_collection_modifyitems(config, items): - """Called after collect is completed. - - Parameters - ---------- - config : pytest config - items : list of collected items - """ - run_network_tests = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" - skip_network = pytest.mark.skip( - reason="test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0" - ) - - # download datasets during collection to avoid thread unsafe behavior - # when running pytest in parallel with pytest-xdist - dataset_features_set = set(dataset_fetchers) - datasets_to_download = set() - - for item in items: - if not hasattr(item, "fixturenames"): - continue - item_fixtures = set(item.fixturenames) - dataset_to_fetch = item_fixtures & dataset_features_set - if not dataset_to_fetch: - continue - - if run_network_tests: - datasets_to_download |= dataset_to_fetch - else: - # network tests are skipped - item.add_marker(skip_network) - - # Only download datasets on the first worker spawned by pytest-xdist - # to avoid thread unsafe behavior. If pytest-xdist is not used, we still - # download before tests run. 
- worker_id = environ.get("PYTEST_XDIST_WORKER", "gw0") - if worker_id == "gw0" and run_network_tests: - for name in datasets_to_download: - with suppress(SkipTest): - dataset_fetchers[name]() - - for item in items: - # Known failure on with GradientBoostingClassifier on ARM64 - if ( - item.name.endswith("GradientBoostingClassifier") - and platform.machine() == "aarch64" - ): - marker = pytest.mark.xfail( - reason=( - "know failure. See " - "https://github.com/scikit-learn/scikit-learn/issues/17797" # noqa - ) - ) - item.add_marker(marker) - - skip_doctests = False - try: - import matplotlib # noqa - except ImportError: - skip_doctests = True - reason = "matplotlib is required to run the doctests" - - if _IS_32BIT: - reason = "doctest are only run when the default numpy int is 64 bits." - skip_doctests = True - elif sys.platform.startswith("win32"): - reason = ( - "doctests are not run for Windows because numpy arrays " - "repr is inconsistent across platforms." - ) - skip_doctests = True - - if np_base_version >= parse_version("2"): - reason = "Due to NEP 51 numpy scalar repr has changed in numpy 2" - skip_doctests = True - - # Normally doctest has the entire module's scope. Here we set globs to an empty dict - # to remove the module's scope: - # https://docs.python.org/3/library/doctest.html#what-s-the-execution-context - for item in items: - if isinstance(item, DoctestItem): - item.dtest.globs = {} - - if skip_doctests: - skip_marker = pytest.mark.skip(reason=reason) - - for item in items: - if isinstance(item, DoctestItem): - # work-around an internal error with pytest if adding a skip - # mark to a doctest in a contextmanager, see - # https://github.com/pytest-dev/pytest/issues/8796 for more - # details. 
- if item.name != "sklearn._config.config_context": - item.add_marker(skip_marker) - try: - import PIL # noqa - - pillow_installed = True - except ImportError: - pillow_installed = False - - if not pillow_installed: - skip_marker = pytest.mark.skip(reason="pillow (or PIL) not installed!") - for item in items: - if item.name in [ - "sklearn.feature_extraction.image.PatchExtractor", - "sklearn.feature_extraction.image.extract_patches_2d", - ]: - item.add_marker(skip_marker) - - -@pytest.fixture(scope="function") -def pyplot(): - """Setup and teardown fixture for matplotlib. - - This fixture checks if we can import matplotlib. If not, the tests will be - skipped. Otherwise, we close the figures before and after running the - functions. - - Returns - ------- - pyplot : module - The ``matplotlib.pyplot`` module. - """ - pyplot = pytest.importorskip("matplotlib.pyplot") - pyplot.close("all") - yield pyplot - pyplot.close("all") - - -def pytest_configure(config): - # Use matplotlib agg backend during the tests including doctests - try: - import matplotlib - - matplotlib.use("agg") - except ImportError: - pass - - allowed_parallelism = joblib.cpu_count(only_physical_cores=True) - xdist_worker_count = environ.get("PYTEST_XDIST_WORKER_COUNT") - if xdist_worker_count is not None: - # Set the number of OpenMP and BLAS threads based on the number of workers - # xdist is using to prevent oversubscription. 
- allowed_parallelism = max(allowed_parallelism // int(xdist_worker_count), 1) - threadpool_limits(allowed_parallelism) - - # Register global_random_seed plugin if it is not already registered - if not config.pluginmanager.hasplugin("sklearn.tests.random_seed"): - config.pluginmanager.register(random_seed) - - -@pytest.fixture -def hide_available_pandas(monkeypatch): - """Pretend pandas was not installed.""" - import_orig = builtins.__import__ - - def mocked_import(name, *args, **kwargs): - if name == "pandas": - raise ImportError() - return import_orig(name, *args, **kwargs) - - monkeypatch.setattr(builtins, "__import__", mocked_import) - - -@pytest.fixture -def print_changed_only_false(): - """Set `print_changed_only` to False for the duration of the test.""" - set_config(print_changed_only=False) - yield - set_config(print_changed_only=True) # reset to default +# Even if empty this file is useful so that when running from the root folder +# ./sklearn is added to sys.path by pytest. See +# https://docs.pytest.org/en/latest/explanation/pythonpath.html for more +# details. For example, this allows to build extensions in place and run pytest +# doc/modules/clustering.rst and use sklearn from the local folder rather than +# the one from site-packages. 
diff --git a/sklearn/conftest.py b/sklearn/conftest.py new file mode 100644 index 0000000000000..d2f44f6912b62 --- /dev/null +++ b/sklearn/conftest.py @@ -0,0 +1,288 @@ +import builtins +import platform +import sys +from contextlib import suppress +from functools import wraps +from os import environ +from unittest import SkipTest + +import joblib +import numpy as np +import pytest +from _pytest.doctest import DoctestItem +from threadpoolctl import threadpool_limits + +from sklearn import config_context, set_config +from sklearn._min_dependencies import PYTEST_MIN_VERSION +from sklearn.datasets import ( + fetch_20newsgroups, + fetch_20newsgroups_vectorized, + fetch_california_housing, + fetch_covtype, + fetch_kddcup99, + fetch_olivetti_faces, + fetch_rcv1, +) +from sklearn.tests import random_seed +from sklearn.utils import _IS_32BIT +from sklearn.utils.fixes import np_base_version, parse_version, sp_version + +if parse_version(pytest.__version__) < parse_version(PYTEST_MIN_VERSION): + raise ImportError( + f"Your version of pytest is too old. Got version {pytest.__version__}, you" + f" should have pytest >= {PYTEST_MIN_VERSION} installed." 
+ ) + +scipy_datasets_require_network = sp_version >= parse_version("1.10") + + +@pytest.fixture +def enable_slep006(): + """Enable SLEP006 for all tests.""" + with config_context(enable_metadata_routing=True): + yield + + +def raccoon_face_or_skip(): + # SciPy >= 1.10 requires network to access to get data + if scipy_datasets_require_network: + run_network_tests = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" + if not run_network_tests: + raise SkipTest("test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0") + + try: + import pooch # noqa + except ImportError: + raise SkipTest("test requires pooch to be installed") + + from scipy.datasets import face + else: + from scipy.misc import face + + return face(gray=True) + + +dataset_fetchers = { + "fetch_20newsgroups_fxt": fetch_20newsgroups, + "fetch_20newsgroups_vectorized_fxt": fetch_20newsgroups_vectorized, + "fetch_california_housing_fxt": fetch_california_housing, + "fetch_covtype_fxt": fetch_covtype, + "fetch_kddcup99_fxt": fetch_kddcup99, + "fetch_olivetti_faces_fxt": fetch_olivetti_faces, + "fetch_rcv1_fxt": fetch_rcv1, +} + +if scipy_datasets_require_network: + dataset_fetchers["raccoon_face_fxt"] = raccoon_face_or_skip + +_SKIP32_MARK = pytest.mark.skipif( + environ.get("SKLEARN_RUN_FLOAT32_TESTS", "0") != "1", + reason="Set SKLEARN_RUN_FLOAT32_TESTS=1 to run float32 dtype tests", +) + + +# Global fixtures +@pytest.fixture(params=[pytest.param(np.float32, marks=_SKIP32_MARK), np.float64]) +def global_dtype(request): + yield request.param + + +def _fetch_fixture(f): + """Fetch dataset (download if missing and requested by environment).""" + download_if_missing = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" + + @wraps(f) + def wrapped(*args, **kwargs): + kwargs["download_if_missing"] = download_if_missing + try: + return f(*args, **kwargs) + except OSError as e: + if str(e) != "Data not found and `download_if_missing` is False": + raise + pytest.skip("test is enabled when 
SKLEARN_SKIP_NETWORK_TESTS=0") + + return pytest.fixture(lambda: wrapped) + + +# Adds fixtures for fetching data +fetch_20newsgroups_fxt = _fetch_fixture(fetch_20newsgroups) +fetch_20newsgroups_vectorized_fxt = _fetch_fixture(fetch_20newsgroups_vectorized) +fetch_california_housing_fxt = _fetch_fixture(fetch_california_housing) +fetch_covtype_fxt = _fetch_fixture(fetch_covtype) +fetch_kddcup99_fxt = _fetch_fixture(fetch_kddcup99) +fetch_olivetti_faces_fxt = _fetch_fixture(fetch_olivetti_faces) +fetch_rcv1_fxt = _fetch_fixture(fetch_rcv1) +raccoon_face_fxt = pytest.fixture(raccoon_face_or_skip) + + +def pytest_collection_modifyitems(config, items): + """Called after collect is completed. + + Parameters + ---------- + config : pytest config + items : list of collected items + """ + run_network_tests = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" + skip_network = pytest.mark.skip( + reason="test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0" + ) + + # download datasets during collection to avoid thread unsafe behavior + # when running pytest in parallel with pytest-xdist + dataset_features_set = set(dataset_fetchers) + datasets_to_download = set() + + for item in items: + if not hasattr(item, "fixturenames"): + continue + item_fixtures = set(item.fixturenames) + dataset_to_fetch = item_fixtures & dataset_features_set + if not dataset_to_fetch: + continue + + if run_network_tests: + datasets_to_download |= dataset_to_fetch + else: + # network tests are skipped + item.add_marker(skip_network) + + # Only download datasets on the first worker spawned by pytest-xdist + # to avoid thread unsafe behavior. If pytest-xdist is not used, we still + # download before tests run. 
+    worker_id = environ.get("PYTEST_XDIST_WORKER", "gw0")
+    if worker_id == "gw0" and run_network_tests:
+        for name in datasets_to_download:
+            with suppress(SkipTest):
+                dataset_fetchers[name]()
+
+    for item in items:
+        # Known failure with GradientBoostingClassifier on ARM64
+        if (
+            item.name.endswith("GradientBoostingClassifier")
+            and platform.machine() == "aarch64"
+        ):
+            marker = pytest.mark.xfail(
+                reason=(
+                    "known failure. See "
+                    "https://github.com/scikit-learn/scikit-learn/issues/17797"  # noqa
+                )
+            )
+            item.add_marker(marker)
+
+    skip_doctests = False
+    try:
+        import matplotlib  # noqa
+    except ImportError:
+        skip_doctests = True
+        reason = "matplotlib is required to run the doctests"
+
+    if _IS_32BIT:
+        reason = "doctests are only run when the default numpy int is 64 bits."
+        skip_doctests = True
+    elif sys.platform.startswith("win32"):
+        reason = (
+            "doctests are not run for Windows because numpy arrays "
+            "repr is inconsistent across platforms."
+        )
+        skip_doctests = True
+
+    if np_base_version >= parse_version("2"):
+        reason = "Due to NEP 51 numpy scalar repr has changed in numpy 2"
+        skip_doctests = True
+
+    # Normally doctest has the entire module's scope. Here we set globs to an empty dict
+    # to remove the module's scope:
+    # https://docs.python.org/3/library/doctest.html#what-s-the-execution-context
+    for item in items:
+        if isinstance(item, DoctestItem):
+            item.dtest.globs = {}
+
+    if skip_doctests:
+        skip_marker = pytest.mark.skip(reason=reason)
+
+        for item in items:
+            if isinstance(item, DoctestItem):
+                # work-around an internal error with pytest if adding a skip
+                # mark to a doctest in a contextmanager, see
+                # https://github.com/pytest-dev/pytest/issues/8796 for more
+                # details.
+ if item.name != "sklearn._config.config_context": + item.add_marker(skip_marker) + try: + import PIL # noqa + + pillow_installed = True + except ImportError: + pillow_installed = False + + if not pillow_installed: + skip_marker = pytest.mark.skip(reason="pillow (or PIL) not installed!") + for item in items: + if item.name in [ + "sklearn.feature_extraction.image.PatchExtractor", + "sklearn.feature_extraction.image.extract_patches_2d", + ]: + item.add_marker(skip_marker) + + +@pytest.fixture(scope="function") +def pyplot(): + """Setup and teardown fixture for matplotlib. + + This fixture checks if we can import matplotlib. If not, the tests will be + skipped. Otherwise, we close the figures before and after running the + functions. + + Returns + ------- + pyplot : module + The ``matplotlib.pyplot`` module. + """ + pyplot = pytest.importorskip("matplotlib.pyplot") + pyplot.close("all") + yield pyplot + pyplot.close("all") + + +def pytest_configure(config): + # Use matplotlib agg backend during the tests including doctests + try: + import matplotlib + + matplotlib.use("agg") + except ImportError: + pass + + allowed_parallelism = joblib.cpu_count(only_physical_cores=True) + xdist_worker_count = environ.get("PYTEST_XDIST_WORKER_COUNT") + if xdist_worker_count is not None: + # Set the number of OpenMP and BLAS threads based on the number of workers + # xdist is using to prevent oversubscription. 
+ allowed_parallelism = max(allowed_parallelism // int(xdist_worker_count), 1) + threadpool_limits(allowed_parallelism) + + # Register global_random_seed plugin if it is not already registered + if not config.pluginmanager.hasplugin("sklearn.tests.random_seed"): + config.pluginmanager.register(random_seed) + + +@pytest.fixture +def hide_available_pandas(monkeypatch): + """Pretend pandas was not installed.""" + import_orig = builtins.__import__ + + def mocked_import(name, *args, **kwargs): + if name == "pandas": + raise ImportError() + return import_orig(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", mocked_import) + + +@pytest.fixture +def print_changed_only_false(): + """Set `print_changed_only` to False for the duration of the test.""" + set_config(print_changed_only=False) + yield + set_config(print_changed_only=True) # reset to default diff --git a/sklearn/externals/conftest.py b/sklearn/externals/conftest.py new file mode 100644 index 0000000000000..7f7a4af349878 --- /dev/null +++ b/sklearn/externals/conftest.py @@ -0,0 +1,6 @@ +# Do not collect any tests in externals. This is more robust than using +# --ignore because --ignore needs a path and it is not convenient to pass in +# the externals path (very long install-dependent path in site-packages) when +# using --pyargs +def pytest_ignore_collect(path, config): + return True From 3fcfa6518d691dc56d7b961f408b9552ef77cd4d Mon Sep 17 00:00:00 2001 From: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com> Date: Wed, 13 Dec 2023 17:21:57 +0800 Subject: [PATCH 3/3] remove root conftest --- conftest.py | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 conftest.py diff --git a/conftest.py b/conftest.py deleted file mode 100644 index e4e478d2d72d7..0000000000000 --- a/conftest.py +++ /dev/null @@ -1,6 +0,0 @@ -# Even if empty this file is useful so that when running from the root folder -# ./sklearn is added to sys.path by pytest. 
See -# https://docs.pytest.org/en/latest/explanation/pythonpath.html for more -# details. For example, this allows to build extensions in place and run pytest -# doc/modules/clustering.rst and use sklearn from the local folder rather than -# the one from site-packages.