From b5fcce880cd08bb6959738d8456e348fc6998464 Mon Sep 17 00:00:00 2001 From: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com> Date: Wed, 13 Dec 2023 11:15:43 +0800 Subject: [PATCH 1/3] combine all conftests into one --- conftest.py | 305 +++++++++++++++++++++++++++++++- sklearn/conftest.py | 280 ----------------------------- sklearn/externals/conftest.py | 7 - sklearn/utils/tests/conftest.py | 10 -- 4 files changed, 299 insertions(+), 303 deletions(-) delete mode 100644 sklearn/conftest.py delete mode 100644 sklearn/externals/conftest.py delete mode 100644 sklearn/utils/tests/conftest.py diff --git a/conftest.py b/conftest.py index e4e478d2d72d7..0a05372a23a9b 100644 --- a/conftest.py +++ b/conftest.py @@ -1,6 +1,299 @@ -# Even if empty this file is useful so that when running from the root folder -# ./sklearn is added to sys.path by pytest. See -# https://docs.pytest.org/en/latest/explanation/pythonpath.html for more -# details. For example, this allows to build extensions in place and run pytest -# doc/modules/clustering.rst and use sklearn from the local folder rather than -# the one from site-packages. 
+import builtins +import platform +import sys +from contextlib import suppress +from functools import wraps +from os import environ +from pathlib import Path +from unittest import SkipTest + +import joblib +import numpy as np +import pytest +from _pytest.doctest import DoctestItem +from threadpoolctl import threadpool_limits + +from sklearn import config_context, set_config +from sklearn._min_dependencies import PYTEST_MIN_VERSION +from sklearn.datasets import ( + fetch_20newsgroups, + fetch_20newsgroups_vectorized, + fetch_california_housing, + fetch_covtype, + fetch_kddcup99, + fetch_olivetti_faces, + fetch_rcv1, +) +from sklearn.tests import random_seed +from sklearn.utils import _IS_32BIT +from sklearn.utils.fixes import np_base_version, parse_version, sp_version + +if parse_version(pytest.__version__) < parse_version(PYTEST_MIN_VERSION): + raise ImportError( + f"Your version of pytest is too old. Got version {pytest.__version__}, you" + f" should have pytest >= {PYTEST_MIN_VERSION} installed." + ) + +scipy_datasets_require_network = sp_version >= parse_version("1.10") + + +def pytest_ignore_collect(path, config): + """ + Do not collect any tests in externals. 
This is more robust than using + --ignore because --ignore needs a path and it is not convenient to pass in + the externals path (very long install-dependent path in site-packages) when + using --pyargs + """ + return Path(path).match("*/sklearn/externals/*") + + +@pytest.fixture +def enable_slep006(): + """Enable SLEP006 for all tests.""" + with config_context(enable_metadata_routing=True): + yield + + +def raccoon_face_or_skip(): + # SciPy >= 1.10 requires network to access to get data + if scipy_datasets_require_network: + run_network_tests = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" + if not run_network_tests: + raise SkipTest("test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0") + + try: + import pooch # noqa + except ImportError: + raise SkipTest("test requires pooch to be installed") + + from scipy.datasets import face + else: + from scipy.misc import face + + return face(gray=True) + + +dataset_fetchers = { + "fetch_20newsgroups_fxt": fetch_20newsgroups, + "fetch_20newsgroups_vectorized_fxt": fetch_20newsgroups_vectorized, + "fetch_california_housing_fxt": fetch_california_housing, + "fetch_covtype_fxt": fetch_covtype, + "fetch_kddcup99_fxt": fetch_kddcup99, + "fetch_olivetti_faces_fxt": fetch_olivetti_faces, + "fetch_rcv1_fxt": fetch_rcv1, +} + +if scipy_datasets_require_network: + dataset_fetchers["raccoon_face_fxt"] = raccoon_face_or_skip + +_SKIP32_MARK = pytest.mark.skipif( + environ.get("SKLEARN_RUN_FLOAT32_TESTS", "0") != "1", + reason="Set SKLEARN_RUN_FLOAT32_TESTS=1 to run float32 dtype tests", +) + + +# Global fixtures +@pytest.fixture(params=[pytest.param(np.float32, marks=_SKIP32_MARK), np.float64]) +def global_dtype(request): + yield request.param + + +def _fetch_fixture(f): + """Fetch dataset (download if missing and requested by environment).""" + download_if_missing = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" + + @wraps(f) + def wrapped(*args, **kwargs): + kwargs["download_if_missing"] = download_if_missing + try: + 
return f(*args, **kwargs) + except OSError as e: + if str(e) != "Data not found and `download_if_missing` is False": + raise + pytest.skip("test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0") + + return pytest.fixture(lambda: wrapped) + + +# Adds fixtures for fetching data +fetch_20newsgroups_fxt = _fetch_fixture(fetch_20newsgroups) +fetch_20newsgroups_vectorized_fxt = _fetch_fixture(fetch_20newsgroups_vectorized) +fetch_california_housing_fxt = _fetch_fixture(fetch_california_housing) +fetch_covtype_fxt = _fetch_fixture(fetch_covtype) +fetch_kddcup99_fxt = _fetch_fixture(fetch_kddcup99) +fetch_olivetti_faces_fxt = _fetch_fixture(fetch_olivetti_faces) +fetch_rcv1_fxt = _fetch_fixture(fetch_rcv1) +raccoon_face_fxt = pytest.fixture(raccoon_face_or_skip) + + +def pytest_collection_modifyitems(config, items): + """Called after collect is completed. + + Parameters + ---------- + config : pytest config + items : list of collected items + """ + run_network_tests = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" + skip_network = pytest.mark.skip( + reason="test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0" + ) + + # download datasets during collection to avoid thread unsafe behavior + # when running pytest in parallel with pytest-xdist + dataset_features_set = set(dataset_fetchers) + datasets_to_download = set() + + for item in items: + if not hasattr(item, "fixturenames"): + continue + item_fixtures = set(item.fixturenames) + dataset_to_fetch = item_fixtures & dataset_features_set + if not dataset_to_fetch: + continue + + if run_network_tests: + datasets_to_download |= dataset_to_fetch + else: + # network tests are skipped + item.add_marker(skip_network) + + # Only download datasets on the first worker spawned by pytest-xdist + # to avoid thread unsafe behavior. If pytest-xdist is not used, we still + # download before tests run. 
+ worker_id = environ.get("PYTEST_XDIST_WORKER", "gw0") + if worker_id == "gw0" and run_network_tests: + for name in datasets_to_download: + with suppress(SkipTest): + dataset_fetchers[name]() + + for item in items: + # Known failure on with GradientBoostingClassifier on ARM64 + if ( + item.name.endswith("GradientBoostingClassifier") + and platform.machine() == "aarch64" + ): + marker = pytest.mark.xfail( + reason=( + "know failure. See " + "https://github.com/scikit-learn/scikit-learn/issues/17797" # noqa + ) + ) + item.add_marker(marker) + + skip_doctests = False + try: + import matplotlib # noqa + except ImportError: + skip_doctests = True + reason = "matplotlib is required to run the doctests" + + if _IS_32BIT: + reason = "doctest are only run when the default numpy int is 64 bits." + skip_doctests = True + elif sys.platform.startswith("win32"): + reason = ( + "doctests are not run for Windows because numpy arrays " + "repr is inconsistent across platforms." + ) + skip_doctests = True + + if np_base_version >= parse_version("2"): + reason = "Due to NEP 51 numpy scalar repr has changed in numpy 2" + skip_doctests = True + + # Normally doctest has the entire module's scope. Here we set globs to an empty dict + # to remove the module's scope: + # https://docs.python.org/3/library/doctest.html#what-s-the-execution-context + for item in items: + if isinstance(item, DoctestItem): + item.dtest.globs = {} + + if skip_doctests: + skip_marker = pytest.mark.skip(reason=reason) + + for item in items: + if isinstance(item, DoctestItem): + # work-around an internal error with pytest if adding a skip + # mark to a doctest in a contextmanager, see + # https://github.com/pytest-dev/pytest/issues/8796 for more + # details. 
+ if item.name != "sklearn._config.config_context": + item.add_marker(skip_marker) + try: + import PIL # noqa + + pillow_installed = True + except ImportError: + pillow_installed = False + + if not pillow_installed: + skip_marker = pytest.mark.skip(reason="pillow (or PIL) not installed!") + for item in items: + if item.name in [ + "sklearn.feature_extraction.image.PatchExtractor", + "sklearn.feature_extraction.image.extract_patches_2d", + ]: + item.add_marker(skip_marker) + + +@pytest.fixture(scope="function") +def pyplot(): + """Setup and teardown fixture for matplotlib. + + This fixture checks if we can import matplotlib. If not, the tests will be + skipped. Otherwise, we close the figures before and after running the + functions. + + Returns + ------- + pyplot : module + The ``matplotlib.pyplot`` module. + """ + pyplot = pytest.importorskip("matplotlib.pyplot") + pyplot.close("all") + yield pyplot + pyplot.close("all") + + +def pytest_configure(config): + # Use matplotlib agg backend during the tests including doctests + try: + import matplotlib + + matplotlib.use("agg") + except ImportError: + pass + + allowed_parallelism = joblib.cpu_count(only_physical_cores=True) + xdist_worker_count = environ.get("PYTEST_XDIST_WORKER_COUNT") + if xdist_worker_count is not None: + # Set the number of OpenMP and BLAS threads based on the number of workers + # xdist is using to prevent oversubscription. 
+ allowed_parallelism = max(allowed_parallelism // int(xdist_worker_count), 1) + threadpool_limits(allowed_parallelism) + + # Register global_random_seed plugin if it is not already registered + if not config.pluginmanager.hasplugin("sklearn.tests.random_seed"): + config.pluginmanager.register(random_seed) + + +@pytest.fixture +def hide_available_pandas(monkeypatch): + """Pretend pandas was not installed.""" + import_orig = builtins.__import__ + + def mocked_import(name, *args, **kwargs): + if name == "pandas": + raise ImportError() + return import_orig(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", mocked_import) + + +@pytest.fixture +def print_changed_only_false(): + """Set `print_changed_only` to False for the duration of the test.""" + set_config(print_changed_only=False) + yield + set_config(print_changed_only=True) # reset to default diff --git a/sklearn/conftest.py b/sklearn/conftest.py deleted file mode 100644 index d15f9fe2ec142..0000000000000 --- a/sklearn/conftest.py +++ /dev/null @@ -1,280 +0,0 @@ -import builtins -import platform -import sys -from contextlib import suppress -from functools import wraps -from os import environ -from unittest import SkipTest - -import joblib -import numpy as np -import pytest -from _pytest.doctest import DoctestItem -from threadpoolctl import threadpool_limits - -from sklearn import config_context -from sklearn._min_dependencies import PYTEST_MIN_VERSION -from sklearn.datasets import ( - fetch_20newsgroups, - fetch_20newsgroups_vectorized, - fetch_california_housing, - fetch_covtype, - fetch_kddcup99, - fetch_olivetti_faces, - fetch_rcv1, -) -from sklearn.tests import random_seed -from sklearn.utils import _IS_32BIT -from sklearn.utils.fixes import np_base_version, parse_version, sp_version - -if parse_version(pytest.__version__) < parse_version(PYTEST_MIN_VERSION): - raise ImportError( - f"Your version of pytest is too old. 
Got version {pytest.__version__}, you" - f" should have pytest >= {PYTEST_MIN_VERSION} installed." - ) - -scipy_datasets_require_network = sp_version >= parse_version("1.10") - - -@pytest.fixture -def enable_slep006(): - """Enable SLEP006 for all tests.""" - with config_context(enable_metadata_routing=True): - yield - - -def raccoon_face_or_skip(): - # SciPy >= 1.10 requires network to access to get data - if scipy_datasets_require_network: - run_network_tests = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" - if not run_network_tests: - raise SkipTest("test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0") - - try: - import pooch # noqa - except ImportError: - raise SkipTest("test requires pooch to be installed") - - from scipy.datasets import face - else: - from scipy.misc import face - - return face(gray=True) - - -dataset_fetchers = { - "fetch_20newsgroups_fxt": fetch_20newsgroups, - "fetch_20newsgroups_vectorized_fxt": fetch_20newsgroups_vectorized, - "fetch_california_housing_fxt": fetch_california_housing, - "fetch_covtype_fxt": fetch_covtype, - "fetch_kddcup99_fxt": fetch_kddcup99, - "fetch_olivetti_faces_fxt": fetch_olivetti_faces, - "fetch_rcv1_fxt": fetch_rcv1, -} - -if scipy_datasets_require_network: - dataset_fetchers["raccoon_face_fxt"] = raccoon_face_or_skip - -_SKIP32_MARK = pytest.mark.skipif( - environ.get("SKLEARN_RUN_FLOAT32_TESTS", "0") != "1", - reason="Set SKLEARN_RUN_FLOAT32_TESTS=1 to run float32 dtype tests", -) - - -# Global fixtures -@pytest.fixture(params=[pytest.param(np.float32, marks=_SKIP32_MARK), np.float64]) -def global_dtype(request): - yield request.param - - -def _fetch_fixture(f): - """Fetch dataset (download if missing and requested by environment).""" - download_if_missing = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" - - @wraps(f) - def wrapped(*args, **kwargs): - kwargs["download_if_missing"] = download_if_missing - try: - return f(*args, **kwargs) - except OSError as e: - if str(e) != "Data not found and 
`download_if_missing` is False": - raise - pytest.skip("test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0") - - return pytest.fixture(lambda: wrapped) - - -# Adds fixtures for fetching data -fetch_20newsgroups_fxt = _fetch_fixture(fetch_20newsgroups) -fetch_20newsgroups_vectorized_fxt = _fetch_fixture(fetch_20newsgroups_vectorized) -fetch_california_housing_fxt = _fetch_fixture(fetch_california_housing) -fetch_covtype_fxt = _fetch_fixture(fetch_covtype) -fetch_kddcup99_fxt = _fetch_fixture(fetch_kddcup99) -fetch_olivetti_faces_fxt = _fetch_fixture(fetch_olivetti_faces) -fetch_rcv1_fxt = _fetch_fixture(fetch_rcv1) -raccoon_face_fxt = pytest.fixture(raccoon_face_or_skip) - - -def pytest_collection_modifyitems(config, items): - """Called after collect is completed. - - Parameters - ---------- - config : pytest config - items : list of collected items - """ - run_network_tests = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" - skip_network = pytest.mark.skip( - reason="test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0" - ) - - # download datasets during collection to avoid thread unsafe behavior - # when running pytest in parallel with pytest-xdist - dataset_features_set = set(dataset_fetchers) - datasets_to_download = set() - - for item in items: - if not hasattr(item, "fixturenames"): - continue - item_fixtures = set(item.fixturenames) - dataset_to_fetch = item_fixtures & dataset_features_set - if not dataset_to_fetch: - continue - - if run_network_tests: - datasets_to_download |= dataset_to_fetch - else: - # network tests are skipped - item.add_marker(skip_network) - - # Only download datasets on the first worker spawned by pytest-xdist - # to avoid thread unsafe behavior. If pytest-xdist is not used, we still - # download before tests run. 
- worker_id = environ.get("PYTEST_XDIST_WORKER", "gw0") - if worker_id == "gw0" and run_network_tests: - for name in datasets_to_download: - with suppress(SkipTest): - dataset_fetchers[name]() - - for item in items: - # Known failure on with GradientBoostingClassifier on ARM64 - if ( - item.name.endswith("GradientBoostingClassifier") - and platform.machine() == "aarch64" - ): - marker = pytest.mark.xfail( - reason=( - "know failure. See " - "https://github.com/scikit-learn/scikit-learn/issues/17797" # noqa - ) - ) - item.add_marker(marker) - - skip_doctests = False - try: - import matplotlib # noqa - except ImportError: - skip_doctests = True - reason = "matplotlib is required to run the doctests" - - if _IS_32BIT: - reason = "doctest are only run when the default numpy int is 64 bits." - skip_doctests = True - elif sys.platform.startswith("win32"): - reason = ( - "doctests are not run for Windows because numpy arrays " - "repr is inconsistent across platforms." - ) - skip_doctests = True - - if np_base_version >= parse_version("2"): - reason = "Due to NEP 51 numpy scalar repr has changed in numpy 2" - skip_doctests = True - - # Normally doctest has the entire module's scope. Here we set globs to an empty dict - # to remove the module's scope: - # https://docs.python.org/3/library/doctest.html#what-s-the-execution-context - for item in items: - if isinstance(item, DoctestItem): - item.dtest.globs = {} - - if skip_doctests: - skip_marker = pytest.mark.skip(reason=reason) - - for item in items: - if isinstance(item, DoctestItem): - # work-around an internal error with pytest if adding a skip - # mark to a doctest in a contextmanager, see - # https://github.com/pytest-dev/pytest/issues/8796 for more - # details. 
- if item.name != "sklearn._config.config_context": - item.add_marker(skip_marker) - try: - import PIL # noqa - - pillow_installed = True - except ImportError: - pillow_installed = False - - if not pillow_installed: - skip_marker = pytest.mark.skip(reason="pillow (or PIL) not installed!") - for item in items: - if item.name in [ - "sklearn.feature_extraction.image.PatchExtractor", - "sklearn.feature_extraction.image.extract_patches_2d", - ]: - item.add_marker(skip_marker) - - -@pytest.fixture(scope="function") -def pyplot(): - """Setup and teardown fixture for matplotlib. - - This fixture checks if we can import matplotlib. If not, the tests will be - skipped. Otherwise, we close the figures before and after running the - functions. - - Returns - ------- - pyplot : module - The ``matplotlib.pyplot`` module. - """ - pyplot = pytest.importorskip("matplotlib.pyplot") - pyplot.close("all") - yield pyplot - pyplot.close("all") - - -def pytest_configure(config): - # Use matplotlib agg backend during the tests including doctests - try: - import matplotlib - - matplotlib.use("agg") - except ImportError: - pass - - allowed_parallelism = joblib.cpu_count(only_physical_cores=True) - xdist_worker_count = environ.get("PYTEST_XDIST_WORKER_COUNT") - if xdist_worker_count is not None: - # Set the number of OpenMP and BLAS threads based on the number of workers - # xdist is using to prevent oversubscription. 
- allowed_parallelism = max(allowed_parallelism // int(xdist_worker_count), 1) - threadpool_limits(allowed_parallelism) - - # Register global_random_seed plugin if it is not already registered - if not config.pluginmanager.hasplugin("sklearn.tests.random_seed"): - config.pluginmanager.register(random_seed) - - -@pytest.fixture -def hide_available_pandas(monkeypatch): - """Pretend pandas was not installed.""" - import_orig = builtins.__import__ - - def mocked_import(name, *args, **kwargs): - if name == "pandas": - raise ImportError() - return import_orig(name, *args, **kwargs) - - monkeypatch.setattr(builtins, "__import__", mocked_import) diff --git a/sklearn/externals/conftest.py b/sklearn/externals/conftest.py deleted file mode 100644 index c617107866b92..0000000000000 --- a/sklearn/externals/conftest.py +++ /dev/null @@ -1,7 +0,0 @@ -# Do not collect any tests in externals. This is more robust than using -# --ignore because --ignore needs a path and it is not convenient to pass in -# the externals path (very long install-dependent path in site-packages) when -# using --pyargs -def pytest_ignore_collect(path, config): - return True - diff --git a/sklearn/utils/tests/conftest.py b/sklearn/utils/tests/conftest.py deleted file mode 100644 index 148225a481f69..0000000000000 --- a/sklearn/utils/tests/conftest.py +++ /dev/null @@ -1,10 +0,0 @@ -import pytest - -import sklearn - - -@pytest.fixture -def print_changed_only_false(): - sklearn.set_config(print_changed_only=False) - yield - sklearn.set_config(print_changed_only=True) # reset to default From 7e09b8b43b3bf568559267a4d1b463b70979ca99 Mon Sep 17 00:00:00 2001 From: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com> Date: Wed, 13 Dec 2023 17:13:40 +0800 Subject: [PATCH 2/3] revert --- conftest.py | 305 +--------------------------------- sklearn/conftest.py | 288 ++++++++++++++++++++++++++++++++ sklearn/externals/conftest.py | 6 + 3 files changed, 300 insertions(+), 299 deletions(-) create mode 100644 
sklearn/conftest.py create mode 100644 sklearn/externals/conftest.py diff --git a/conftest.py b/conftest.py index 0a05372a23a9b..e4e478d2d72d7 100644 --- a/conftest.py +++ b/conftest.py @@ -1,299 +1,6 @@ -import builtins -import platform -import sys -from contextlib import suppress -from functools import wraps -from os import environ -from pathlib import Path -from unittest import SkipTest - -import joblib -import numpy as np -import pytest -from _pytest.doctest import DoctestItem -from threadpoolctl import threadpool_limits - -from sklearn import config_context, set_config -from sklearn._min_dependencies import PYTEST_MIN_VERSION -from sklearn.datasets import ( - fetch_20newsgroups, - fetch_20newsgroups_vectorized, - fetch_california_housing, - fetch_covtype, - fetch_kddcup99, - fetch_olivetti_faces, - fetch_rcv1, -) -from sklearn.tests import random_seed -from sklearn.utils import _IS_32BIT -from sklearn.utils.fixes import np_base_version, parse_version, sp_version - -if parse_version(pytest.__version__) < parse_version(PYTEST_MIN_VERSION): - raise ImportError( - f"Your version of pytest is too old. Got version {pytest.__version__}, you" - f" should have pytest >= {PYTEST_MIN_VERSION} installed." - ) - -scipy_datasets_require_network = sp_version >= parse_version("1.10") - - -def pytest_ignore_collect(path, config): - """ - Do not collect any tests in externals. 
This is more robust than using - --ignore because --ignore needs a path and it is not convenient to pass in - the externals path (very long install-dependent path in site-packages) when - using --pyargs - """ - return Path(path).match("*/sklearn/externals/*") - - -@pytest.fixture -def enable_slep006(): - """Enable SLEP006 for all tests.""" - with config_context(enable_metadata_routing=True): - yield - - -def raccoon_face_or_skip(): - # SciPy >= 1.10 requires network to access to get data - if scipy_datasets_require_network: - run_network_tests = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" - if not run_network_tests: - raise SkipTest("test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0") - - try: - import pooch # noqa - except ImportError: - raise SkipTest("test requires pooch to be installed") - - from scipy.datasets import face - else: - from scipy.misc import face - - return face(gray=True) - - -dataset_fetchers = { - "fetch_20newsgroups_fxt": fetch_20newsgroups, - "fetch_20newsgroups_vectorized_fxt": fetch_20newsgroups_vectorized, - "fetch_california_housing_fxt": fetch_california_housing, - "fetch_covtype_fxt": fetch_covtype, - "fetch_kddcup99_fxt": fetch_kddcup99, - "fetch_olivetti_faces_fxt": fetch_olivetti_faces, - "fetch_rcv1_fxt": fetch_rcv1, -} - -if scipy_datasets_require_network: - dataset_fetchers["raccoon_face_fxt"] = raccoon_face_or_skip - -_SKIP32_MARK = pytest.mark.skipif( - environ.get("SKLEARN_RUN_FLOAT32_TESTS", "0") != "1", - reason="Set SKLEARN_RUN_FLOAT32_TESTS=1 to run float32 dtype tests", -) - - -# Global fixtures -@pytest.fixture(params=[pytest.param(np.float32, marks=_SKIP32_MARK), np.float64]) -def global_dtype(request): - yield request.param - - -def _fetch_fixture(f): - """Fetch dataset (download if missing and requested by environment).""" - download_if_missing = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" - - @wraps(f) - def wrapped(*args, **kwargs): - kwargs["download_if_missing"] = download_if_missing - try: - 
return f(*args, **kwargs) - except OSError as e: - if str(e) != "Data not found and `download_if_missing` is False": - raise - pytest.skip("test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0") - - return pytest.fixture(lambda: wrapped) - - -# Adds fixtures for fetching data -fetch_20newsgroups_fxt = _fetch_fixture(fetch_20newsgroups) -fetch_20newsgroups_vectorized_fxt = _fetch_fixture(fetch_20newsgroups_vectorized) -fetch_california_housing_fxt = _fetch_fixture(fetch_california_housing) -fetch_covtype_fxt = _fetch_fixture(fetch_covtype) -fetch_kddcup99_fxt = _fetch_fixture(fetch_kddcup99) -fetch_olivetti_faces_fxt = _fetch_fixture(fetch_olivetti_faces) -fetch_rcv1_fxt = _fetch_fixture(fetch_rcv1) -raccoon_face_fxt = pytest.fixture(raccoon_face_or_skip) - - -def pytest_collection_modifyitems(config, items): - """Called after collect is completed. - - Parameters - ---------- - config : pytest config - items : list of collected items - """ - run_network_tests = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" - skip_network = pytest.mark.skip( - reason="test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0" - ) - - # download datasets during collection to avoid thread unsafe behavior - # when running pytest in parallel with pytest-xdist - dataset_features_set = set(dataset_fetchers) - datasets_to_download = set() - - for item in items: - if not hasattr(item, "fixturenames"): - continue - item_fixtures = set(item.fixturenames) - dataset_to_fetch = item_fixtures & dataset_features_set - if not dataset_to_fetch: - continue - - if run_network_tests: - datasets_to_download |= dataset_to_fetch - else: - # network tests are skipped - item.add_marker(skip_network) - - # Only download datasets on the first worker spawned by pytest-xdist - # to avoid thread unsafe behavior. If pytest-xdist is not used, we still - # download before tests run. 
- worker_id = environ.get("PYTEST_XDIST_WORKER", "gw0") - if worker_id == "gw0" and run_network_tests: - for name in datasets_to_download: - with suppress(SkipTest): - dataset_fetchers[name]() - - for item in items: - # Known failure on with GradientBoostingClassifier on ARM64 - if ( - item.name.endswith("GradientBoostingClassifier") - and platform.machine() == "aarch64" - ): - marker = pytest.mark.xfail( - reason=( - "know failure. See " - "https://github.com/scikit-learn/scikit-learn/issues/17797" # noqa - ) - ) - item.add_marker(marker) - - skip_doctests = False - try: - import matplotlib # noqa - except ImportError: - skip_doctests = True - reason = "matplotlib is required to run the doctests" - - if _IS_32BIT: - reason = "doctest are only run when the default numpy int is 64 bits." - skip_doctests = True - elif sys.platform.startswith("win32"): - reason = ( - "doctests are not run for Windows because numpy arrays " - "repr is inconsistent across platforms." - ) - skip_doctests = True - - if np_base_version >= parse_version("2"): - reason = "Due to NEP 51 numpy scalar repr has changed in numpy 2" - skip_doctests = True - - # Normally doctest has the entire module's scope. Here we set globs to an empty dict - # to remove the module's scope: - # https://docs.python.org/3/library/doctest.html#what-s-the-execution-context - for item in items: - if isinstance(item, DoctestItem): - item.dtest.globs = {} - - if skip_doctests: - skip_marker = pytest.mark.skip(reason=reason) - - for item in items: - if isinstance(item, DoctestItem): - # work-around an internal error with pytest if adding a skip - # mark to a doctest in a contextmanager, see - # https://github.com/pytest-dev/pytest/issues/8796 for more - # details. 
- if item.name != "sklearn._config.config_context": - item.add_marker(skip_marker) - try: - import PIL # noqa - - pillow_installed = True - except ImportError: - pillow_installed = False - - if not pillow_installed: - skip_marker = pytest.mark.skip(reason="pillow (or PIL) not installed!") - for item in items: - if item.name in [ - "sklearn.feature_extraction.image.PatchExtractor", - "sklearn.feature_extraction.image.extract_patches_2d", - ]: - item.add_marker(skip_marker) - - -@pytest.fixture(scope="function") -def pyplot(): - """Setup and teardown fixture for matplotlib. - - This fixture checks if we can import matplotlib. If not, the tests will be - skipped. Otherwise, we close the figures before and after running the - functions. - - Returns - ------- - pyplot : module - The ``matplotlib.pyplot`` module. - """ - pyplot = pytest.importorskip("matplotlib.pyplot") - pyplot.close("all") - yield pyplot - pyplot.close("all") - - -def pytest_configure(config): - # Use matplotlib agg backend during the tests including doctests - try: - import matplotlib - - matplotlib.use("agg") - except ImportError: - pass - - allowed_parallelism = joblib.cpu_count(only_physical_cores=True) - xdist_worker_count = environ.get("PYTEST_XDIST_WORKER_COUNT") - if xdist_worker_count is not None: - # Set the number of OpenMP and BLAS threads based on the number of workers - # xdist is using to prevent oversubscription. 
- allowed_parallelism = max(allowed_parallelism // int(xdist_worker_count), 1) - threadpool_limits(allowed_parallelism) - - # Register global_random_seed plugin if it is not already registered - if not config.pluginmanager.hasplugin("sklearn.tests.random_seed"): - config.pluginmanager.register(random_seed) - - -@pytest.fixture -def hide_available_pandas(monkeypatch): - """Pretend pandas was not installed.""" - import_orig = builtins.__import__ - - def mocked_import(name, *args, **kwargs): - if name == "pandas": - raise ImportError() - return import_orig(name, *args, **kwargs) - - monkeypatch.setattr(builtins, "__import__", mocked_import) - - -@pytest.fixture -def print_changed_only_false(): - """Set `print_changed_only` to False for the duration of the test.""" - set_config(print_changed_only=False) - yield - set_config(print_changed_only=True) # reset to default +# Even if empty this file is useful so that when running from the root folder +# ./sklearn is added to sys.path by pytest. See +# https://docs.pytest.org/en/latest/explanation/pythonpath.html for more +# details. For example, this allows to build extensions in place and run pytest +# doc/modules/clustering.rst and use sklearn from the local folder rather than +# the one from site-packages. 
diff --git a/sklearn/conftest.py b/sklearn/conftest.py new file mode 100644 index 0000000000000..d2f44f6912b62 --- /dev/null +++ b/sklearn/conftest.py @@ -0,0 +1,288 @@ +import builtins +import platform +import sys +from contextlib import suppress +from functools import wraps +from os import environ +from unittest import SkipTest + +import joblib +import numpy as np +import pytest +from _pytest.doctest import DoctestItem +from threadpoolctl import threadpool_limits + +from sklearn import config_context, set_config +from sklearn._min_dependencies import PYTEST_MIN_VERSION +from sklearn.datasets import ( + fetch_20newsgroups, + fetch_20newsgroups_vectorized, + fetch_california_housing, + fetch_covtype, + fetch_kddcup99, + fetch_olivetti_faces, + fetch_rcv1, +) +from sklearn.tests import random_seed +from sklearn.utils import _IS_32BIT +from sklearn.utils.fixes import np_base_version, parse_version, sp_version + +if parse_version(pytest.__version__) < parse_version(PYTEST_MIN_VERSION): + raise ImportError( + f"Your version of pytest is too old. Got version {pytest.__version__}, you" + f" should have pytest >= {PYTEST_MIN_VERSION} installed." 
+ ) + +scipy_datasets_require_network = sp_version >= parse_version("1.10") + + +@pytest.fixture +def enable_slep006(): + """Enable SLEP006 for all tests.""" + with config_context(enable_metadata_routing=True): + yield + + +def raccoon_face_or_skip(): + # SciPy >= 1.10 requires network to access to get data + if scipy_datasets_require_network: + run_network_tests = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" + if not run_network_tests: + raise SkipTest("test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0") + + try: + import pooch # noqa + except ImportError: + raise SkipTest("test requires pooch to be installed") + + from scipy.datasets import face + else: + from scipy.misc import face + + return face(gray=True) + + +dataset_fetchers = { + "fetch_20newsgroups_fxt": fetch_20newsgroups, + "fetch_20newsgroups_vectorized_fxt": fetch_20newsgroups_vectorized, + "fetch_california_housing_fxt": fetch_california_housing, + "fetch_covtype_fxt": fetch_covtype, + "fetch_kddcup99_fxt": fetch_kddcup99, + "fetch_olivetti_faces_fxt": fetch_olivetti_faces, + "fetch_rcv1_fxt": fetch_rcv1, +} + +if scipy_datasets_require_network: + dataset_fetchers["raccoon_face_fxt"] = raccoon_face_or_skip + +_SKIP32_MARK = pytest.mark.skipif( + environ.get("SKLEARN_RUN_FLOAT32_TESTS", "0") != "1", + reason="Set SKLEARN_RUN_FLOAT32_TESTS=1 to run float32 dtype tests", +) + + +# Global fixtures +@pytest.fixture(params=[pytest.param(np.float32, marks=_SKIP32_MARK), np.float64]) +def global_dtype(request): + yield request.param + + +def _fetch_fixture(f): + """Fetch dataset (download if missing and requested by environment).""" + download_if_missing = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" + + @wraps(f) + def wrapped(*args, **kwargs): + kwargs["download_if_missing"] = download_if_missing + try: + return f(*args, **kwargs) + except OSError as e: + if str(e) != "Data not found and `download_if_missing` is False": + raise + pytest.skip("test is enabled when 
SKLEARN_SKIP_NETWORK_TESTS=0") + + return pytest.fixture(lambda: wrapped) + + +# Adds fixtures for fetching data +fetch_20newsgroups_fxt = _fetch_fixture(fetch_20newsgroups) +fetch_20newsgroups_vectorized_fxt = _fetch_fixture(fetch_20newsgroups_vectorized) +fetch_california_housing_fxt = _fetch_fixture(fetch_california_housing) +fetch_covtype_fxt = _fetch_fixture(fetch_covtype) +fetch_kddcup99_fxt = _fetch_fixture(fetch_kddcup99) +fetch_olivetti_faces_fxt = _fetch_fixture(fetch_olivetti_faces) +fetch_rcv1_fxt = _fetch_fixture(fetch_rcv1) +raccoon_face_fxt = pytest.fixture(raccoon_face_or_skip) + + +def pytest_collection_modifyitems(config, items): + """Called after collect is completed. + + Parameters + ---------- + config : pytest config + items : list of collected items + """ + run_network_tests = environ.get("SKLEARN_SKIP_NETWORK_TESTS", "1") == "0" + skip_network = pytest.mark.skip( + reason="test is enabled when SKLEARN_SKIP_NETWORK_TESTS=0" + ) + + # download datasets during collection to avoid thread unsafe behavior + # when running pytest in parallel with pytest-xdist + dataset_features_set = set(dataset_fetchers) + datasets_to_download = set() + + for item in items: + if not hasattr(item, "fixturenames"): + continue + item_fixtures = set(item.fixturenames) + dataset_to_fetch = item_fixtures & dataset_features_set + if not dataset_to_fetch: + continue + + if run_network_tests: + datasets_to_download |= dataset_to_fetch + else: + # network tests are skipped + item.add_marker(skip_network) + + # Only download datasets on the first worker spawned by pytest-xdist + # to avoid thread unsafe behavior. If pytest-xdist is not used, we still + # download before tests run. 
+    worker_id = environ.get("PYTEST_XDIST_WORKER", "gw0")
+    if worker_id == "gw0" and run_network_tests:
+        for name in datasets_to_download:
+            with suppress(SkipTest):
+                dataset_fetchers[name]()
+
+    for item in items:
+        # Known failure with GradientBoostingClassifier on ARM64
+        if (
+            item.name.endswith("GradientBoostingClassifier")
+            and platform.machine() == "aarch64"
+        ):
+            marker = pytest.mark.xfail(
+                reason=(
+                    "known failure. See "
+                    "https://github.com/scikit-learn/scikit-learn/issues/17797"  # noqa
+                )
+            )
+            item.add_marker(marker)
+
+    skip_doctests = False
+    try:
+        import matplotlib  # noqa
+    except ImportError:
+        skip_doctests = True
+        reason = "matplotlib is required to run the doctests"
+
+    if _IS_32BIT:
+        reason = "doctests are only run when the default numpy int is 64 bits."
+        skip_doctests = True
+    elif sys.platform.startswith("win32"):
+        reason = (
+            "doctests are not run for Windows because numpy arrays "
+            "repr is inconsistent across platforms."
+        )
+        skip_doctests = True
+
+    if np_base_version >= parse_version("2"):
+        reason = "Due to NEP 51 numpy scalar repr has changed in numpy 2"
+        skip_doctests = True
+
+    # Normally doctest has the entire module's scope. Here we set globs to an empty dict
+    # to remove the module's scope:
+    # https://docs.python.org/3/library/doctest.html#what-s-the-execution-context
+    for item in items:
+        if isinstance(item, DoctestItem):
+            item.dtest.globs = {}
+
+    if skip_doctests:
+        skip_marker = pytest.mark.skip(reason=reason)
+
+        for item in items:
+            if isinstance(item, DoctestItem):
+                # work-around an internal error with pytest if adding a skip
+                # mark to a doctest in a contextmanager, see
+                # https://github.com/pytest-dev/pytest/issues/8796 for more
+                # details.
+ if item.name != "sklearn._config.config_context": + item.add_marker(skip_marker) + try: + import PIL # noqa + + pillow_installed = True + except ImportError: + pillow_installed = False + + if not pillow_installed: + skip_marker = pytest.mark.skip(reason="pillow (or PIL) not installed!") + for item in items: + if item.name in [ + "sklearn.feature_extraction.image.PatchExtractor", + "sklearn.feature_extraction.image.extract_patches_2d", + ]: + item.add_marker(skip_marker) + + +@pytest.fixture(scope="function") +def pyplot(): + """Setup and teardown fixture for matplotlib. + + This fixture checks if we can import matplotlib. If not, the tests will be + skipped. Otherwise, we close the figures before and after running the + functions. + + Returns + ------- + pyplot : module + The ``matplotlib.pyplot`` module. + """ + pyplot = pytest.importorskip("matplotlib.pyplot") + pyplot.close("all") + yield pyplot + pyplot.close("all") + + +def pytest_configure(config): + # Use matplotlib agg backend during the tests including doctests + try: + import matplotlib + + matplotlib.use("agg") + except ImportError: + pass + + allowed_parallelism = joblib.cpu_count(only_physical_cores=True) + xdist_worker_count = environ.get("PYTEST_XDIST_WORKER_COUNT") + if xdist_worker_count is not None: + # Set the number of OpenMP and BLAS threads based on the number of workers + # xdist is using to prevent oversubscription. 
+ allowed_parallelism = max(allowed_parallelism // int(xdist_worker_count), 1) + threadpool_limits(allowed_parallelism) + + # Register global_random_seed plugin if it is not already registered + if not config.pluginmanager.hasplugin("sklearn.tests.random_seed"): + config.pluginmanager.register(random_seed) + + +@pytest.fixture +def hide_available_pandas(monkeypatch): + """Pretend pandas was not installed.""" + import_orig = builtins.__import__ + + def mocked_import(name, *args, **kwargs): + if name == "pandas": + raise ImportError() + return import_orig(name, *args, **kwargs) + + monkeypatch.setattr(builtins, "__import__", mocked_import) + + +@pytest.fixture +def print_changed_only_false(): + """Set `print_changed_only` to False for the duration of the test.""" + set_config(print_changed_only=False) + yield + set_config(print_changed_only=True) # reset to default diff --git a/sklearn/externals/conftest.py b/sklearn/externals/conftest.py new file mode 100644 index 0000000000000..7f7a4af349878 --- /dev/null +++ b/sklearn/externals/conftest.py @@ -0,0 +1,6 @@ +# Do not collect any tests in externals. This is more robust than using +# --ignore because --ignore needs a path and it is not convenient to pass in +# the externals path (very long install-dependent path in site-packages) when +# using --pyargs +def pytest_ignore_collect(path, config): + return True From 3fcfa6518d691dc56d7b961f408b9552ef77cd4d Mon Sep 17 00:00:00 2001 From: Yao Xiao <108576690+Charlie-XIAO@users.noreply.github.com> Date: Wed, 13 Dec 2023 17:21:57 +0800 Subject: [PATCH 3/3] remove root conftest --- conftest.py | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 conftest.py diff --git a/conftest.py b/conftest.py deleted file mode 100644 index e4e478d2d72d7..0000000000000 --- a/conftest.py +++ /dev/null @@ -1,6 +0,0 @@ -# Even if empty this file is useful so that when running from the root folder -# ./sklearn is added to sys.path by pytest. 
See -# https://docs.pytest.org/en/latest/explanation/pythonpath.html for more -# details. For example, this allows to build extensions in place and run pytest -# doc/modules/clustering.rst and use sklearn from the local folder rather than -# the one from site-packages.