From 3987a8b6bed890c177fa790e0b6bd353feb8d959 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Mon, 28 Oct 2019 13:26:59 -0400 Subject: [PATCH 01/10] BUG Returns only public estimators --- sklearn/tests/test_common.py | 2 -- sklearn/utils/__init__.py | 33 +++++++++++++++++++-------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 5ff0efda1eb90..e6488196c9eb4 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -78,8 +78,6 @@ def _tested_estimators(): for name, Estimator in all_estimators(): if issubclass(Estimator, BiclusterMixin): continue - if name.startswith("_"): - continue try: estimator = _construct_instance(Estimator) except SkipTest: diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 7969a9ecd7fce..71ba33de12319 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -3,6 +3,7 @@ """ import pkgutil import inspect +from importlib import import_module from operator import itemgetter from collections.abc import Sequence from contextlib import contextmanager @@ -12,6 +13,7 @@ import platform import struct import timeit +from pathlib import Path import warnings import numpy as np @@ -1143,7 +1145,6 @@ def all_estimators(include_meta_estimators=None, and ``class`` is the actuall type of the class. """ # lazy import to avoid circular imports from sklearn.base - import sklearn from ._testing import ignore_warnings from ..base import (BaseEstimator, ClassifierMixin, RegressorMixin, TransformerMixin, ClusterMixin) @@ -1172,19 +1173,23 @@ def is_abstract(c): all_classes = [] # get parent folder - path = sklearn.__path__ - for importer, modname, ispkg in pkgutil.walk_packages( - path=path, prefix='sklearn.', onerror=lambda x: None): - if ".tests." 
in modname or "externals" in modname: - continue - if IS_PYPY and ('_svmlight_format' in modname or - 'feature_extraction._hashing' in modname): - continue - # Ignore deprecation warnings triggered at import time. - with ignore_warnings(category=DeprecationWarning): - module = __import__(modname, fromlist="dummy") - classes = inspect.getmembers(module, inspect.isclass) - all_classes.extend(classes) + modules_to_ignore = set(["tests", "externals"]) + root = str(Path(__file__).parent.parent) + # Ignore deprecation warnings triggered at import time and from walking + # packages + with ignore_warnings(category=DeprecationWarning): + for importer, modname, ispkg in pkgutil.walk_packages( + path=[root], prefix='sklearn.'): + mod_parts = modname.split(".") + if (modules_to_ignore & set(mod_parts) or + any(part.startswith("_") for part in mod_parts)): + continue + if IS_PYPY and ('_svmlight_format' in modname or + 'feature_extraction._hashing' in modname): + continue + module = import_module("..", modname) + classes = inspect.getmembers(module, inspect.isclass) + all_classes.extend(classes) all_classes = set(all_classes) From 5ed8d14d01193236a17761c1c368a165465ae0ba Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Mon, 28 Oct 2019 13:54:19 -0400 Subject: [PATCH 02/10] ENH Trigger pypy --- .circleci/config.yml | 30 +++++++++++++++--------------- sklearn/utils/__init__.py | 10 +++++----- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index de08f2d5622f5..7e81dc95d0e11 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -134,21 +134,21 @@ workflows: version: 2 build-doc-and-deploy: jobs: - - lint - - doc: - requires: - - lint - - doc-min-dependencies: - requires: - - lint - - pypy3: - filters: - branches: - only: - - 0.20.X - - deploy: - requires: - - doc + # - lint + # - doc: + # requires: + # - lint + # - doc-min-dependencies: + # requires: + # - lint + - pypy3 + # filters: + # branches: + # only: + # 
- 0.20.X + # - deploy: + # requires: + # - doc pypy: triggers: - schedule: diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 71ba33de12319..117b05a72f7ef 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -1173,7 +1173,7 @@ def is_abstract(c): all_classes = [] # get parent folder - modules_to_ignore = set(["tests", "externals"]) + modules_to_ignore = set(["tests", "externals", "setup"]) root = str(Path(__file__).parent.parent) # Ignore deprecation warnings triggered at import time and from walking # packages @@ -1181,11 +1181,11 @@ def is_abstract(c): for importer, modname, ispkg in pkgutil.walk_packages( path=[root], prefix='sklearn.'): mod_parts = modname.split(".") - if (modules_to_ignore & set(mod_parts) or - any(part.startswith("_") for part in mod_parts)): + if modules_to_ignore & set(mod_parts): continue - if IS_PYPY and ('_svmlight_format' in modname or - 'feature_extraction._hashing' in modname): + if IS_PYPY and ('svmlight_format' in modname or + 'feature_extraction._hashing' in modname or + 'feature_extraction.hashing' in modname): continue module = import_module("..", modname) classes = inspect.getmembers(module, inspect.isclass) From 5483f927fa52e3660e5116ee915592714d7cdc7d Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Mon, 28 Oct 2019 14:31:45 -0400 Subject: [PATCH 03/10] BUG Fixes all_estimators --- sklearn/utils/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 117b05a72f7ef..1de1d7e00ccf3 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -1187,8 +1187,11 @@ def is_abstract(c): 'feature_extraction._hashing' in modname or 'feature_extraction.hashing' in modname): continue - module = import_module("..", modname) + relative_module = modname.replace("sklearn.", "..") + module = import_module(relative_module, "sklearn.utils") classes = inspect.getmembers(module, inspect.isclass) + classes = 
[(name, est_cls) for name, est_cls in classes + if not name.startswith("_")] all_classes.extend(classes) all_classes = set(all_classes) From 96972dc8564bb2d9ffa31ca039b3c7294c42e2cf Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Mon, 28 Oct 2019 15:18:29 -0400 Subject: [PATCH 04/10] ENH Better handling on pypy --- sklearn/utils/__init__.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 1de1d7e00ccf3..3672038efabc1 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -1181,17 +1181,18 @@ def is_abstract(c): for importer, modname, ispkg in pkgutil.walk_packages( path=[root], prefix='sklearn.'): mod_parts = modname.split(".") - if modules_to_ignore & set(mod_parts): + if modules_to_ignore & set(mod_parts) or "._" in modname: continue - if IS_PYPY and ('svmlight_format' in modname or - 'feature_extraction._hashing' in modname or - 'feature_extraction.hashing' in modname): - continue - relative_module = modname.replace("sklearn.", "..") - module = import_module(relative_module, "sklearn.utils") + module = import_module(modname) classes = inspect.getmembers(module, inspect.isclass) classes = [(name, est_cls) for name, est_cls in classes if not name.startswith("_")] + + # remove FeatureHasher + if IS_PYPY and 'feature_extraction' in modname: + classes = [(name, est_cls) for name, est_cls in classes + if name == "FeatureHasher"] + all_classes.extend(classes) all_classes = set(all_classes) From 478a538fc555071a30957ef294ab3ad9e619e291 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Thu, 31 Oct 2019 13:46:44 -0400 Subject: [PATCH 05/10] FIX Uses FutureWarning --- sklearn/utils/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 9cb5bdaae951f..837a6cd4fa7e7 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -1186,10 +1186,10 @@ def 
is_abstract(c): all_classes = [] # get parent folder modules_to_ignore = set(["tests", "externals", "setup"]) - root = str(Path(__file__).parent.parent) + root = Path(__file__).parent.parent # Ignore deprecation warnings triggered at import time and from walking # packages - with ignore_warnings(category=DeprecationWarning): + with ignore_warnings(category=FutureWarning): for importer, modname, ispkg in pkgutil.walk_packages( path=[root], prefix='sklearn.'): mod_parts = modname.split(".") From fc9acef3e766309493760f61017f00cf4da1950f Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Thu, 31 Oct 2019 13:57:12 -0400 Subject: [PATCH 06/10] FIX Python 3.5 does not support pathlib in walk_packages --- sklearn/utils/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 837a6cd4fa7e7..b56e8fb71b8a0 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -1186,7 +1186,7 @@ def is_abstract(c): all_classes = [] # get parent folder modules_to_ignore = set(["tests", "externals", "setup"]) - root = Path(__file__).parent.parent + root = str(Path(__file__).parent.parent) # Ignore deprecation warnings triggered at import time and from walking # packages with ignore_warnings(category=FutureWarning): From cc7bca50e44e9bf347127c6e2b56bb29fb02dff9 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Fri, 1 Nov 2019 14:34:33 -0400 Subject: [PATCH 07/10] REV --- .circleci/config.yml | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 68ac47a1ea404..04c8979fa0d6a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -131,21 +131,21 @@ workflows: version: 2 build-doc-and-deploy: jobs: - # - lint - # - doc: - # requires: - # - lint - # - doc-min-dependencies: - # requires: - # - lint - - pypy3 - # filters: - # branches: - # only: - # - 0.20.X - # - deploy: - # requires: - # - doc + - lint 
+ - doc: + requires: + - lint + - doc-min-dependencies: + requires: + - lint + - pypy3: + filters: + branches: + only: + - 0.20.X + - deploy: + requires: + - doc pypy: triggers: - schedule: From efa253f0683560e5f62378ea84bcfc0d7d0163a0 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Wed, 6 Nov 2019 17:00:39 -0500 Subject: [PATCH 08/10] CLN Address comments --- sklearn/utils/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index b56e8fb71b8a0..a3ff2839a2951 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -1185,8 +1185,8 @@ def is_abstract(c): all_classes = [] # get parent folder - modules_to_ignore = set(["tests", "externals", "setup"]) - root = str(Path(__file__).parent.parent) + modules_to_ignore = {"tests", "externals", "setup"} + root = str(Path(__file__).parent.parent) # sklearn package # Ignore deprecation warnings triggered at import time and from walking # packages with ignore_warnings(category=FutureWarning): @@ -1200,7 +1200,8 @@ def is_abstract(c): classes = [(name, est_cls) for name, est_cls in classes if not name.startswith("_")] - # remove FeatureHasher + # TODO: Remove when FeatureHasher is implemented in PYPY + # Skips FeatureHasher for PYPY if IS_PYPY and 'feature_extraction' in modname: classes = [(name, est_cls) for name, est_cls in classes if name == "FeatureHasher"] From 57c9ecadec4a11ce8d3327fa6bdecc8d68995436 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Wed, 6 Nov 2019 17:25:26 -0500 Subject: [PATCH 09/10] CLN Uses any --- sklearn/utils/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index a3ff2839a2951..7c4e5913c1baf 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -1193,7 +1193,8 @@ def is_abstract(c): for importer, modname, ispkg in pkgutil.walk_packages( path=[root], prefix='sklearn.'): mod_parts = 
modname.split(".") - if modules_to_ignore & set(mod_parts) or "._" in modname: + if (any(part in modules_to_ignore for part in mod_parts) + or '._' in modname): continue module = import_module(modname) classes = inspect.getmembers(module, inspect.isclass) From 0838ae7a15158dadd08ddc048210c06586691d06 Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Wed, 20 Nov 2019 15:42:36 -0500 Subject: [PATCH 10/10] ENH Adds test to test_estimator_checks --- sklearn/utils/__init__.py | 3 +-- sklearn/utils/tests/test_estimator_checks.py | 9 +++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 7c4e5913c1baf..c20ea5ab11d31 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -1184,8 +1184,7 @@ def is_abstract(c): DeprecationWarning) all_classes = [] - # get parent folder - modules_to_ignore = {"tests", "externals", "setup"} + modules_to_ignore = {"tests", "externals", "setup", "conftest"} root = str(Path(__file__).parent.parent) # sklearn package # Ignore deprecation warnings triggered at import time and from walking # packages diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index f0c014829483f..15b423d6e0ce8 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -34,6 +34,7 @@ from sklearn.neighbors import KNeighborsRegressor from sklearn.tree import DecisionTreeClassifier from sklearn.utils.validation import check_X_y, check_array +from sklearn.utils import all_estimators class CorrectNotFittedError(ValueError): @@ -572,6 +573,14 @@ def test_check_class_weight_balanced_linear_classifier(): BadBalancedWeightsClassifier) +def test_all_estimators_all_public(): + # all_estimator should not fail when pytest is not installed and return + # only public estimators + estimators = all_estimators() + for est in estimators: + assert not est.__class__.__name__.startswith("_") + + 
if __name__ == '__main__': # This module is run as a script to check that we have no dependency on # pytest for estimator checks.