From 4be3314c56d0d7e440bdc180d008823c8fea2143 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 17 Apr 2020 10:26:17 -0400 Subject: [PATCH 1/7] simplified logic --- doc/developers/develop.rst | 2 +- sklearn/base.py | 2 +- sklearn/decomposition/_sparse_pca.py | 2 +- sklearn/dummy.py | 2 +- sklearn/neural_network/_rbm.py | 2 +- sklearn/svm/_classes.py | 2 +- sklearn/utils/estimator_checks.py | 28 +++++++++++++--------------- 7 files changed, 19 insertions(+), 21 deletions(-) diff --git a/doc/developers/develop.rst b/doc/developers/develop.rst index 96aa942fb9238..8bbbb0f93173e 100644 --- a/doc/developers/develop.rst +++ b/doc/developers/develop.rst @@ -538,7 +538,7 @@ _skip_test (default=False) whether to skip common tests entirely. Don't use this unless you have a *very good* reason. -_xfail_test (default=False) +_xfail_checks (default=False) dictionary ``{check_name : reason}`` of common checks to mark as a known failure, with the associated reason. Don't use this unless you have a *very good* reason. 
diff --git a/sklearn/base.py b/sklearn/base.py index 70dec8c030418..285b1b221b593 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -33,7 +33,7 @@ 'stateless': False, 'multilabel': False, '_skip_test': False, - '_xfail_test': False, + '_xfail_checks': {}, 'multioutput_only': False, 'binary_only': False, 'requires_fit': True} diff --git a/sklearn/decomposition/_sparse_pca.py b/sklearn/decomposition/_sparse_pca.py index 888d5d79e1e4b..cf1f5a2608e1c 100644 --- a/sklearn/decomposition/_sparse_pca.py +++ b/sklearn/decomposition/_sparse_pca.py @@ -234,7 +234,7 @@ def transform(self, X): def _more_tags(self): return { - '_xfail_test': { + '_xfail_checks': { "check_methods_subset_invariance": "fails for the transform method" } diff --git a/sklearn/dummy.py b/sklearn/dummy.py index 634943231860f..37e9145f7536c 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -358,7 +358,7 @@ def predict_log_proba(self, X): def _more_tags(self): return { 'poor_score': True, 'no_validation': True, - '_xfail_test': { + '_xfail_checks': { 'check_methods_subset_invariance': 'fails for the predict method' } diff --git a/sklearn/neural_network/_rbm.py b/sklearn/neural_network/_rbm.py index 06e7cc71bad3c..03b69c656b4a3 100644 --- a/sklearn/neural_network/_rbm.py +++ b/sklearn/neural_network/_rbm.py @@ -375,7 +375,7 @@ def fit(self, X, y=None): def _more_tags(self): return { - '_xfail_test': { + '_xfail_checks': { 'check_methods_subset_invariance': 'fails for the decision_function method' } diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 46086729af35c..10975a6f8e4a2 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -855,7 +855,7 @@ def __init__(self, nu=0.5, kernel='rbf', degree=3, gamma='scale', def _more_tags(self): return { - '_xfail_test': { + '_xfail_checks': { 'check_methods_subset_invariance': 'fails for the decision_function method', 'check_class_weight_classifiers': 'class_weight is ignored.' 
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 34a0e25c7fcaa..a6923774516de 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -359,29 +359,27 @@ def _generate_class_checks(Estimator): def _mark_xfail_checks(estimator, check, pytest): - """Mark estimator check pairs with xfail""" + """Mark (estimator, check) pairs with xfail according to the + _xfail_checks_ tag""" if isinstance(estimator, type): - # try to construct estimator to get tags, if it is unable to then - # return the estimator class + # try to construct estimator instance, if it is unable to then + # return the estimator class ignoring the try: - xfail_checks = _safe_tags(_construct_instance(estimator), - '_xfail_test') + estimator = _construct_instance(estimator), except Exception: return estimator, check - else: - xfail_checks = _safe_tags(estimator, '_xfail_test') - - if not xfail_checks: - return estimator, check + xfail_checks = _safe_tags(estimator, '_xfail_checks') check_name = _set_check_estimator_ids(check) - msg = xfail_checks.get(check_name, None) - if msg is None: + if check_name not in xfail_checks: + # check isn't part of the xfail_checks tags, just return it return estimator, check - - return pytest.param( - estimator, check, marks=pytest.mark.xfail(reason=msg)) + else: + # check is in the tag, mark it as xfail for pytest + reason = xfail_checks[check_name] + return pytest.param(estimator, check, + marks=pytest.mark.xfail(reason=reason)) def parametrize_with_checks(estimators): From e7ffa5c55baa8d693a03e8edaa8a977b6827b900 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 17 Apr 2020 10:32:13 -0400 Subject: [PATCH 2/7] pep8 --- sklearn/utils/estimator_checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index a6923774516de..dab4316e467a2 100644 --- a/sklearn/utils/estimator_checks.py +++ 
b/sklearn/utils/estimator_checks.py @@ -363,7 +363,7 @@ def _mark_xfail_checks(estimator, check, pytest): _xfail_checks_ tag""" if isinstance(estimator, type): # try to construct estimator instance, if it is unable to then - # return the estimator class ignoring the + # return the estimator class, ignoring the tag try: estimator = _construct_instance(estimator), except Exception: From 87a1eba210366e59653508822d3f4d91da36152e Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 17 Apr 2020 11:06:41 -0400 Subject: [PATCH 3/7] moved docs out of the UG --- doc/developers/develop.rst | 26 ++------------------------ sklearn/svm/_classes.py | 2 +- sklearn/utils/estimator_checks.py | 28 ++++++++++++++++++++++++---- 3 files changed, 27 insertions(+), 29 deletions(-) diff --git a/doc/developers/develop.rst b/doc/developers/develop.rst index 8bbbb0f93173e..285a9fc6fb359 100644 --- a/doc/developers/develop.rst +++ b/doc/developers/develop.rst @@ -246,7 +246,8 @@ whether it is just for you or for contributing it to scikit-learn, there are several internals of scikit-learn that you should be aware of in addition to the scikit-learn API outlined above. You can check whether your estimator adheres to the scikit-learn interface and standards by running -:func:`utils.estimator_checks.check_estimator` on the class:: +:func:`utils.estimator_checks.check_estimator` on the class (see its docstring +for details and possible interactions with `pytest`):: >>> from sklearn.utils.estimator_checks import check_estimator >>> from sklearn.svm import LinearSVC @@ -257,29 +258,6 @@ interface might be that you want to use it together with model evaluation and selection tools such as :class:`model_selection.GridSearchCV` and :class:`pipeline.Pipeline`. -Setting `generate_only=True` returns a generator that yields (estimator, check) -tuples where the check can be called independently from each other, i.e. -`check(estimator)`. 
This allows all checks to be run independently and report -the checks that are failing. scikit-learn provides a pytest specific decorator, -:func:`~sklearn.utils.parametrize_with_checks`, making it easier to test -multiple estimators:: - - from sklearn.utils.estimator_checks import parametrize_with_checks - from sklearn.linear_model import LogisticRegression - from sklearn.tree import DecisionTreeRegressor - - @parametrize_with_checks([LogisticRegression, DecisionTreeRegressor]) - def test_sklearn_compatible_estimator(estimator, check): - check(estimator) - -This decorator sets the `id` keyword in `pytest.mark.parameterize` exposing -the name of the underlying estimator and check in the test name. This allows -`pytest -k` to be used to specify which tests to run. - -.. code-block: bash - - pytest test_check_estimators.py -k check_estimators_fit_returns_self - Before detailing the required interface below, we describe two ways to achieve the correct interface more easily. diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 10975a6f8e4a2..46086729af35c 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -855,7 +855,7 @@ def __init__(self, nu=0.5, kernel='rbf', degree=3, gamma='scale', def _more_tags(self): return { - '_xfail_checks': { + '_xfail_test': { 'check_methods_subset_invariance': 'fails for the decision_function method', 'check_class_weight_classifiers': 'class_weight is ignored.' diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index dab4316e467a2..a3e6642a0159d 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -385,10 +385,11 @@ def _mark_xfail_checks(estimator, check, pytest): def parametrize_with_checks(estimators): """Pytest specific decorator for parametrizing estimator checks. 
- The `id` of each test is set to be a pprint version of the estimator + The `id` of each check is set to be a pprint version of the estimator and the name of the check with its keyword arguments. + This allows using `pytest -k` to specify which tests to run:: - Read more in the :ref:`User Guide`. + pytest test_check_estimators.py -k check_estimators_fit_returns_self Parameters ---------- @@ -398,6 +399,17 @@ def parametrize_with_checks(estimators): Returns ------- decorator : `pytest.mark.parametrize` + + Examples + -------- + >>> from sklearn.utils.estimator_checks import parametrize_with_checks + >>> from sklearn.linear_model import LogisticRegression + >>> from sklearn.tree import DecisionTreeRegressor + + >>> @parametrize_with_checks([LogisticRegression, DecisionTreeRegressor]) + ... def test_sklearn_compatible_estimator(estimator, check): + ...     check(estimator) + """ import pytest @@ -417,7 +429,8 @@ def check_estimator(Estimator, generate_only=False): """Check if estimator adheres to scikit-learn conventions. This estimator will run an extensive test-suite for input validation, - shapes, etc. + shapes, etc, making sure that the estimator complies with `scikit-learn` + conventions as detailed in :ref:`rolling_your_own_estimator`. Additional tests for classifiers, regressors, clustering or transformers will be run if the Estimator class inherits from the corresponding mixin from sklearn.base. @@ -426,7 +439,14 @@ def check_estimator(Estimator, generate_only=False): Classes currently have some additional tests that related to construction, while passing instances allows the testing of multiple options. - Read more in :ref:`rolling_your_own_estimator`. + Setting `generate_only=True` returns a generator that yields (estimator, + check) tuples where the check can be called independently from each + other, i.e. `check(estimator)`. This allows all checks to be run + independently and report the checks that are failing. 
+ + scikit-learn provides a pytest specific decorator, + :func:`~sklearn.utils.parametrize_with_checks`, making it easier to test + multiple estimators. Parameters ---------- From 2beedfd84304603e0159c855f195d0eebc9045d5 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 17 Apr 2020 11:26:44 -0400 Subject: [PATCH 4/7] Update doc/developers/develop.rst Co-Authored-By: Roman Yurchak --- doc/developers/develop.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/developers/develop.rst b/doc/developers/develop.rst index 285a9fc6fb359..59efc2a914682 100644 --- a/doc/developers/develop.rst +++ b/doc/developers/develop.rst @@ -246,8 +246,8 @@ whether it is just for you or for contributing it to scikit-learn, there are several internals of scikit-learn that you should be aware of in addition to the scikit-learn API outlined above. You can check whether your estimator adheres to the scikit-learn interface and standards by running -:func:`utils.estimator_checks.check_estimator` on the class (see its docstring -for details and possible interactions with `pytest`):: +:func:`utils.estimator_checks.check_estimator` on the class or using :func:`~sklearn.utils.parametrize_with_checks` pytest decorator +(see its docstring for details and possible interactions with `pytest`):: >>> from sklearn.utils.estimator_checks import check_estimator >>> from sklearn.svm import LinearSVC From 971f9423bcccbb5ffbfd935dc79172c6806d8eaa Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 17 Apr 2020 11:27:24 -0400 Subject: [PATCH 5/7] line length --- doc/developers/develop.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/developers/develop.rst b/doc/developers/develop.rst index 59efc2a914682..d8ae6dd224840 100644 --- a/doc/developers/develop.rst +++ b/doc/developers/develop.rst @@ -246,8 +246,9 @@ whether it is just for you or for contributing it to scikit-learn, there are several internals of scikit-learn that you should be aware of in 
addition to the scikit-learn API outlined above. You can check whether your estimator adheres to the scikit-learn interface and standards by running -:func:`utils.estimator_checks.check_estimator` on the class or using :func:`~sklearn.utils.parametrize_with_checks` pytest decorator -(see its docstring for details and possible interactions with `pytest`):: +:func:`utils.estimator_checks.check_estimator` on the class or using +:func:`~sklearn.utils.parametrize_with_checks` pytest decorator (see its +docstring for details and possible interactions with `pytest`):: >>> from sklearn.utils.estimator_checks import check_estimator >>> from sklearn.svm import LinearSVC From 14efa9baf3549915fbd125eca028fb23b71b874f Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 17 Apr 2020 12:16:42 -0400 Subject: [PATCH 6/7] avoid mutable global --- sklearn/base.py | 2 +- sklearn/utils/estimator_checks.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/base.py b/sklearn/base.py index 285b1b221b593..8a6041cc17982 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -33,7 +33,7 @@ 'stateless': False, 'multilabel': False, '_skip_test': False, - '_xfail_checks': {}, + '_xfail_checks': False, 'multioutput_only': False, 'binary_only': False, 'requires_fit': True} diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index a3e6642a0159d..eef9109fb56f5 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -369,7 +369,7 @@ def _mark_xfail_checks(estimator, check, pytest): except Exception: return estimator, check - xfail_checks = _safe_tags(estimator, '_xfail_checks') + xfail_checks = _safe_tags(estimator, '_xfail_checks') or {} check_name = _set_check_estimator_ids(check) if check_name not in xfail_checks: From b03117501fde8c3b3a7684190809884507bd532e Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 17 Apr 2020 12:53:47 -0400 Subject: [PATCH 7/7] forgot one rename --- sklearn/svm/_classes.py | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index 46086729af35c..10975a6f8e4a2 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -855,7 +855,7 @@ def __init__(self, nu=0.5, kernel='rbf', degree=3, gamma='scale', def _more_tags(self): return { - '_xfail_test': { + '_xfail_checks': { 'check_methods_subset_invariance': 'fails for the decision_function method', 'check_class_weight_classifiers': 'class_weight is ignored.'