Thanks to visit codestin.com
Credit goes to github.com

Skip to content

TST Add minimal setup to be able to run test suite on float32 #22690

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Mar 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
6d8efc5
TST Add minimal setup to be able to run test suite on float32
jjerphan Mar 4, 2022
eb911dd
TST Use dtype fixture in one test
jjerphan Mar 9, 2022
86bf47f
CI Do not skip 32bit test for py38_conda_defaults_openblas
jjerphan Mar 9, 2022
1727e71
Merge branch 'main' into float32-test-suite
jjerphan Mar 9, 2022
585e278
fixup! TST Use dtype fixture in one test
jjerphan Mar 9, 2022
09ccf31
MAINT Apply reviews comments
jjerphan Mar 10, 2022
f71db82
Use an more explicit name for the env variable
jjerphan Mar 10, 2022
915a4c7
DOC Rename and document the environement variable
jjerphan Mar 11, 2022
53caeeb
fixup! DOC Rename and document the environement variable
jjerphan Mar 11, 2022
b40febf
Merge branch 'main' into float32-test-suite
jjerphan Mar 14, 2022
9f78941
TST Introduce custom assert_allclose
jjerphan Mar 14, 2022
8826197
Review comments
jjerphan Mar 16, 2022
0d42609
TST Add tests to testing tests
jjerphan Mar 16, 2022
0c71649
Merge branch 'main' into float32-test-suite
jjerphan Mar 16, 2022
0a4a1f9
TST Add more rtols
jjerphan Mar 16, 2022
5701e8f
TST Adapt test_testing.py
jjerphan Mar 16, 2022
15d53d8
Julien clearing his mess
jjerphan Mar 16, 2022
2ff5675
Simplify
jjerphan Mar 16, 2022
f502f11
TST Trust numpy test suite
jjerphan Mar 17, 2022
831b3e7
C'mon, Julien
jjerphan Mar 17, 2022
354e9c1
TST Actually use custom assert_allclose
jjerphan Mar 17, 2022
efb0d02
Review comments
jjerphan Mar 17, 2022
1bd542e
TST Last changes
jjerphan Mar 17, 2022
fa673d2
Apply suggestions from code review
ogrisel Mar 17, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ jobs:
MATPLOTLIB_VERSION: 'min'
THREADPOOLCTL_VERSION: '2.2.0'
SKLEARN_ENABLE_DEBUG_CYTHON_DIRECTIVES: '1'
SKLEARN_RUN_FLOAT32_TESTS: '1'
SKLEARN_TESTS_GLOBAL_RANDOM_SEED: '2' # non-default seed
# Linux environment to test the latest available dependencies.
# It runs tests requiring lightgbm, pandas and PyAMG.
Expand Down
8 changes: 8 additions & 0 deletions doc/computing/parallelism.rst
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,14 @@ When this environment variable is set to a non zero value, the tests that need
network access are skipped. When this environment variable is not set then
network tests are skipped.

`SKLEARN_RUN_FLOAT32_TESTS`
~~~~~~~~~~~~~~~~~~~~~~~~~~~

When this environment variable is set to '1', the tests using the
`global_dtype` fixture are also run on float32 data.
When this environment variable is not set, the tests are only run on
float64 data.

`SKLEARN_ENABLE_DEBUG_CYTHON_DIRECTIVES`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
16 changes: 16 additions & 0 deletions doc/developers/develop.rst
Original file line number Diff line number Diff line change
Expand Up @@ -774,3 +774,19 @@ The reason for this setup is reproducibility:
when an estimator is ``fit`` twice to the same data,
it should produce an identical model both times,
hence the validation in ``fit``, not ``__init__``.

Numerical assertions in tests
-----------------------------

When asserting the quasi-equality of arrays of continuous values,
do use :func:`sklearn.utils._testing.assert_allclose`.

The relative tolerance is automatically inferred from the dtypes of the
provided arrays (for float32 and float64 dtypes in particular), but you can
override it via ``rtol``.

When comparing arrays that contain elements equal to zero, please do provide
a non-zero value for the absolute tolerance via ``atol``.

For more information, please refer to the docstring of
:func:`sklearn.utils._testing.assert_allclose`.
12 changes: 12 additions & 0 deletions sklearn/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import sys

import pytest
import numpy as np
from threadpoolctl import threadpool_limits
from _pytest.doctest import DoctestItem

Expand Down Expand Up @@ -41,6 +42,17 @@
"fetch_rcv1_fxt": fetch_rcv1,
}

# Skip marker attached to the float32 parametrization below: the float32
# variant of a test only runs when SKLEARN_RUN_FLOAT32_TESTS is exactly "1".
_SKIP32_MARK = pytest.mark.skipif(
    environ.get("SKLEARN_RUN_FLOAT32_TESTS", "0") != "1",
    reason="Set SKLEARN_RUN_FLOAT32_TESTS=1 to run float32 dtype tests",
)


# Global fixtures
@pytest.fixture(
    params=[
        pytest.param(np.float32, marks=_SKIP32_MARK),
        np.float64,
    ]
)
def global_dtype(request):
    """Parametrize a test over the floating point dtypes to check.

    Tests requesting this fixture run once per dtype: always with
    ``np.float64``, and with ``np.float32`` unless that parametrization is
    skipped by ``_SKIP32_MARK``.
    """
    return request.param


def _fetch_fixture(f):
"""Fetch dataset (download if missing and requested by environment)."""
Expand Down
16 changes: 10 additions & 6 deletions sklearn/feature_selection/tests/test_mutual_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
from scipy.sparse import csr_matrix

from sklearn.utils import check_random_state
from sklearn.utils._testing import assert_array_equal, assert_almost_equal
from sklearn.utils._testing import (
assert_array_equal,
assert_almost_equal,
assert_allclose,
)
from sklearn.feature_selection._mutual_info import _compute_mi
from sklearn.feature_selection import mutual_info_regression, mutual_info_classif

Expand All @@ -21,7 +25,7 @@ def test_compute_mi_dd():
assert_almost_equal(_compute_mi(x, y, True, True), I_xy)


def test_compute_mi_cc():
def test_compute_mi_cc(global_dtype):
# For two continuous variables a good approach is to test on bivariate
# normal distribution, where mutual information is known.

Expand All @@ -43,15 +47,15 @@ def test_compute_mi_cc():
I_theory = np.log(sigma_1) + np.log(sigma_2) - 0.5 * np.log(np.linalg.det(cov))

rng = check_random_state(0)
Z = rng.multivariate_normal(mean, cov, size=1000)
Z = rng.multivariate_normal(mean, cov, size=1000).astype(global_dtype, copy=False)

x, y = Z[:, 0], Z[:, 1]

# Theory and computed values won't be very close, assert that the
# first figures after decimal point match.
# Theory and computed values won't be very close
# We here check with a large relative tolerance
for n_neighbors in [3, 5, 7]:
I_computed = _compute_mi(x, y, False, False, n_neighbors)
assert_almost_equal(I_computed, I_theory, 1)
assert_allclose(I_computed, I_theory, rtol=1e-1)


def test_compute_mi_cd():
Expand Down
76 changes: 75 additions & 1 deletion sklearn/utils/_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
except NameError:
WindowsError = None

from numpy.testing import assert_allclose
from numpy.testing import assert_allclose as np_assert_allclose
from numpy.testing import assert_almost_equal
from numpy.testing import assert_approx_equal
from numpy.testing import assert_array_equal
Expand Down Expand Up @@ -387,6 +387,80 @@ def assert_raise_message(exceptions, message, function, *args, **kwargs):
raise AssertionError("%s not raised by %s" % (names, function.__name__))


def assert_allclose(
    actual, desired, rtol=None, atol=0.0, equal_nan=True, err_msg="", verbose=True
):
    """dtype-aware variant of numpy.testing.assert_allclose

    When `rtol` is not provided, this variant inspects the dtypes of
    `actual` and `desired` and sets the relative tolerance to 1e-4 if
    either of them is float32, and to 1e-7 otherwise (typically float64
    in scikit-learn).

    `atol` is always left to 0. by default. It should be adjusted manually
    to an assertion-specific value in case there are null values expected
    in `desired`.

    The aggregate tolerance is `atol + rtol * abs(desired)`.

    Parameters
    ----------
    actual : array_like
        Array obtained.
    desired : array_like
        Array desired.
    rtol : float, optional, default=None
        Relative tolerance.
        If None, it is set based on the provided arrays' dtypes.
    atol : float, optional, default=0.
        Absolute tolerance. Unlike `rtol`, it is never inferred from the
        provided arrays' dtypes.
    equal_nan : bool, optional, default=True
        If True, NaNs will compare equal.
    err_msg : str, optional, default=''
        The error message to be printed in case of failure.
    verbose : bool, optional, default=True
        If True, the conflicting values are appended to the error message.

    Raises
    ------
    AssertionError
        If actual and desired are not equal up to specified precision.

    See Also
    --------
    numpy.testing.assert_allclose

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.utils._testing import assert_allclose
    >>> x = [1e-5, 1e-3, 1e-1]
    >>> y = np.arccos(np.cos(x))
    >>> assert_allclose(x, y, rtol=1e-5, atol=0)
    >>> a = np.full(shape=10, fill_value=1e-5, dtype=np.float32)
    >>> assert_allclose(a, 1e-5)
    """
    # Convert first so that the dtype of scalars and lists can be inspected.
    actual, desired = np.asanyarray(actual), np.asanyarray(desired)

    if rtol is None:
        # A single float32 operand is enough to require the looser tolerance:
        # the comparison cannot be more precise than its least precise input.
        has_float32 = actual.dtype == np.float32 or desired.dtype == np.float32
        rtol = 1e-4 if has_float32 else 1e-7

    np_assert_allclose(
        actual,
        desired,
        rtol=rtol,
        atol=atol,
        equal_nan=equal_nan,
        err_msg=err_msg,
        verbose=verbose,
    )


def assert_allclose_dense_sparse(x, y, rtol=1e-07, atol=1e-9, err_msg=""):
"""Assert allclose for sparse and dense data.

Expand Down
19 changes: 19 additions & 0 deletions sklearn/utils/tests/test_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
_delete_folder,
_convert_container,
raises,
assert_allclose,
)

from sklearn.tree import DecisionTreeClassifier
Expand Down Expand Up @@ -854,3 +855,21 @@ def test_raises():
with pytest.raises(AssertionError):
with raises((TypeError, ValueError)):
pass


def test_float32_aware_assert_allclose():
    """Check the dtype-dependent default tolerances of assert_allclose."""
    # float32 inputs: the default relative tolerance is 1e-4
    f32_within_tol = np.array([1.0 + 2e-5], dtype=np.float32)
    f32_beyond_tol = np.array([1.0 + 2e-4], dtype=np.float32)
    assert_allclose(f32_within_tol, 1.0)
    with pytest.raises(AssertionError):
        assert_allclose(f32_beyond_tol, 1.0)

    # Other inputs: the default relative tolerance stays at 1e-7, as in
    # the original numpy version.
    f64_within_tol = np.array([1.0 + 2e-8], dtype=np.float64)
    f64_beyond_tol = np.array([1.0 + 2e-7], dtype=np.float64)
    assert_allclose(f64_within_tol, 1.0)
    with pytest.raises(AssertionError):
        assert_allclose(f64_beyond_tol, 1.0)

    # The absolute tolerance defaults to 0.0, even for float32, so a
    # comparison against zero needs an explicit atol to pass.
    f32_near_zero = np.array([1e-5], dtype=np.float32)
    with pytest.raises(AssertionError):
        assert_allclose(f32_near_zero, 0.0)
    assert_allclose(f32_near_zero, 0.0, atol=2e-5)