From eb8ead6b63df45b148405bb979e0fbd4a32b4125 Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Mon, 26 Dec 2016 22:55:22 +0100 Subject: [PATCH 1/4] ENH: Add isnat function --- numpy/core/code_generators/generate_umath.py | 6 +++ .../core/code_generators/ufunc_docstrings.py | 37 ++++++++++++++++++- numpy/core/src/umath/loops.c.src | 9 +++++ numpy/core/src/umath/loops.h.src | 3 ++ numpy/core/src/umath/ufunc_type_resolution.c | 26 +++++++++++++ numpy/core/src/umath/ufunc_type_resolution.h | 7 ++++ numpy/core/tests/test_datetime.py | 29 +++++++++++++++ 7 files changed, 116 insertions(+), 1 deletion(-) diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py index dfba04c18148..45476f931bdc 100644 --- a/numpy/core/code_generators/generate_umath.py +++ b/numpy/core/code_generators/generate_umath.py @@ -799,6 +799,12 @@ def english_upper(s): None, TD(inexact, out='?'), ), +'isnat': + Ufunc(1, 1, None, + docstrings.get('numpy.core.umath.isnat'), + 'PyUFunc_IsNaTTypeResolver', + TD(times, out='?'), + ), 'isinf': Ufunc(1, 1, None, docstrings.get('numpy.core.umath.isinf'), diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py index ed9e05b15b25..c783d4595d0f 100644 --- a/numpy/core/code_generators/ufunc_docstrings.py +++ b/numpy/core/code_generators/ufunc_docstrings.py @@ -1690,7 +1690,7 @@ def add_newdoc(place, name, doc): See Also -------- - isinf, isneginf, isposinf, isfinite + isinf, isneginf, isposinf, isfinite, isnat Notes ----- @@ -1708,6 +1708,41 @@ def add_newdoc(place, name, doc): """) +add_newdoc('numpy.core.umath', 'isnat', + """ + Test element-wise for NaT (not a time) and return result as a boolean array. + + Parameters + ---------- + x : array_like + Input array with datetime or timedelta data type. + + Returns + ------- + y : ndarray or bool + For scalar input, the result is a new boolean with value True if + the input is NaT; otherwise the value is False. + + For array input, the result is a boolean array of the same + dimensions as the input and the values are True if the + corresponding element of the input is NaT; otherwise the values are + False. + + See Also + -------- + isnan, isinf, isneginf, isposinf, isfinite + + Examples + -------- + >>> np.isnat(np.datetime64("NaT")) + True + >>> np.isnat(np.datetime64("2016-01-01")) + False + >>> np.isnat(np.array(["NaT", "2016-01-01"], dtype="datetime64[ns]")) + array([ True, False], dtype=bool) + + """) + add_newdoc('numpy.core.umath', 'left_shift', """ Shift the bits of an integer to the left. diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 47faaf180262..e88b87b5ca1f 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1227,6 +1227,15 @@ TIMEDELTA_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNU * #TYPE = DATETIME, TIMEDELTA# */ +NPY_NO_EXPORT void +@TYPE@_isnat(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)) +{ + UNARY_LOOP { + const @type@ in1 = *(@type@ *)ip1; + *((npy_bool *)op1) = (in1 == NPY_DATETIME_NAT); + } +} + NPY_NO_EXPORT void @TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)) { diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src index d20b776a0132..c1b451c5bcc5 100644 --- a/numpy/core/src/umath/loops.h.src +++ b/numpy/core/src/umath/loops.h.src @@ -401,6 +401,9 @@ TIMEDELTA_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNU * #TYPE = DATETIME, TIMEDELTA# */ +NPY_NO_EXPORT void +@TYPE@_isnat(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func)); + NPY_NO_EXPORT void @TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data)); diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c index 154dbfb2f749..0fd3c45c5d36 100644 --- a/numpy/core/src/umath/ufunc_type_resolution.c +++ b/numpy/core/src/umath/ufunc_type_resolution.c @@ -536,6 +536,32 @@ PyUFunc_AbsoluteTypeResolver(PyUFuncObject *ufunc, } } +/* + * This function applies special type resolution rules for the isnat + * ufunc. This ufunc converts datetime/timedelta -> bool, and is not covered + * by the simple unary type resolution. + * + * Returns 0 on success, -1 on error. + */ +NPY_NO_EXPORT int +PyUFunc_IsNaTTypeResolver(PyUFuncObject *ufunc, + NPY_CASTING casting, + PyArrayObject **operands, + PyObject *type_tup, + PyArray_Descr **out_dtypes) +{ + if (!PyTypeNum_ISDATETIME(PyArray_DESCR(operands[0])->type_num)) { + PyErr_SetString(PyExc_ValueError, + "ufunc 'isnat' is only defined for datetime and timedelta."); + return -1; + } + + out_dtypes[0] = ensure_dtype_nbo(PyArray_DESCR(operands[0])); + out_dtypes[1] = PyArray_DescrFromType(NPY_BOOL); + + return 0; +} + /* * Creates a new NPY_TIMEDELTA dtype, copying the datetime metadata * from the given dtype. diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h index d20c1e85b56b..eaf5e91cec67 100644 --- a/numpy/core/src/umath/ufunc_type_resolution.h +++ b/numpy/core/src/umath/ufunc_type_resolution.h @@ -42,6 +42,13 @@ PyUFunc_AbsoluteTypeResolver(PyUFuncObject *ufunc, PyArrayObject **operands, PyObject *type_tup, PyArray_Descr **out_dtypes); + +NPY_NO_EXPORT int +PyUFunc_IsNaTTypeResolver(PyUFuncObject *ufunc, + NPY_CASTING casting, + PyArrayObject **operands, + PyObject *type_tup, + PyArray_Descr **out_dtypes); NPY_NO_EXPORT int PyUFunc_AdditionTypeResolver(PyUFuncObject *ufunc, diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py index 12ebd5ae9586..48afa728ddd0 100644 --- a/numpy/core/tests/test_datetime.py +++ b/numpy/core/tests/test_datetime.py @@ -1918,6 +1918,35 @@ def test_datetime_y2038(self): a = np.datetime64('2038-01-20T13:21:14') assert_equal(str(a), '2038-01-20T13:21:14') + def test_isnat(self): + assert_(np.isnat(np.datetime64('NaT', 'ms'))) + assert_(np.isnat(np.datetime64('NaT', 'ns'))) + assert_(not np.isnat(np.datetime64('2038-01-19T03:14:07'))) + + assert_(np.isnat(np.timedelta64('NaT', "ms"))) + assert_(not np.isnat(np.timedelta64(34, "ms"))) + + res = np.array([False, False, True]) + for unit in ['Y', 'M', 'W', 'D', + 'h', 'm', 's', 'ms', 'us', + 'ns', 'ps', 'fs', 'as']: + arr = np.array([123, -321, "NaT"], dtype=' Date: Mon, 26 Dec 2016 22:55:40 +0100 Subject: [PATCH 2/4] BUG: Remove warning filters from comparison assert functions These warning filters are not threadsafe, while it is nicer if the comparison functions can be called in a threaded environment since they do no explicite warning checking. Less filters are also cleaner in general, though may also mean that downstream projects see warnings that were previously hidden. --- numpy/testing/tests/test_utils.py | 37 ++++++++++++++- numpy/testing/utils.py | 76 ++++++++++++++++--------------- 2 files changed, 75 insertions(+), 38 deletions(-) diff --git a/numpy/testing/tests/test_utils.py b/numpy/testing/tests/test_utils.py index 804f22b7f091..e2c1052458f0 100644 --- a/numpy/testing/tests/test_utils.py +++ b/numpy/testing/tests/test_utils.py @@ -3,6 +3,7 @@ import warnings import sys import os +import itertools import numpy as np from numpy.testing import ( @@ -144,7 +145,10 @@ def test_recarrays(self): c['floupipi'] = a['floupi'].copy() c['floupa'] = a['floupa'].copy() - self._test_not_equal(c, b) + with suppress_warnings() as sup: + l = sup.record(FutureWarning, message="elementwise == ") + self._test_not_equal(c, b) + assert_(len(l) == 1) class TestBuildErrorMessage(unittest.TestCase): @@ -208,6 +212,37 @@ def test_inf_items(self): self._assert_func([np.inf], [np.inf]) self._test_not_equal(np.inf, [np.inf]) + def test_nat_items(self): + # not a datetime + nadt_no_unit = np.datetime64("NaT") + nadt_s = np.datetime64("NaT", "s") + nadt_d = np.datetime64("NaT", "ns") + # not a timedelta + natd_no_unit = np.timedelta64("NaT") + natd_s = np.timedelta64("NaT", "s") + natd_d = np.timedelta64("NaT", "ns") + + dts = [nadt_no_unit, nadt_s, nadt_d] + tds = [natd_no_unit, natd_s, natd_d] + for a, b in itertools.product(dts, dts): + self._assert_func(a, b) + self._assert_func([a], [b]) + self._test_not_equal([a], b) + + for a, b in itertools.product(tds, tds): + self._assert_func(a, b) + self._assert_func([a], [b]) + self._test_not_equal([a], b) + + for a, b in itertools.product(tds, dts): + self._test_not_equal(a, b) + self._test_not_equal(a, [b]) + self._test_not_equal([a], [b]) + self._test_not_equal([a], np.datetime64("2017-01-01", "s")) + self._test_not_equal([b], np.datetime64("2017-01-01", "s")) + self._test_not_equal([a], np.timedelta64(123, "s")) + self._test_not_equal([b], np.timedelta64(123, "s")) + def test_non_numeric(self): self._assert_func('ab', 'ab') self._test_not_equal('ab', 'abb') diff --git a/numpy/testing/utils.py b/numpy/testing/utils.py index b5a7e05c4fa7..f54995870d5d 100644 --- a/numpy/testing/utils.py +++ b/numpy/testing/utils.py @@ -15,7 +15,8 @@ from tempfile import mkdtemp, mkstemp from unittest.case import SkipTest -from numpy.core import float32, empty, arange, array_repr, ndarray +from numpy.core import( + float32, empty, arange, array_repr, ndarray, isnat, array) from numpy.lib.utils import deprecate if sys.version_info[0] >= 3: @@ -286,7 +287,7 @@ def build_err_msg(arrays, err_msg, header='Items are not equal:', return '\n'.join(msg) -def assert_equal(actual,desired,err_msg='',verbose=True): +def assert_equal(actual, desired, err_msg='', verbose=True): """ Raises an AssertionError if two objects are not equal. @@ -369,12 +370,12 @@ def assert_equal(actual,desired,err_msg='',verbose=True): except AssertionError: raise AssertionError(msg) + # isscalar test to check cases such as [np.nan] != np.nan + if isscalar(desired) != isscalar(actual): + raise AssertionError(msg) + # Inf/nan/negative zero handling try: - # isscalar test to check cases such as [np.nan] != np.nan - if isscalar(desired) != isscalar(actual): - raise AssertionError(msg) - # If one of desired/actual is not finite, handle it specially here: # check that both are nan if any is a nan, and test for equality # otherwise @@ -396,14 +397,24 @@ def assert_equal(actual,desired,err_msg='',verbose=True): except (TypeError, ValueError, NotImplementedError): pass - # Explicitly use __eq__ for comparison, ticket #2552 - with suppress_warnings() as sup: - # TODO: Better handling will to needed when change happens! - sup.filter(DeprecationWarning, ".*NAT ==") - sup.filter(FutureWarning, ".*NAT ==") - if not (desired == actual): + try: + # If both are NaT (and have the same dtype -- datetime or timedelta) + # they are considered equal. + if (isnat(desired) == isnat(actual) and + array(desired).dtype.type == array(actual).dtype.type): + return + else: raise AssertionError(msg) + # If TypeError or ValueError raised while using isnan and co, just handle + # as before + except (TypeError, ValueError, NotImplementedError): + pass + + # Explicitly use __eq__ for comparison, ticket #2552 + if not (desired == actual): + raise AssertionError(msg) + def print_assert_equal(test_string, actual, desired): """ @@ -674,33 +685,12 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True, x = array(x, copy=False, subok=True) y = array(y, copy=False, subok=True) - def safe_comparison(*args, **kwargs): - # There are a number of cases where comparing two arrays hits special - # cases in array_richcompare, specifically around strings and void - # dtypes. Basically, we just can't do comparisons involving these - # types, unless both arrays have exactly the *same* type. So - # e.g. you can apply == to two string arrays, or two arrays with - # identical structured dtypes. But if you compare a non-string array - # to a string array, or two arrays with non-identical structured - # dtypes, or anything like that, then internally stuff blows up. - # Currently, when things blow up, we just return a scalar False or - # True. But we also emit a DeprecationWarning, b/c eventually we - # should raise an error here. (Ideally we might even make this work - # properly, but since that will require rewriting a bunch of how - # ufuncs work then we are not counting on that.) - # - # The point of this little function is to let the DeprecationWarning - # pass (or maybe eventually catch the errors and return False, I - # dunno, that's a little trickier and we can figure that out when the - # time comes). - with suppress_warnings() as sup: - sup.filter(DeprecationWarning, ".*==") - sup.filter(FutureWarning, ".*==") - return comparison(*args, **kwargs) - def isnumber(x): return x.dtype.char in '?bhilqpBHILQPefdgFDG' + def istime(x): + return x.dtype.char in "Mm" + def chk_same_position(x_id, y_id, hasval='nan'): """Handling nan/inf: check that x and y have the nan/inf at the same locations.""" @@ -756,7 +746,19 @@ def chk_same_position(x_id, y_id, hasval='nan'): if x.size == 0: return - val = safe_comparison(x, y) + elif istime(x) and istime(y): + # If one is datetime64 and the other timedelta64 there is no point + if equal_nan and x.dtype.type == y.dtype.type: + x_isnat, y_isnat = isnat(x), isnat(y) + + if any(x_isnat) or any(y_isnat): + chk_same_position(x_isnat, y_isnat, hasval="NaT") + + if any(x_isnat) or any(y_isnat): + x = x[~x_isnat] + y = y[~y_isnat] + + val = comparison(x, y) if isinstance(val, bool): cond = val From ca05d523c3d022eba8ea1bf75b50c60550d30d5a Mon Sep 17 00:00:00 2001 From: Sebastian Berg Date: Sun, 7 May 2017 16:06:20 +0200 Subject: [PATCH 3/4] BUG: Remove warning NaT filter from masked array test utils This may show warnings in downstream projects, which should likely not be there. We could also drop in the normal assert_equal there, which would support NaT, and NaN, but checks for scalarness, which this currently does not (which also could create problems). Works around the only test with this problem, by using the normal assert_equal there. --- numpy/ma/tests/test_core.py | 4 ++-- numpy/ma/testutils.py | 8 +++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index f807fc8ae6fa..b8cb8f1a45ea 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -1841,11 +1841,11 @@ def test_fillvalue_datetime_timedelta(self): "h", "D", "W", "M", "Y"): control = numpy.datetime64("NaT", timecode) test = default_fill_value(numpy.dtype(" Date: Sun, 7 May 2017 16:20:25 +0200 Subject: [PATCH 4/4] DOC: Document isnat addition and related warning changes --- doc/release/1.13.0-notes.rst | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/doc/release/1.13.0-notes.rst b/doc/release/1.13.0-notes.rst index 2f32ddb283a6..8de6eb8694c1 100644 --- a/doc/release/1.13.0-notes.rst +++ b/doc/release/1.13.0-notes.rst @@ -160,6 +160,11 @@ In an N-dimensional array, the user can now choose the axis along which to look for duplicate N-1-dimensional elements using ``numpy.unique``. The original behaviour is recovered if ``axis=None`` (default). +``np.isnat`` function to test for NaT special datetime and timedelta values +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``np.isnat`` can now be used to find the positions of special NaT values +within datetime and timedelta arrays. This is analogous to ``np.isnan``. + ``isin`` function, improving on ``in1d`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The new function ``isin`` tests whether each element of an N-dimensonal @@ -460,6 +465,17 @@ is not the expected behavior both according to documentation and intuitively. Now, -inf < x < inf is considered ``True`` for any real number x and all other cases fail. +``assert_array_`` and masked arrays ``assert_equal`` hide less warnings +----------------------------------------------------------------------- +Some warnings that were previously hidden by the ``assert_array_`` +functions are not hidden anymore. In most cases the warnings should be +correct and, should they occur, will require changes to the tests using +these functions. +For the masked array ``assert_equal`` version, warnings may occur when +comparing NaT. The function presently does not handle NaT or NaN +specifically and it may be best to avoid it at this time should a warning +show up due to this change. + ``offset`` attribute value in ``memmap`` objects ------------------------------------------------ The ``offset`` attribute in a ``memmap`` object is now set to the