From 7635fe13aa5215b6446f1cc4906985fcc28c4b32 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Thu, 21 Oct 2021 10:25:17 -0400 Subject: [PATCH 01/42] TEMP: Add isoformatter test --- isoformatter.py | 401 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 401 insertions(+) create mode 100644 isoformatter.py diff --git a/isoformatter.py b/isoformatter.py new file mode 100644 index 00000000000000..3103cbe5d100ce --- /dev/null +++ b/isoformatter.py @@ -0,0 +1,401 @@ +import re +import itertools + +from datetime import datetime, time, timedelta, timezone + +# import hypothesis +from test.support.hypothesis_helper import hypothesis + +import unittest + + +def _valid_date_formats(): + return ('%Y-%m-%d', '%Y%m%d', '%G-W%V', '%GW%V', '%G-W%V-%u', '%GW%V%u') + + +def _valid_time_formats(max_precision=9): + subsecond_format_tuples = itertools.product( + ('%H:%M:%S', '%H%M%S'), + (f'%(f{prec})' for prec in range(1, max_precision)), + ) + subsecond_formats = ( + ('.'.join(comps), ','.join(comps)) for comps in subsecond_format_tuples + ) + time_formats = ('%H', '%H:%M', '%H:%M:%S', '%H%M', '%H%M%S') + tuple( + itertools.chain.from_iterable(subsecond_formats) + ) + + tz_formats = ('',) + tuple( + (f'[TZ:{tz_fmt}]' for tz_fmt in time_formats + ('Z',)) + ) + + return tuple(map(''.join, itertools.product(time_formats, tz_formats))) + + +VALID_DATE_FORMATS = _valid_date_formats() +VALID_TIME_FORMATS = _valid_time_formats() + + +def _make_isoformatter_strategy(): + time_format = hypothesis.strategies.one_of( + hypothesis.strategies.just(()), # No time format + hypothesis.strategies.tuples( + hypothesis.strategies.one_of( + hypothesis.strategies.just("T"), # Shrink towards T and space + hypothesis.strategies.just(" "), + hypothesis.strategies.characters(), + ), + hypothesis.strategies.sampled_from(VALID_TIME_FORMATS), + ), + ) + + return hypothesis.strategies.tuples( + hypothesis.strategies.sampled_from(VALID_DATE_FORMATS), time_format + ).map(lambda x: 
IsoFormatter(''.join((x[0],) + x[1]))) + + +ISOFORMATTERS = _make_isoformatter_strategy() +TIMEZONES = hypothesis.strategies.one_of( + hypothesis.strategies.none(), + hypothesis.strategies.timedeltas( + min_value=timedelta( + hours=-23, minutes=59, seconds=59, microseconds=999999 + ), + max_value=timedelta( + hours=23, minutes=59, seconds=59, microseconds=999999 + ), + ).map(timezone), + hypothesis.strategies.timezones(), +) + + +class IsoFormatter: + _TZ_RE = re.compile(r'\[TZ:(?P[^\]]+)\]$') + _FLOAT_RE = re.compile(r'%\(f(?P\d+)\)$') + + _MICROSECOND = timedelta(microseconds=1) + _SECOND = timedelta(seconds=1) + _MINUTE = timedelta(minutes=1) + _HOUR = timedelta(hours=1) + _ZERO = timedelta(0) + + def __init__(self, format_str): + self._format_str = format_str + + if (m := self._TZ_RE.search(format_str)) is not None: + self._tz_str = m.group('fmt') + format_str = format_str[: m.start()] + else: + self._tz_str = None + + try: + time_str_start = format_str.index('%H') + except ValueError: + time_str_start = None + + if time_str_start is not None: + self._time_str = format_str[time_str_start:] + self._sep = format_str[time_str_start - 1] + self._date_str = format_str[: time_str_start - 1] + else: + self._time_str = None + self._sep = '' + self._date_str = format_str + + self._date_str = self._date_str.replace("%Y", "%4Y").replace( + "%G", "%4G" + ) + + self._populate_time() + self._populate_tz() + + if 'W' in self._date_str: + expected_components = ('%4G', '%V') + else: + expected_components = ('%4Y', '%m', '%d') + assert self._all_in( + self._date_str, expected_components + ), f'Must specify all date components: {self._format_str}' + + def __repr__(self): + return f'{self.__class__.__name__}(\'{self._format_str}\')' + + def format(self, dt): + """Apply the specified ISO8601 format to a datetime.""" + return ( + f'{format(dt, self._date_str)}{self._sep}' + + f'{self._time_formatter(dt)}{self._tz_formatter(dt)}' + ) + + def truncate(self, dt): + """Truncate a 
datetime to the precision level of the format.""" + truncator = {} + if 'W' in self._date_str and '%u' not in self._date_str: + iso_year, week, weekday = dt.isocalendar() + if weekday != 1: + truncated_dt = datetime.fromisocalendar(iso_year, week, 1) + for comp in ('year', 'month', 'day'): + if getattr(dt, comp) != ( + new_comp := getattr(truncated_dt, comp) + ): + truncator[comp] = new_comp + + truncator.update(self._time_truncator(dt)) + truncator.update(self._tz_truncator(dt)) + + if truncator: + return dt.replace(**truncator) + else: + return dt + + def _populate_time(self): + if self._time_str is not None: + time_formatter, time_truncation = self._make_timelike_formatter( + self._time_str + ) + self._time_formatter = time_formatter + self._time_truncator = self._make_time_truncator(time_truncation) + else: + self._time_formatter = self._null_formatter + self._time_truncator = self._make_time_truncator(timedelta(days=1)) + + def _populate_tz(self): + if self._tz_str is not None: + if self._tz_str == 'Z': + self._tz_formatter = self._tz_z_formatter + self._tz_truncator = self._make_tz_truncator(None) + else: + base_formatter, tz_truncation = self._make_timelike_formatter( + self._tz_str + ) + + self._tz_formatter = self._make_tz_formatter(base_formatter) + self._tz_truncator = self._make_tz_truncator(tz_truncation) + else: + self._tz_formatter = self._null_formatter + self._tz_truncator = self._remove_tzinfo_truncator + + def _make_timelike_formatter(self, time_str): + time_elements = ('%(f', '%S', '%M', '%H') + truncation_elements = (None, self._SECOND, self._MINUTE, self._HOUR) + + truncation = None + for i, elem in enumerate(time_elements): + if elem in time_str: + assert self._all_in( + time_str, time_elements[(i + 1) :] + ), f'Invalid time str: {time_str}' + truncation = truncation_elements[i] + break + else: + assert False, f'Invalid time str: {time_str}' + + if (m := self._FLOAT_RE.search(time_str)) is not None: + time_str = time_str[: m.start()] + + 
precision = int(m.group('prec')) + assert precision > 0, '0 and negative precision is not supported' + + truncation = timedelta(microseconds=10 ** (6 - min(6, precision))) + + def format_time(dt, *, time_str=time_str, precision=precision): + if precision < 7: + return ( + format(dt, time_str) + + f'{dt.microsecond:06d}'[0:precision] + ) + else: + return ( + format(dt, time_str) + + f'{dt.microsecond:06d}' + + '0' * (precision - 6) + ) + + else: + + def format_time(dt, *, time_str=time_str): + return format(dt, time_str) + + return format_time, truncation + + _ARBITRARY_DT = datetime(2000, 1, 1) + + def _make_tz_formatter(self, base_formatter): + def tz_formatter(dt, *, _self=self, _base_formatter=base_formatter): + if dt.tzinfo is None: + return '' + utcoffset = dt.utcoffset() + + t = self._ARBITRARY_DT + abs(utcoffset) + + sign = '+' if utcoffset >= _self._ZERO else '-' + + return sign + _base_formatter(t) + + return tz_formatter + + def _make_time_truncator(self, truncation): + if truncation is None: + + def time_truncator(dt): + return {} + + else: + + def time_truncator(dt, *, _time_truncation=truncation): + time_as_td = timedelta( + hours=dt.hour, + minutes=dt.minute, + seconds=dt.second, + microseconds=dt.microsecond, + ) + truncated = _time_truncation * (time_as_td // _time_truncation) + + if truncated == time_as_td: + return {} + + td_as_datetime = datetime(1970, 1, 1) + truncated + return { + component: getattr(td_as_datetime, component) + for component in ('hour', 'minute', 'second', 'microsecond') + } + + return time_truncator + + def _make_tz_truncator(self, truncation): + if truncation is None: + + def tz_truncator(dt): + return {} + + else: + + def tz_truncator(dt, *, _tz_truncation=truncation): + if dt.tzinfo is None: + return {} + + offset = dt.utcoffset() + sign = -1 if offset < self._ZERO else 1 + + tmp, remainder = divmod(abs(offset), _tz_truncation) + if not remainder: + return {} + + new_offset = tmp * _tz_truncation + new_tzinfo = 
timezone(sign * new_offset) + return {'tzinfo': new_tzinfo} + + return tz_truncator + + def _null_formatter(self, dt): + return '' + + def _remove_tzinfo_truncator(self, dt): + if dt.tzinfo is not None: + return {'tzinfo': None} + return {} + + def _tz_z_formatter(self, dt): + if dt.tzinfo is None: + return '' + + utcoffset = dt.utcoffset() + + if utcoffset == timedelta(0): + return 'Z' + + hours, rem = divmod(utcoffset, timedelta(hours=1)) + + rv = f'{hours:+03d}' + if not rem: + return rv + + minutes, rem = divmod(rem, timedelta(minutes=1)) + rv += f':{rem.total_seconds():02f}' + if not rem: + return rv + + microseconds = rem // timedelta(microseconds=1) + rv += f'.{microseconds:06d}' + return rv + + @staticmethod + def _all_in(string, substrings): + for substring in substrings: + if substring not in string: + return False + return True + + +DEFAULT_DT = datetime(2025, 1, 2, 3, 4, 5, 678901) +AWARE_UTC_DT = datetime(2025, 1, 2, 3, 4, 5, 678901, tzinfo=timezone.utc) +AWARE_POS_DT = datetime( + 2025, 1, 2, 3, 5, 6, 678901, tzinfo=timezone(timedelta(hours=3)) +) +AWARE_NEG_DT = datetime( + 2025, 1, 2, 3, 5, 6, 678901, tzinfo=timezone(-timedelta(hours=3)) +) + + +class IsoFormatTest(unittest.TestCase): + @hypothesis.given( + dt=hypothesis.strategies.datetimes(timezones=TIMEZONES), + iso_formatter=ISOFORMATTERS, + ) + # fmt: off + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%d")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%d")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H:%M")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M")) + 
@hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M%S")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M%S")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S.%(f1)")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S,%(f1)")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) + @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) + @hypothesis.example(dt=AWARE_POS_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) + @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) + @hypothesis.example(dt=AWARE_NEG_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) + @hypothesis.example(dt=AWARE_POS_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) + @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) + @hypothesis.example(dt=AWARE_NEG_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) + @hypothesis.example(dt=AWARE_POS_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) + @hypothesis.example(dt=datetime(2000, 1, 1, + tzinfo=timezone(-timedelta(hours=-22, microseconds=1))), + iso_formatter=IsoFormatter("%Y-%m-%dT%H[TZ:%H]")) + @hypothesis.example(dt=AWARE_UTC_DT, + iso_formatter=IsoFormatter("%Y-%m-%d0%H:%M:%S,%(f1)[TZ:%H:%M:%S.%(f2)]")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%G-W%V")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%G-W%V-%u")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%GW%V:%H")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%GW%V5%H")) + @hypothesis.example(dt=DEFAULT_DT, 
iso_formatter=IsoFormatter("%GW%V%u5%H")) + @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%G-W%V0%H[TZ:%H]")) + # fmt: on + def test_fromisoformat(self, dt, iso_formatter): + + if "%G" in iso_formatter._format_str: + if ( + iso_formatter._format_str.startswith("%G-W%V-%u") + and len(iso_formatter._format_str) > 9 + ): + hypothesis.assume(not iso_formatter._format_str[9].isdigit()) + + input_str = iso_formatter.format(dt) + actual = datetime.fromisoformat(input_str) + expected = iso_formatter.truncate(dt) + + self.assertEqual( + actual, + expected, + f"\n{actual} != {expected}\n" + + f"actual = {actual!r}\n" + + f"expected = {expected!r} \n" + + f"input_str = {input_str}", + ) From b9b7a0344b47925d1a72540c5360ad2fdf051b32 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Thu, 21 Oct 2021 11:00:46 -0400 Subject: [PATCH 02/42] Add support for YYYYMMDD --- Modules/_datetimemodule.c | 162 +++++++++++++++++++++++++++++++------- 1 file changed, 135 insertions(+), 27 deletions(-) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 20cdb1822ab964..2afd9c162dc461 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -703,7 +703,7 @@ parse_isoformat_date(const char *dtstr, int *year, int *month, int *day) * Return codes: * 0: Success * -1: Failed to parse date component - * -2: Failed to parse dateseparator + * -2: Inconsistent date separator usage */ const char *p = dtstr; p = parse_digits(p, year, 4); @@ -711,8 +711,9 @@ parse_isoformat_date(const char *dtstr, int *year, int *month, int *day) return -1; } - if (*(p++) != '-') { - return -2; + const unsigned char uses_separator = (*p == '-'); + if (uses_separator) { + ++p; } p = parse_digits(p, month, 2); @@ -720,15 +721,15 @@ parse_isoformat_date(const char *dtstr, int *year, int *month, int *day) return -1; } - if (*(p++) != '-') { - return -2; + if (uses_separator) { + if (*(p++) != '-') { + return -2; + } } - p = parse_digits(p, day, 2); if (p == NULL) { return 
-1; } - return 0; } @@ -739,8 +740,9 @@ parse_hh_mm_ss_ff(const char *tstr, const char *tstr_end, int *hour, const char *p = tstr; const char *p_end = tstr_end; int *vals[3] = {hour, minute, second}; + unsigned char has_separator = 2; - // Parse [HH[:MM[:SS]]] + // Parse [HH[:?MM[:?SS]]] for (size_t i = 0; i < 3; ++i) { p = parse_digits(p, vals[i], 2); if (NULL == p) { @@ -751,7 +753,10 @@ parse_hh_mm_ss_ff(const char *tstr, const char *tstr_end, int *hour, if (p >= p_end) { return c != '\0'; } - else if (c == ':') { + else if (has_separator == 2 || (has_separator && c == ':')) { + if (has_separator == 2) { + has_separator = (c == ':'); + } continue; } else if (c == '.') { @@ -5182,7 +5187,7 @@ datetime_combine(PyObject *cls, PyObject *args, PyObject *kw) } static PyObject * -_sanitize_isoformat_str(PyObject *dtstr) +_sanitize_isoformat_str(PyObject *dtstr, Py_ssize_t len, Py_ssize_t separator_location) { // `fromisoformat` allows surrogate characters in exactly one position, // the separator; to allow datetime_fromisoformat to make the simplifying @@ -5190,13 +5195,8 @@ _sanitize_isoformat_str(PyObject *dtstr) // replaces any surrogate character separators with `T`. 
// // The result of this, if not NULL, returns a new reference - Py_ssize_t len = PyUnicode_GetLength(dtstr); - if (len < 0) { - return NULL; - } - - if (len <= 10 || - !Py_UNICODE_IS_SURROGATE(PyUnicode_READ_CHAR(dtstr, 10))) { + if (len <= separator_location || + !Py_UNICODE_IS_SURROGATE(PyUnicode_READ_CHAR(dtstr, separator_location))) { Py_INCREF(dtstr); return dtstr; } @@ -5206,7 +5206,7 @@ _sanitize_isoformat_str(PyObject *dtstr) return NULL; } - if (PyUnicode_WriteChar(str_out, 10, (Py_UCS4)'T')) { + if (PyUnicode_WriteChar(str_out, separator_location, (Py_UCS4)'T')) { Py_DECREF(str_out); return NULL; } @@ -5214,6 +5214,100 @@ _sanitize_isoformat_str(PyObject *dtstr) return str_out; } +#define MODE_STANDARD 0 +#define MODE_ISOCALENDAR 1 +#define MODE_AMBIGUOUS 2 + +static Py_ssize_t +_find_isoformat_separator(PyObject *dtstr, Py_ssize_t len, unsigned char* mode) { + // The valid date formats can all be distinguished by characters 4 and 5 + // and further narrowed down by character + // which tells us where to look for the separator character. + // Format | As-rendered | Position + // --------------------------------------- + // %Y-%m-%d | YYYY-MM-DD | 10 + // %Y%m%d | YYYYMMDD | 8 + // %Y-W%V | YYYY-Www | 8 + // %YW%V | YYYYWww | 7 + // %Y-W%V-%u | YYYY-Www-d | 10 + // %YW%V%u | YYYYWwwd | 8 + // + // Note that because we allow *any* character for the separator, in the + // case where character 4 is W, it's not straightforward to determine where + // the separator is — in the case of YYYY-Www-d, you have actual ambiguity, + // e.g. 2020-W01-0000 could be YYYY-Www-D0HH or YYYY-Www-HHMM, when the + // separator character is a number in the former case or a hyphen in the + // latter case. 
+ // + // The case of YYYYWww can be distinguished from YYYYWwwd by tracking ahead + // to either the end of the string or the first non-numeric character — + // since the time components all come in pairs YYYYWww#HH can be + // distinguished from YYYYWwwd#HH by the fact that there will always be an + // odd number of digits before the first non-digit character in the former + // case. + static const Py_UCS4 date_separator = '-'; + static const Py_UCS4 week_indicator = 'W'; + + const Py_UCS4 char_4 = PyUnicode_READ_CHAR(dtstr, 4); + + *mode = MODE_STANDARD; + if (char_4 == date_separator) { + if (PyUnicode_READ_CHAR(dtstr, 5) == week_indicator) { + *mode = MODE_ISOCALENDAR; + if (len < 8) { + return -1; + } + + // YYYY-Www-D (10) or YYYY-Www-HH (8) + if (len > 8 && PyUnicode_READ_CHAR(dtstr, 8) == date_separator) { + if (len == 9) { return -1; } + if (len > 10 && Py_UNICODE_ISDIGIT(PyUnicode_READ_CHAR(dtstr, 10))) { + // This is as far as we'll try to go to resolve the + // ambiguity for the moment — if we have YYYY-Www-##, the + // separator is either a hyphen at 8 or a number at 10. + // + // We'll assume it's a hyphen at 8 because it's way more + // likely that someone will use a hyphen as a separator + // than a number, but at this point it's really best effort + // because this is an extension of the spec anyway. + *mode = *mode | MODE_AMBIGUOUS; + return 8; + } + + return 10; + } else { + // YYYY-Www (8) + return 8; + } + } else { + // YYYY-MM-DD (10) + return 10; + } + } else { + if (char_4 == week_indicator) { + *mode = MODE_ISOCALENDAR; + // YYYYWww (7) or YYYYWwwd (8) + ssize_t idx = 7; + for (; idx < len; ++idx) { + // Keep going until we run out of digits. 
+ if (!Py_UNICODE_ISDIGIT(PyUnicode_READ_CHAR(dtstr, idx))) { + break; + } + } + + if (len == 7 || idx % 2 == 0) { + // If the index of the last number is even, it's YYYYWwwd + return 7; + } else { + return 8; + } + } else { + // YYYYMMDD (8) + return 8; + } + } +} + static PyObject * datetime_fromisoformat(PyObject *cls, PyObject *dtstr) { @@ -5225,12 +5319,25 @@ datetime_fromisoformat(PyObject *cls, PyObject *dtstr) return NULL; } - PyObject *dtstr_clean = _sanitize_isoformat_str(dtstr); + unsigned char mode; + Py_ssize_t len = PyUnicode_GetLength(dtstr); + if (len < 7) { // All valid ISO8601 strings are at least 7 characters long + goto error; + } + + const Py_ssize_t separator_location = _find_isoformat_separator( + dtstr, len, &mode); + + // We only need to sanitize this string if the separator is a surrogate + // character. In the situation where the separator location is ambiguous, + // we don't have to sanitize it anything because that can only happen when + // the separator is either '-' or a number. This should mostly be a noop + // but it makes the reference counting easier if we still sanitize. 
+ PyObject *dtstr_clean = _sanitize_isoformat_str(dtstr, len, separator_location); if (dtstr_clean == NULL) { goto error; } - Py_ssize_t len; const char *dt_ptr = PyUnicode_AsUTF8AndSize(dtstr_clean, &len); if (dt_ptr == NULL) { @@ -5252,21 +5359,22 @@ datetime_fromisoformat(PyObject *cls, PyObject *dtstr) // date has a fixed length of 10 int rv = parse_isoformat_date(p, &year, &month, &day); - if (!rv && len > 10) { + if (!rv && len > separator_location) { // In UTF-8, the length of multi-byte characters is encoded in the MSB - if ((p[10] & 0x80) == 0) { - p += 11; + p += separator_location; + if ((p[0] & 0x80) == 0) { + p += 1; } else { - switch (p[10] & 0xf0) { + switch (p[0] & 0xf0) { case 0xe0: - p += 13; + p += 3; break; case 0xf0: - p += 14; + p += 4; break; default: - p += 12; + p += 2; break; } } From c746b96d804aabe0a0b5a9550b037c982507af78 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Thu, 21 Oct 2021 15:04:27 -0400 Subject: [PATCH 03/42] Expand support for ISO 8601 times --- Modules/_datetimemodule.c | 69 +++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 2afd9c162dc461..2361721b6e6baa 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -680,6 +680,11 @@ set_date_fields(PyDateTime_Date *self, int y, int m, int d) * String parsing utilities and helper functions */ +static const unsigned char * +is_digit(const char c) { + return ((unsigned int)(c - '0')) < 10; +} + static const char * parse_digits(const char *ptr, int *var, size_t num_digits) { @@ -740,7 +745,7 @@ parse_hh_mm_ss_ff(const char *tstr, const char *tstr_end, int *hour, const char *p = tstr; const char *p_end = tstr_end; int *vals[3] = {hour, minute, second}; - unsigned char has_separator = 2; + unsigned char has_separator = 1; // Parse [HH[:?MM[:?SS]]] for (size_t i = 0; i < 3; ++i) { @@ -750,36 +755,50 @@ parse_hh_mm_ss_ff(const char *tstr, const char 
*tstr_end, int *hour, } char c = *(p++); + if (i == 0) { + has_separator = (c == ':'); + } + if (p >= p_end) { return c != '\0'; } - else if (has_separator == 2 || (has_separator && c == ':')) { - if (has_separator == 2) { - has_separator = (c == ':'); - } + else if (has_separator && (c == ':')) { continue; } - else if (c == '.') { + else if (c == '.' || c == ',') { break; - } - else { + } else if (!has_separator) { + --p; + } else { return -4; // Malformed time separator } } - // Parse .fff[fff] + // Parse fractional components size_t len_remains = p_end - p; - if (!(len_remains == 6 || len_remains == 3)) { - return -3; + size_t to_parse = len_remains; + if (len_remains >= 6) { + to_parse = 6; } - p = parse_digits(p, microsecond, len_remains); + p = parse_digits(p, microsecond, to_parse); if (NULL == p) { return -3; } - if (len_remains == 3) { - *microsecond *= 1000; + static int correction[5] = { + 100000, 10000, 1000, 100, 10 + }; + + if (to_parse < 6) { + *microsecond *= correction[to_parse-1]; + } + + for (size_t i = 0; i < len_remains - 6; ++i) { + if (!is_digit(*p)) { + break; + } + p++; } // Return 1 if it's not the end of the string @@ -805,7 +824,7 @@ parse_isoformat_time(const char *dtstr, size_t dtlen, int *hour, int *minute, const char *tzinfo_pos = p; do { - if (*tzinfo_pos == '+' || *tzinfo_pos == '-') { + if (*tzinfo_pos == 'Z' || *tzinfo_pos == '+' || *tzinfo_pos == '-') { break; } } while (++tzinfo_pos < p_end); @@ -827,14 +846,16 @@ parse_isoformat_time(const char *dtstr, size_t dtlen, int *hour, int *minute, } } - // Parse time zone component - // Valid formats are: - // - +HH:MM (len 6) - // - +HH:MM:SS (len 9) - // - +HH:MM:SS.ffffff (len 16) - size_t tzlen = p_end - tzinfo_pos; - if (!(tzlen == 6 || tzlen == 9 || tzlen == 16)) { - return -5; + // Special case UTC / Zulu time. 
+ if (*tzinfo_pos == 'Z') { + *tzoffset = 0; + *tzmicrosecond = 0; + + if (*(tzinfo_pos + 1) != '\0') { + return -6; + } else { + return 1; + } } int tzsign = (*tzinfo_pos == '-') ? -1 : 1; @@ -846,7 +867,7 @@ parse_isoformat_time(const char *dtstr, size_t dtlen, int *hour, int *minute, *tzoffset = tzsign * ((tzhour * 3600) + (tzminute * 60) + tzsecond); *tzmicrosecond *= tzsign; - return rv ? -5 : 1; + return rv ? -7 : 1; } /* --------------------------------------------------------------------------- From 00978f9a5be6abfa804021fbc4f009128d3cf614 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Fri, 22 Oct 2021 10:37:38 -0400 Subject: [PATCH 04/42] Add support for ISO calendar-style strings --- Modules/_datetimemodule.c | 117 ++++++++++++++++++++++++++++---------- 1 file changed, 86 insertions(+), 31 deletions(-) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 2361721b6e6baa..1f8bda45d2f0ff 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -395,6 +395,39 @@ iso_week1_monday(int year) return week1_monday; } +static int +iso_to_ymd(const int iso_year, const int iso_week, const int iso_day, + int *year, int *month, int *day) { + if (iso_week <= 0 || iso_week >= 53) { + int out_of_range = 1; + if (iso_week == 53) { + // ISO years have 53 weeks in it on years starting with a Thursday + // and on leap years starting on Wednesday + int first_weekday = weekday(iso_year, 1, 1); + if (first_weekday == 3 || (first_weekday == 2 && is_leap(iso_year))) { + out_of_range = 0; + } + } + + if (out_of_range) { + return -2; + } + } + + if (iso_day <= 0 || iso_day >= 8) { + return -3; + } + + // Convert (Y, W, D) to (Y, M, D) in-place + int day_1 = iso_week1_monday(iso_year); + + int day_offset = (iso_week - 1)*7 + iso_day - 1; + + ord_to_ymd(day_1 + day_offset, year, month, day); + return 0; +} + + /* --------------------------------------------------------------------------- * Range checkers. 
*/ @@ -680,7 +713,7 @@ set_date_fields(PyDateTime_Date *self, int y, int m, int d) * String parsing utilities and helper functions */ -static const unsigned char * +static unsigned char is_digit(const char c) { return ((unsigned int)(c - '0')) < 10; } @@ -701,7 +734,7 @@ parse_digits(const char *ptr, int *var, size_t num_digits) } static int -parse_isoformat_date(const char *dtstr, int *year, int *month, int *day) +parse_isoformat_date(const char *dtstr, const size_t len, int *year, int *month, int *day) { /* Parse the date components of the result of date.isoformat() * @@ -721,6 +754,39 @@ parse_isoformat_date(const char *dtstr, int *year, int *month, int *day) ++p; } + if(*p == 'W') { + // This is an isocalendar-style date string + p++; + int iso_week = 0; + int iso_day = 0; + + p = parse_digits(p, &iso_week, 2); + if (NULL == p) { + return -3; + } + + assert(p > dtstr); + if ((size_t)(p - dtstr) < len) { + if (uses_separator && *(p++) != '-') { + return -2; + } + + p = parse_digits(p, &iso_day, 1); + if (NULL == p) { + return -4; + } + } else { + iso_day = 1; + } + + int rv = iso_to_ymd(*year, iso_week, iso_day, year, month, day); + if (rv) { + return 3 - rv; + } else { + return 0; + } + } + p = parse_digits(p, month, 2); if (NULL == p) { return -1; @@ -3009,8 +3075,8 @@ date_fromisoformat(PyObject *cls, PyObject *dtstr) int year = 0, month = 0, day = 0; int rv; - if (len == 10) { - rv = parse_isoformat_date(dt_ptr, &year, &month, &day); + if (len == 7 || len == 8 || len == 10) { + rv = parse_isoformat_date(dt_ptr, len, &year, &month, &day); } else { rv = -1; @@ -3053,37 +3119,21 @@ date_fromisocalendar(PyObject *cls, PyObject *args, PyObject *kw) return NULL; } - if (week <= 0 || week >= 53) { - int out_of_range = 1; - if (week == 53) { - // ISO years have 53 weeks in it on years starting with a Thursday - // and on leap years starting on Wednesday - int first_weekday = weekday(year, 1, 1); - if (first_weekday == 3 || (first_weekday == 2 && is_leap(year))) { - 
out_of_range = 0; - } - } + int month; + Py_ssize_t rv = iso_to_ymd(year, week, day, &year, &month, &day); - if (out_of_range) { - PyErr_Format(PyExc_ValueError, "Invalid week: %d", week); - return NULL; - } + + if (rv == -2) { + PyErr_Format(PyExc_ValueError, "Invalid week: %d", week); + return NULL; } - if (day <= 0 || day >= 8) { + if (rv == -3) { PyErr_Format(PyExc_ValueError, "Invalid day: %d (range is [1, 7])", day); return NULL; } - // Convert (Y, W, D) to (Y, M, D) in-place - int day_1 = iso_week1_monday(year); - - int month = week; - int day_offset = (month - 1)*7 + day - 1; - - ord_to_ymd(day_1 + day_offset, &year, &month, &day); - return new_date_subclass_ex(year, month, day, cls); } @@ -5316,7 +5366,11 @@ _find_isoformat_separator(PyObject *dtstr, Py_ssize_t len, unsigned char* mode) } } - if (len == 7 || idx % 2 == 0) { + if (idx < 9) { + return idx; + } + + if (idx % 2 == 0) { // If the index of the last number is even, it's YYYYWwwd return 7; } else { @@ -5340,6 +5394,7 @@ datetime_fromisoformat(PyObject *cls, PyObject *dtstr) return NULL; } + PyObject *dtstr_clean = NULL; unsigned char mode; Py_ssize_t len = PyUnicode_GetLength(dtstr); if (len < 7) { // All valid ISO8601 strings are at least 7 characters long @@ -5354,7 +5409,7 @@ datetime_fromisoformat(PyObject *cls, PyObject *dtstr) // we don't have to sanitize it anything because that can only happen when // the separator is either '-' or a number. This should mostly be a noop // but it makes the reference counting easier if we still sanitize. 
- PyObject *dtstr_clean = _sanitize_isoformat_str(dtstr, len, separator_location); + dtstr_clean = _sanitize_isoformat_str(dtstr, len, separator_location); if (dtstr_clean == NULL) { goto error; } @@ -5377,8 +5432,8 @@ datetime_fromisoformat(PyObject *cls, PyObject *dtstr) int hour = 0, minute = 0, second = 0, microsecond = 0; int tzoffset = 0, tzusec = 0; - // date has a fixed length of 10 - int rv = parse_isoformat_date(p, &year, &month, &day); + // date runs up to separator_location + int rv = parse_isoformat_date(p, separator_location, &year, &month, &day); if (!rv && len > separator_location) { // In UTF-8, the length of multi-byte characters is encoded in the MSB From c36e306019437716f1776c01c03d4284bd3c7bfc Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Fri, 22 Oct 2021 11:09:02 -0400 Subject: [PATCH 05/42] Rework how string sanitization works Rather than attempting to detect where the separator is first, we can take advantage of the fact that it really can only be in one of 3 locations to do the sanitization before any separator detection occurs. 
--- Modules/_datetimemodule.c | 74 ++++++++++++++++++++++++++------------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 1f8bda45d2f0ff..7d9f40e37521aa 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -5258,16 +5258,42 @@ datetime_combine(PyObject *cls, PyObject *args, PyObject *kw) } static PyObject * -_sanitize_isoformat_str(PyObject *dtstr, Py_ssize_t len, Py_ssize_t separator_location) +_sanitize_isoformat_str(PyObject *dtstr) { + Py_ssize_t len = PyUnicode_GetLength(dtstr); + if (len < 7) { // All valid ISO8601 strings are at least 7 characters long + return NULL; + } + // `fromisoformat` allows surrogate characters in exactly one position, // the separator; to allow datetime_fromisoformat to make the simplifying // assumption that all valid strings can be encoded in UTF-8, this function // replaces any surrogate character separators with `T`. // // The result of this, if not NULL, returns a new reference - if (len <= separator_location || - !Py_UNICODE_IS_SURROGATE(PyUnicode_READ_CHAR(dtstr, separator_location))) { + const void* const unicode_data = PyUnicode_DATA(dtstr); + const unsigned int kind = PyUnicode_KIND(dtstr); + + // Depending on the format of the string, the separator can only ever be + // in positions 7, 8 or 10. We'll check each of these for a surrogate and + // if we find one, replace it with `T`. If there is more than one surrogate, + // we don't have to bother sanitizing it, because the function will later + // fail when we try to convert the function into unicode characters. 
+ static const size_t potential_separators[3] = {7, 8, 10}; + size_t surrogate_separator = 0; + for(size_t idx = 0; idx < 3; ++idx) { + size_t pos = potential_separators[idx]; + if (pos > (size_t)len) { + break; + } + + if(Py_UNICODE_IS_SURROGATE(PyUnicode_READ(kind, unicode_data, pos))) { + surrogate_separator = pos; + break; + } + } + + if (surrogate_separator == 0) { Py_INCREF(dtstr); return dtstr; } @@ -5277,7 +5303,7 @@ _sanitize_isoformat_str(PyObject *dtstr, Py_ssize_t len, Py_ssize_t separator_lo return NULL; } - if (PyUnicode_WriteChar(str_out, separator_location, (Py_UCS4)'T')) { + if (PyUnicode_WriteChar(str_out, surrogate_separator, (Py_UCS4)'T')) { Py_DECREF(str_out); return NULL; } @@ -5290,7 +5316,7 @@ _sanitize_isoformat_str(PyObject *dtstr, Py_ssize_t len, Py_ssize_t separator_lo #define MODE_AMBIGUOUS 2 static Py_ssize_t -_find_isoformat_separator(PyObject *dtstr, Py_ssize_t len, unsigned char* mode) { +_find_isoformat_separator(const char *dtstr, Py_ssize_t len, unsigned char* mode) { // The valid date formats can all be distinguished by characters 4 and 5 // and further narrowed down by character // which tells us where to look for the separator character. @@ -5302,6 +5328,8 @@ _find_isoformat_separator(PyObject *dtstr, Py_ssize_t len, unsigned char* mode) // %YW%V | YYYYWww | 7 // %Y-W%V-%u | YYYY-Www-d | 10 // %YW%V%u | YYYYWwwd | 8 + // %Y-%j | YYYY-DDD | 8 + // %Y%j | YYYYDDD | 7 // // Note that because we allow *any* character for the separator, in the // case where character 4 is W, it's not straightforward to determine where @@ -5316,23 +5344,23 @@ _find_isoformat_separator(PyObject *dtstr, Py_ssize_t len, unsigned char* mode) // distinguished from YYYYWwwd#HH by the fact that there will always be an // odd number of digits before the first non-digit character in the former // case. 
- static const Py_UCS4 date_separator = '-'; - static const Py_UCS4 week_indicator = 'W'; + static const char date_separator = '-'; + static const char week_indicator = 'W'; - const Py_UCS4 char_4 = PyUnicode_READ_CHAR(dtstr, 4); + assert(len > 7); *mode = MODE_STANDARD; - if (char_4 == date_separator) { - if (PyUnicode_READ_CHAR(dtstr, 5) == week_indicator) { + if (dtstr[4] == date_separator) { + if (dtstr[5] == week_indicator) { *mode = MODE_ISOCALENDAR; if (len < 8) { return -1; } // YYYY-Www-D (10) or YYYY-Www-HH (8) - if (len > 8 && PyUnicode_READ_CHAR(dtstr, 8) == date_separator) { + if (len > 8 && dtstr[8] == date_separator) { if (len == 9) { return -1; } - if (len > 10 && Py_UNICODE_ISDIGIT(PyUnicode_READ_CHAR(dtstr, 10))) { + if (len > 10 && is_digit(dtstr[10])) { // This is as far as we'll try to go to resolve the // ambiguity for the moment — if we have YYYY-Www-##, the // separator is either a hyphen at 8 or a number at 10. @@ -5355,13 +5383,13 @@ _find_isoformat_separator(PyObject *dtstr, Py_ssize_t len, unsigned char* mode) return 10; } } else { - if (char_4 == week_indicator) { + if (dtstr[4] == week_indicator) { *mode = MODE_ISOCALENDAR; // YYYYWww (7) or YYYYWwwd (8) ssize_t idx = 7; for (; idx < len; ++idx) { // Keep going until we run out of digits. - if (!Py_UNICODE_ISDIGIT(PyUnicode_READ_CHAR(dtstr, idx))) { + if (!is_digit(dtstr[idx])) { break; } } @@ -5394,26 +5422,17 @@ datetime_fromisoformat(PyObject *cls, PyObject *dtstr) return NULL; } - PyObject *dtstr_clean = NULL; - unsigned char mode; - Py_ssize_t len = PyUnicode_GetLength(dtstr); - if (len < 7) { // All valid ISO8601 strings are at least 7 characters long - goto error; - } - - const Py_ssize_t separator_location = _find_isoformat_separator( - dtstr, len, &mode); - // We only need to sanitize this string if the separator is a surrogate // character. 
In the situation where the separator location is ambiguous, // we don't have to sanitize it anything because that can only happen when // the separator is either '-' or a number. This should mostly be a noop // but it makes the reference counting easier if we still sanitize. - dtstr_clean = _sanitize_isoformat_str(dtstr, len, separator_location); + PyObject *dtstr_clean = _sanitize_isoformat_str(dtstr); if (dtstr_clean == NULL) { goto error; } + Py_ssize_t len; const char *dt_ptr = PyUnicode_AsUTF8AndSize(dtstr_clean, &len); if (dt_ptr == NULL) { @@ -5426,6 +5445,11 @@ datetime_fromisoformat(PyObject *cls, PyObject *dtstr) } } + unsigned char mode; + const Py_ssize_t separator_location = _find_isoformat_separator( + dt_ptr, len, &mode); + + const char *p = dt_ptr; int year = 0, month = 0, day = 0; From 0234cae2bf0599d678470ef7b690d272164dbb06 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 16 Nov 2021 11:52:10 -0500 Subject: [PATCH 06/42] WIP --- isoformatter.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/isoformatter.py b/isoformatter.py index 3103cbe5d100ce..76c8e97975472c 100644 --- a/isoformatter.py +++ b/isoformatter.py @@ -1,7 +1,8 @@ import re import itertools +import functools -from datetime import datetime, time, timedelta, timezone +from datetime import date, datetime, time, timedelta, timezone # import hypothesis from test.support.hypothesis_helper import hypothesis @@ -120,13 +121,22 @@ def __init__(self, format_str): def __repr__(self): return f'{self.__class__.__name__}(\'{self._format_str}\')' - def format(self, dt): + @functools.singledispatchmethod + def format(self, dt : datetime) -> str: """Apply the specified ISO8601 format to a datetime.""" return ( f'{format(dt, self._date_str)}{self._sep}' + f'{self._time_formatter(dt)}{self._tz_formatter(dt)}' ) + @format.register + def _(self, dt: date) -> str: + return f'{format(dt, self._date_str)}' + + @format.register + def _(self, dt: time) -> str: + return 
f'{self._time_formatter(dt)} + def truncate(self, dt): """Truncate a datetime to the precision level of the format.""" truncator = {} From ee1a7e3208f2f19c0e2cdc4e1383748a8e5fdfcc Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Wed, 27 Apr 2022 14:29:30 -0600 Subject: [PATCH 07/42] Move Isoformatter into test helper, add date/time tests --- Lib/test/isoformat_helper.py | 295 +++++++++++++++++++++++++ isoformatter.py | 408 +++++++++++------------------------ 2 files changed, 417 insertions(+), 286 deletions(-) create mode 100644 Lib/test/isoformat_helper.py diff --git a/Lib/test/isoformat_helper.py b/Lib/test/isoformat_helper.py new file mode 100644 index 00000000000000..d5069939a3c784 --- /dev/null +++ b/Lib/test/isoformat_helper.py @@ -0,0 +1,295 @@ +import re +import itertools +import functools + +from datetime import date, datetime, time, timedelta, timezone + + +class IsoFormatter: + """Helper class to make it possible to round-trip a given ISO 8601 format. + + The main problem this solves is that many ISO 8601 formats are lossy, e.g.:: + + >>> datetime(2022, 5, 19, 12, 30, 15).isoformat(timespec="hours") + 2022-05-19T12 + + This prevents us from easily writing tests that take arbitrary input + datetimes, serializes them to an arbitrary ISO 8601 format and ensures that + the same thing comes back when we try and parse it. + + This class allows you to specify an ISO 8601 format and generate both the + ISO 8601 string and the truncated datetime, like so: + + >>> formatter = IsoFormatter("%Y-%m-%dT%H") + >>> dt = datetime(2022, 5, 19, 12, 30, 15) + >>> formatter.format(dt) + "2022-05-19T12" + >>> formatter.truncate(dt) + datetime.datetime(2022, 5, 19, 12, 0) + """ + + _TZ_RE = re.compile(r"\[TZ:(?P[^\]]+)\]$") + _FLOAT_RE = re.compile(r"%\(f(?P\d+)\)$") + + # Create instances of these unit values for convenience and performance. 
+ _MICROSECOND = timedelta(microseconds=1) + _SECOND = timedelta(seconds=1) + _MINUTE = timedelta(minutes=1) + _HOUR = timedelta(hours=1) + _ZERO = timedelta(0) + + def __init__(self, format_str): + self._format_str = format_str + + if (m := self._TZ_RE.search(format_str)) is not None: + self._tz_str = m.group("fmt") + format_str = format_str[: m.start()] + else: + self._tz_str = None + + try: + time_str_start = format_str.index("%H") + except ValueError: + time_str_start = None + + if time_str_start is not None: + self._time_str = format_str[time_str_start:] + self._sep = format_str[time_str_start - 1] + self._date_str = format_str[: time_str_start - 1] + else: + self._time_str = None + self._sep = "" + self._date_str = format_str + + self._date_str = self._date_str.replace("%Y", "%4Y").replace( + "%G", "%4G" + ) + + self._populate_time() + self._populate_tz() + + if "W" in self._date_str: + expected_components = ("%4G", "%V") + else: + expected_components = ("%4Y", "%m", "%d") + + def __repr__(self): + return f"{self.__class__.__name__}('{self._format_str}')" + + @functools.singledispatchmethod + def format(self, dt: datetime) -> str: + """Apply the specified ISO8601 format to a datetime.""" + return ( + f"{format(dt, self._date_str)}{self._sep}" + + f"{self._time_formatter(dt)}{self._tz_formatter(dt)}" + ) + + @format.register + def _(self, dt: date) -> str: + return f"{format(dt, self._date_str)}" + + @format.register + def _(self, dt: time) -> str: + return f"{self._time_formatter(dt)}{self._tz_formatter(dt)}" + + def truncate(self, dt): + """Truncate a datetime to the precision level of the format.""" + truncator = {} + if "W" in self._date_str and "%u" not in self._date_str: + iso_year, week, weekday = dt.isocalendar() + if weekday != 1: + truncated_dt = datetime.fromisocalendar(iso_year, week, 1) + for comp in ("year", "month", "day"): + if getattr(dt, comp) != ( + new_comp := getattr(truncated_dt, comp) + ): + truncator[comp] = new_comp + + if 
isinstance(dt, (datetime, time)): + truncator.update(self._time_truncator(dt)) + truncator.update(self._tz_truncator(dt)) + + if truncator: + return dt.replace(**truncator) + else: + return dt + + def _populate_time(self): + if self._time_str is not None: + time_formatter, time_truncation = self._make_timelike_formatter( + self._time_str + ) + self._time_formatter = time_formatter + self._time_truncator = self._make_time_truncator(time_truncation) + else: + self._time_formatter = self._null_formatter + self._time_truncator = self._make_time_truncator(timedelta(days=1)) + + def _populate_tz(self): + if self._tz_str is not None: + if self._tz_str == "Z": + self._tz_formatter = self._tz_z_formatter + self._tz_truncator = self._make_tz_truncator(None) + else: + base_formatter, tz_truncation = self._make_timelike_formatter( + self._tz_str + ) + + self._tz_formatter = self._make_tz_formatter(base_formatter) + self._tz_truncator = self._make_tz_truncator(tz_truncation) + else: + self._tz_formatter = self._null_formatter + self._tz_truncator = self._remove_tzinfo_truncator + + def _make_timelike_formatter(self, time_str): + time_elements = ("%(f", "%S", "%M", "%H") + truncation_elements = (None, self._SECOND, self._MINUTE, self._HOUR) + + truncation = None + for i, elem in enumerate(time_elements): + if elem in time_str: + assert self._all_in( + time_str, time_elements[(i + 1) :] + ), f"Invalid time str: {time_str}" + truncation = truncation_elements[i] + break + else: + assert False, f"Invalid time str: {time_str}" + + if (m := self._FLOAT_RE.search(time_str)) is not None: + time_str = time_str[: m.start()] + + precision = int(m.group("prec")) + assert precision > 0, "0 and negative precision is not supported" + + truncation = timedelta(microseconds=10 ** (6 - min(6, precision))) + + def format_time(dt, *, time_str=time_str, precision=precision): + if precision < 7: + return ( + format(dt, time_str) + + f"{dt.microsecond:06d}"[0:precision] + ) + else: + return ( + 
format(dt, time_str) + + f"{dt.microsecond:06d}" + + "0" * (precision - 6) + ) + + else: + + def format_time(dt, *, time_str=time_str): + return format(dt, time_str) + + return format_time, truncation + + _ARBITRARY_DT = datetime(2000, 1, 1) + + def _make_tz_formatter(self, base_formatter): + def tz_formatter(dt, *, _self=self, _base_formatter=base_formatter): + if dt.tzinfo is None: + return "" + utcoffset = dt.utcoffset() + + t = self._ARBITRARY_DT + abs(utcoffset) + + sign = "+" if utcoffset >= _self._ZERO else "-" + + return sign + _base_formatter(t) + + return tz_formatter + + def _make_time_truncator(self, truncation): + if truncation is None: + + def time_truncator(dt): + return {} + + else: + + def time_truncator(dt, *, _time_truncation=truncation): + time_as_td = timedelta( + hours=dt.hour, + minutes=dt.minute, + seconds=dt.second, + microseconds=dt.microsecond, + ) + truncated = _time_truncation * (time_as_td // _time_truncation) + + if truncated == time_as_td: + return {} + + td_as_datetime = datetime(1970, 1, 1) + truncated + return { + component: getattr(td_as_datetime, component) + for component in ("hour", "minute", "second", "microsecond") + } + + return time_truncator + + def _make_tz_truncator(self, truncation): + if truncation is None: + + def tz_truncator(dt): + return {} + + else: + + def tz_truncator(dt, *, _tz_truncation=truncation): + if dt.tzinfo is None: + return {} + + offset = dt.utcoffset() + sign = -1 if offset < self._ZERO else 1 + + tmp, remainder = divmod(abs(offset), _tz_truncation) + if not remainder: + return {} + + new_offset = tmp * _tz_truncation + new_tzinfo = timezone(sign * new_offset) + return {"tzinfo": new_tzinfo} + + return tz_truncator + + def _null_formatter(self, dt): + return "" + + def _remove_tzinfo_truncator(self, dt): + if dt.tzinfo is not None: + return {"tzinfo": None} + return {} + + def _tz_z_formatter(self, dt): + if dt.tzinfo is None: + return "" + + utcoffset = dt.utcoffset() + + if utcoffset == 
timedelta(0): + return "Z" + + hours, rem = divmod(utcoffset, timedelta(hours=1)) + + rv = f"{hours:+03d}" + if not rem: + return rv + + minutes, rem = divmod(rem, timedelta(minutes=1)) + rv += f":{rem.total_seconds():02f}" + if not rem: + return rv + + microseconds = rem // timedelta(microseconds=1) + rv += f".{microseconds:06d}" + return rv + + @staticmethod + def _all_in(string, substrings): + for substring in substrings: + if substring not in string: + return False + return True + + diff --git a/isoformatter.py b/isoformatter.py index 76c8e97975472c..6530c3e39813fa 100644 --- a/isoformatter.py +++ b/isoformatter.py @@ -1,36 +1,37 @@ -import re -import itertools import functools +import itertools from datetime import date, datetime, time, timedelta, timezone # import hypothesis from test.support.hypothesis_helper import hypothesis +from test.isoformat_helper import IsoFormatter + import unittest def _valid_date_formats(): - return ('%Y-%m-%d', '%Y%m%d', '%G-W%V', '%GW%V', '%G-W%V-%u', '%GW%V%u') + return ("%Y-%m-%d", "%Y%m%d", "%G-W%V", "%GW%V", "%G-W%V-%u", "%GW%V%u") def _valid_time_formats(max_precision=9): subsecond_format_tuples = itertools.product( - ('%H:%M:%S', '%H%M%S'), - (f'%(f{prec})' for prec in range(1, max_precision)), + ("%H:%M:%S", "%H%M%S"), + (f"%(f{prec})" for prec in range(1, max_precision)), ) subsecond_formats = ( - ('.'.join(comps), ','.join(comps)) for comps in subsecond_format_tuples + (".".join(comps), ",".join(comps)) for comps in subsecond_format_tuples ) - time_formats = ('%H', '%H:%M', '%H:%M:%S', '%H%M', '%H%M%S') + tuple( + time_formats = ("%H", "%H:%M", "%H:%M:%S", "%H%M", "%H%M%S") + tuple( itertools.chain.from_iterable(subsecond_formats) ) - tz_formats = ('',) + tuple( - (f'[TZ:{tz_fmt}]' for tz_fmt in time_formats + ('Z',)) + tz_formats = ("",) + tuple( + (f"[TZ:{tz_fmt}]" for tz_fmt in time_formats + ("Z",)) ) - return tuple(map(''.join, itertools.product(time_formats, tz_formats))) + return tuple(map("".join, 
itertools.product(time_formats, tz_formats))) VALID_DATE_FORMATS = _valid_date_formats() @@ -52,291 +53,28 @@ def _make_isoformatter_strategy(): return hypothesis.strategies.tuples( hypothesis.strategies.sampled_from(VALID_DATE_FORMATS), time_format - ).map(lambda x: IsoFormatter(''.join((x[0],) + x[1]))) + ).map(lambda x: IsoFormatter("".join((x[0],) + x[1]))) +DATE_ISOFORMATTERS = hypothesis.strategies.sampled_from(VALID_DATE_FORMATS).map( + IsoFormatter +) +TIME_ISOFORMATTERS = hypothesis.strategies.sampled_from(VALID_TIME_FORMATS).map( + IsoFormatter +) ISOFORMATTERS = _make_isoformatter_strategy() +FIXED_TIMEZONES = hypothesis.strategies.timedeltas( + min_value=timedelta(hours=-23, minutes=59, seconds=59, microseconds=999999), + max_value=timedelta(hours=23, minutes=59, seconds=59, microseconds=999999), +).map(timezone) TIMEZONES = hypothesis.strategies.one_of( hypothesis.strategies.none(), - hypothesis.strategies.timedeltas( - min_value=timedelta( - hours=-23, minutes=59, seconds=59, microseconds=999999 - ), - max_value=timedelta( - hours=23, minutes=59, seconds=59, microseconds=999999 - ), - ).map(timezone), + FIXED_TIMEZONES, hypothesis.strategies.timezones(), ) -class IsoFormatter: - _TZ_RE = re.compile(r'\[TZ:(?P[^\]]+)\]$') - _FLOAT_RE = re.compile(r'%\(f(?P\d+)\)$') - - _MICROSECOND = timedelta(microseconds=1) - _SECOND = timedelta(seconds=1) - _MINUTE = timedelta(minutes=1) - _HOUR = timedelta(hours=1) - _ZERO = timedelta(0) - - def __init__(self, format_str): - self._format_str = format_str - - if (m := self._TZ_RE.search(format_str)) is not None: - self._tz_str = m.group('fmt') - format_str = format_str[: m.start()] - else: - self._tz_str = None - - try: - time_str_start = format_str.index('%H') - except ValueError: - time_str_start = None - - if time_str_start is not None: - self._time_str = format_str[time_str_start:] - self._sep = format_str[time_str_start - 1] - self._date_str = format_str[: time_str_start - 1] - else: - self._time_str = None - 
self._sep = '' - self._date_str = format_str - - self._date_str = self._date_str.replace("%Y", "%4Y").replace( - "%G", "%4G" - ) - - self._populate_time() - self._populate_tz() - - if 'W' in self._date_str: - expected_components = ('%4G', '%V') - else: - expected_components = ('%4Y', '%m', '%d') - assert self._all_in( - self._date_str, expected_components - ), f'Must specify all date components: {self._format_str}' - - def __repr__(self): - return f'{self.__class__.__name__}(\'{self._format_str}\')' - - @functools.singledispatchmethod - def format(self, dt : datetime) -> str: - """Apply the specified ISO8601 format to a datetime.""" - return ( - f'{format(dt, self._date_str)}{self._sep}' - + f'{self._time_formatter(dt)}{self._tz_formatter(dt)}' - ) - - @format.register - def _(self, dt: date) -> str: - return f'{format(dt, self._date_str)}' - - @format.register - def _(self, dt: time) -> str: - return f'{self._time_formatter(dt)} - - def truncate(self, dt): - """Truncate a datetime to the precision level of the format.""" - truncator = {} - if 'W' in self._date_str and '%u' not in self._date_str: - iso_year, week, weekday = dt.isocalendar() - if weekday != 1: - truncated_dt = datetime.fromisocalendar(iso_year, week, 1) - for comp in ('year', 'month', 'day'): - if getattr(dt, comp) != ( - new_comp := getattr(truncated_dt, comp) - ): - truncator[comp] = new_comp - - truncator.update(self._time_truncator(dt)) - truncator.update(self._tz_truncator(dt)) - - if truncator: - return dt.replace(**truncator) - else: - return dt - - def _populate_time(self): - if self._time_str is not None: - time_formatter, time_truncation = self._make_timelike_formatter( - self._time_str - ) - self._time_formatter = time_formatter - self._time_truncator = self._make_time_truncator(time_truncation) - else: - self._time_formatter = self._null_formatter - self._time_truncator = self._make_time_truncator(timedelta(days=1)) - - def _populate_tz(self): - if self._tz_str is not None: - if 
self._tz_str == 'Z': - self._tz_formatter = self._tz_z_formatter - self._tz_truncator = self._make_tz_truncator(None) - else: - base_formatter, tz_truncation = self._make_timelike_formatter( - self._tz_str - ) - - self._tz_formatter = self._make_tz_formatter(base_formatter) - self._tz_truncator = self._make_tz_truncator(tz_truncation) - else: - self._tz_formatter = self._null_formatter - self._tz_truncator = self._remove_tzinfo_truncator - - def _make_timelike_formatter(self, time_str): - time_elements = ('%(f', '%S', '%M', '%H') - truncation_elements = (None, self._SECOND, self._MINUTE, self._HOUR) - - truncation = None - for i, elem in enumerate(time_elements): - if elem in time_str: - assert self._all_in( - time_str, time_elements[(i + 1) :] - ), f'Invalid time str: {time_str}' - truncation = truncation_elements[i] - break - else: - assert False, f'Invalid time str: {time_str}' - - if (m := self._FLOAT_RE.search(time_str)) is not None: - time_str = time_str[: m.start()] - - precision = int(m.group('prec')) - assert precision > 0, '0 and negative precision is not supported' - - truncation = timedelta(microseconds=10 ** (6 - min(6, precision))) - - def format_time(dt, *, time_str=time_str, precision=precision): - if precision < 7: - return ( - format(dt, time_str) - + f'{dt.microsecond:06d}'[0:precision] - ) - else: - return ( - format(dt, time_str) - + f'{dt.microsecond:06d}' - + '0' * (precision - 6) - ) - - else: - - def format_time(dt, *, time_str=time_str): - return format(dt, time_str) - - return format_time, truncation - - _ARBITRARY_DT = datetime(2000, 1, 1) - - def _make_tz_formatter(self, base_formatter): - def tz_formatter(dt, *, _self=self, _base_formatter=base_formatter): - if dt.tzinfo is None: - return '' - utcoffset = dt.utcoffset() - - t = self._ARBITRARY_DT + abs(utcoffset) - - sign = '+' if utcoffset >= _self._ZERO else '-' - - return sign + _base_formatter(t) - - return tz_formatter - - def _make_time_truncator(self, truncation): - if 
truncation is None: - - def time_truncator(dt): - return {} - - else: - - def time_truncator(dt, *, _time_truncation=truncation): - time_as_td = timedelta( - hours=dt.hour, - minutes=dt.minute, - seconds=dt.second, - microseconds=dt.microsecond, - ) - truncated = _time_truncation * (time_as_td // _time_truncation) - - if truncated == time_as_td: - return {} - - td_as_datetime = datetime(1970, 1, 1) + truncated - return { - component: getattr(td_as_datetime, component) - for component in ('hour', 'minute', 'second', 'microsecond') - } - - return time_truncator - - def _make_tz_truncator(self, truncation): - if truncation is None: - - def tz_truncator(dt): - return {} - - else: - - def tz_truncator(dt, *, _tz_truncation=truncation): - if dt.tzinfo is None: - return {} - - offset = dt.utcoffset() - sign = -1 if offset < self._ZERO else 1 - - tmp, remainder = divmod(abs(offset), _tz_truncation) - if not remainder: - return {} - - new_offset = tmp * _tz_truncation - new_tzinfo = timezone(sign * new_offset) - return {'tzinfo': new_tzinfo} - - return tz_truncator - - def _null_formatter(self, dt): - return '' - - def _remove_tzinfo_truncator(self, dt): - if dt.tzinfo is not None: - return {'tzinfo': None} - return {} - - def _tz_z_formatter(self, dt): - if dt.tzinfo is None: - return '' - - utcoffset = dt.utcoffset() - - if utcoffset == timedelta(0): - return 'Z' - - hours, rem = divmod(utcoffset, timedelta(hours=1)) - - rv = f'{hours:+03d}' - if not rem: - return rv - - minutes, rem = divmod(rem, timedelta(minutes=1)) - rv += f':{rem.total_seconds():02f}' - if not rem: - return rv - - microseconds = rem // timedelta(microseconds=1) - rv += f'.{microseconds:06d}' - return rv - - @staticmethod - def _all_in(string, substrings): - for substring in substrings: - if substring not in string: - return False - return True - - +DEFAULT_D = date(2025, 1, 2) DEFAULT_DT = datetime(2025, 1, 2, 3, 4, 5, 678901) AWARE_UTC_DT = datetime(2025, 1, 2, 3, 4, 5, 678901, tzinfo=timezone.utc) 
AWARE_POS_DT = datetime( @@ -347,7 +85,105 @@ def _all_in(string, substrings): ) +def _cross_product_examples(**kwargs): + params, values = zip(*kwargs.items()) + + example_stack = [] + for value_set in itertools.product(*values): + example_stack.append(hypothesis.example(**dict(zip(params, value_set)))) + + return functools.reduce(lambda a, b: a(b), example_stack) + + class IsoFormatTest(unittest.TestCase): + @hypothesis.given( + d=hypothesis.strategies.dates(), + iso_formatter=DATE_ISOFORMATTERS, + ) + @_cross_product_examples( + d=[ + date(2025, 1, 2), + date(2000, 1, 1), + date(1, 1, 1), + date(9999, 12, 31), + ], + iso_formatter=map(IsoFormatter, ["%Y-%m-%d", "%Y%m%d"]), + ) + @_cross_product_examples( + d=[date(2025, 1, 2), date(2025, 12, 31), date(2023, 1, 1)], + iso_formatter=map( + IsoFormatter, ["%G-W%V", "%GW%V", "%G-W%V-%u", "%GW%V%u"] + ), + ) + def test_dates(self, d, iso_formatter): + input_str = iso_formatter.format(d) + actual = type(d).fromisoformat(input_str) + expected = iso_formatter.truncate(d) + + self.assertEqual( + actual, + expected, + f"\n{actual} != {expected}\n" + + f"actual = {actual!r}\n" + + f"expected = {expected!r}\n" + + f"input_str = {input_str}\n" + + f"formatter = {iso_formatter!r}", + ) + + @hypothesis.given( + t=hypothesis.strategies.times( + timezones=FIXED_TIMEZONES | hypothesis.strategies.none() + ), + iso_formatter=TIME_ISOFORMATTERS, + ) + @_cross_product_examples( + t=[ + time(0, 0), + time(12, 0), + time(23, 59, 59, 999999), + time(12, 0, tzinfo=timezone.utc), + time(12, 0, tzinfo=timezone(timedelta(hours=-5))), + ], + iso_formatter=map( + IsoFormatter, + [ + "%H:%M:%S", + "%H%M%S", + "%H:%M:%S.%f", + "%H%M%S.%f", + "%H:%M:%S[TZ:%H:%M]", + "%H:%M:%S[TZ:%H%M]", + ], + ), + ) + @hypothesis.example( + t=time(0, 0, tzinfo=timezone.utc), + iso_formatter=IsoFormatter("%H:%M:%S[TZ:Z]"), + ) + @_cross_product_examples( + t=[ + time(0, 0, tzinfo=timezone(timedelta(hours=5, minutes=30))), + ], + iso_formatter=map( + 
IsoFormatter, ("%H:%M:%S[TZ:%H]", "%H:%M:%S[TZ:%H:%M]") + ), + ) + def test_times(self, t, iso_formatter): + input_str = iso_formatter.format(t) + actual = type(t).fromisoformat(input_str) + expected = iso_formatter.truncate(t) + + self.assertEqual( + actual, + expected, + f"\n{actual} != {expected}\n" + + f"actual = {actual!r}\n" + + f"expected = {expected!r} \n" + + f"input_str = {input_str}\n" + + f"formatter = {iso_formatter!r}", + ) + + @unittest.skip("Broken atm") @hypothesis.given( dt=hypothesis.strategies.datetimes(timezones=TIMEZONES), iso_formatter=ISOFORMATTERS, From 7d2fd330ae4b726bd50de954fd1490f9331febe1 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Wed, 27 Apr 2022 16:39:22 -0600 Subject: [PATCH 08/42] Final location for isoformatter and strategies --- Lib/test/isoformat_helpers/__init__.py | 0 .../isoformatter.py} | 0 Lib/test/isoformat_helpers/strategies.py | 67 +++++++++++++++++++ 3 files changed, 67 insertions(+) create mode 100644 Lib/test/isoformat_helpers/__init__.py rename Lib/test/{isoformat_helper.py => isoformat_helpers/isoformatter.py} (100%) create mode 100644 Lib/test/isoformat_helpers/strategies.py diff --git a/Lib/test/isoformat_helpers/__init__.py b/Lib/test/isoformat_helpers/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Lib/test/isoformat_helper.py b/Lib/test/isoformat_helpers/isoformatter.py similarity index 100% rename from Lib/test/isoformat_helper.py rename to Lib/test/isoformat_helpers/isoformatter.py diff --git a/Lib/test/isoformat_helpers/strategies.py b/Lib/test/isoformat_helpers/strategies.py new file mode 100644 index 00000000000000..ca3ceb348edb86 --- /dev/null +++ b/Lib/test/isoformat_helpers/strategies.py @@ -0,0 +1,67 @@ +from datetime import date, datetime, time, timedelta, timezone +import itertools + +from test.support.hypothesis_helper import hypothesis + +from .isoformatter import IsoFormatter + +def _valid_date_formats(): + return ("%Y-%m-%d", "%Y%m%d", "%G-W%V", "%GW%V", 
"%G-W%V-%u", "%GW%V%u") + + +def _valid_time_formats(max_precision=9): + subsecond_format_tuples = itertools.product( + ("%H:%M:%S", "%H%M%S"), + (f"%(f{prec})" for prec in range(1, max_precision)), + ) + subsecond_formats = ( + (".".join(comps), ",".join(comps)) for comps in subsecond_format_tuples + ) + time_formats = ("%H", "%H:%M", "%H:%M:%S", "%H%M", "%H%M%S") + tuple( + itertools.chain.from_iterable(subsecond_formats) + ) + + tz_formats = ("",) + tuple( + (f"[TZ:{tz_fmt}]" for tz_fmt in time_formats + ("Z",)) + ) + + return tuple(map("".join, itertools.product(time_formats, tz_formats))) + +def _make_isoformatter_strategy(): + time_format = hypothesis.strategies.one_of( + hypothesis.strategies.just(()), # No time format + hypothesis.strategies.tuples( + hypothesis.strategies.one_of( + hypothesis.strategies.just("T"), # Shrink towards T and space + hypothesis.strategies.just(" "), + hypothesis.strategies.characters(), + ), + hypothesis.strategies.sampled_from(VALID_TIME_FORMATS), + ), + ) + + return hypothesis.strategies.tuples( + hypothesis.strategies.sampled_from(VALID_DATE_FORMATS), time_format + ).map(lambda x: IsoFormatter("".join((x[0],) + x[1]))) + + + +VALID_DATE_FORMATS = _valid_date_formats() +VALID_TIME_FORMATS = _valid_time_formats() + +DATE_ISOFORMATTERS = hypothesis.strategies.sampled_from(VALID_DATE_FORMATS).map( + IsoFormatter +) +TIME_ISOFORMATTERS = hypothesis.strategies.sampled_from(VALID_TIME_FORMATS).map( + IsoFormatter +) +ISOFORMATTERS = _make_isoformatter_strategy() +FIXED_TIMEZONES = hypothesis.strategies.timedeltas( + min_value=timedelta(hours=-23, minutes=59, seconds=59, microseconds=999999), + max_value=timedelta(hours=23, minutes=59, seconds=59, microseconds=999999), +).map(timezone) +TIMEZONES = hypothesis.strategies.one_of( + hypothesis.strategies.none(), + FIXED_TIMEZONES, + hypothesis.strategies.timezones(), +) From 72266c42af58fc4a3f25f9850410985e82f78b55 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Wed, 27 Apr 2022 
16:40:18 -0600 Subject: [PATCH 09/42] Working version of date.isoformat --- Lib/datetime.py | 98 ++++++++++++++++++++++++-------------- Lib/test/datetimetester.py | 72 +++++++++++++++++++++++++++- 2 files changed, 134 insertions(+), 36 deletions(-) diff --git a/Lib/datetime.py b/Lib/datetime.py index 7f79aa436eb5ea..02b7a0d9c1aa0c 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -265,18 +265,38 @@ def _wrap_strftime(object, format, timetuple): def _parse_isoformat_date(dtstr): # It is assumed that this function will only be called with a # string of length exactly 10, and (though this is not used) ASCII-only + assert len(dtstr) in (7, 8, 10) year = int(dtstr[0:4]) - if dtstr[4] != '-': - raise ValueError('Invalid date separator: %s' % dtstr[4]) + has_sep = dtstr[4] == '-' - month = int(dtstr[5:7]) + pos = 4 + has_sep + if dtstr[pos:pos + 1] == "W": + # YYYY-?Www-?D? + pos += 1 + weekno = int(dtstr[pos:pos + 2]) + pos += 2 + + dayno = 1 + if len(dtstr) > pos: + if (dtstr[pos:pos + 1] == '-') != has_sep: + raise ValueError('Inconsistent use of dash separator') + + pos += has_sep - if dtstr[7] != '-': - raise ValueError('Invalid date separator') + dayno = int(dtstr[pos:pos + 1]) + + return _isoweek_to_gregorian(year, weekno, dayno) + else: + month = int(dtstr[pos:pos + 2]) + pos += 2 + if (dtstr[pos:pos + 1] == "-") != has_sep: + raise ValueError('Inconsistent use of dash separator') - day = int(dtstr[8:10]) + pos += has_sep + day = int(dtstr[pos:pos + 2]) + + return year, month, day - return [year, month, day] def _parse_hh_mm_ss_ff(tstr): # Parses things of the form HH[:MM[:SS[.fff[fff]]]] @@ -356,6 +376,38 @@ def _parse_isoformat_time(tstr): return time_comps +# tuple[int, int, int] -> tuple[int, int, int] version of date.fromisocalendar +def _isoweek_to_gregorian(year, week, day): + # Year is bounded this way because 9999-12-31 is (9999, 52, 5) + if not MINYEAR <= year <= MAXYEAR: + raise ValueError(f"Year is out of range: {year}") + + if not 0 < week < 53: + 
out_of_range = True + + if week == 53: + # ISO years have 53 weeks in them on years starting with a + # Thursday and leap years starting on a Wednesday + first_weekday = _ymd2ord(year, 1, 1) % 7 + if (first_weekday == 4 or (first_weekday == 3 and + _is_leap(year))): + out_of_range = False + + if out_of_range: + raise ValueError(f"Invalid week: {week}") + + if not 0 < day < 8: + raise ValueError(f"Invalid weekday: {day} (range is [1, 7])") + + # Now compute the offset from (Y, 1, 1) in days: + day_offset = (week - 1) * 7 + (day - 1) + + # Calculate the ordinal day for monday, week 1 + day_1 = _isoweek1monday(year) + ord_day = day_1 + day_offset + + return _ord2ymd(ord_day) + # Just raise TypeError if the arg isn't None or a string. def _check_tzname(name): @@ -851,8 +903,10 @@ def fromisoformat(cls, date_string): if not isinstance(date_string, str): raise TypeError('fromisoformat: argument must be str') + if len(date_string) not in (7, 8, 10): + raise ValueError(f'Invalid isoformat string: {date_string!r}') + try: - assert len(date_string) == 10 return cls(*_parse_isoformat_date(date_string)) except Exception: raise ValueError(f'Invalid isoformat string: {date_string!r}') @@ -862,33 +916,7 @@ def fromisocalendar(cls, year, week, day): """Construct a date from the ISO year, week number and weekday. 
This is the inverse of the date.isocalendar() function""" - # Year is bounded this way because 9999-12-31 is (9999, 52, 5) - if not MINYEAR <= year <= MAXYEAR: - raise ValueError(f"Year is out of range: {year}") - - if not 0 < week < 53: - out_of_range = True - - if week == 53: - # ISO years have 53 weeks in them on years starting with a - # Thursday and leap years starting on a Wednesday - first_weekday = _ymd2ord(year, 1, 1) % 7 - if (first_weekday == 4 or (first_weekday == 3 and - _is_leap(year))): - out_of_range = False - - if out_of_range: - raise ValueError(f"Invalid week: {week}") - - if not 0 < day < 8: - raise ValueError(f"Invalid weekday: {day} (range is [1, 7])") - - # Now compute the offset from (Y, 1, 1) in days: - day_offset = (week - 1) * 7 + (day - 1) - - # Calculate the ordinal day for monday, week 1 - day_1 = _isoweek1monday(year) - ord_day = day_1 + day_offset + return cls(*_isoweek_to_gregorian(year, week, day)) return cls(*_ord2ymd(ord_day)) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index d85b5466f7fc28..c8f3d46da5af05 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -7,6 +7,7 @@ import bisect import copy import decimal +import functools import sys import os import pickle @@ -20,7 +21,10 @@ from operator import lt, le, gt, ge, eq, ne, truediv, floordiv, mod from test import support +from test.isoformat_helpers.isoformatter import IsoFormatter +from test.isoformat_helpers import strategies as iso_strategies from test.support import is_resource_enabled, ALWAYS_EQ, LARGEST, SMALLEST +from test.support.hypothesis_helper import hypothesis import datetime as datetime_module from datetime import MINYEAR, MAXYEAR @@ -57,6 +61,36 @@ NAN = float("nan") +def _cross_product_examples(**kwargs): + """Adds the cross-product of multiple hypothesis examples. + + This is a helper function to make specifying a bunch of examples less + complicated. 
By example: + + @_cross_product_examples(a=[1, 2], b=["a", "b"]) + def test_x(a, b): + ... + + Is equivalent to this (order not guaranteed): + + @hypothesis.example(a=1, b="a") + @hypothesis.example(a=2, b="a") + @hypothesis.example(a=1, b="b") + @hypothesis.example(a=2, b="b") + def test_x(a, b): + ... + """ + params, values = zip(*kwargs.items()) + + def inner(f): + out = f + for value_set in itertools.product(*values): + out = hypothesis.example(**dict(zip(params, value_set)))(out) + return out + + return inner + + ############################################################################# # module tests @@ -1862,7 +1896,6 @@ def test_fromisoformat_fails(self): '2009-12-0a', # Invalid character in day '2009-01-32', # Invalid day '2009-02-29', # Invalid leap day - '20090228', # Valid ISO8601 output not from isoformat() '2009\ud80002\ud80028', # Separators are surrogate codepoints ] @@ -1877,6 +1910,43 @@ def test_fromisoformat_fails_typeerror(self): with self.assertRaises(TypeError): self.theclass.fromisoformat(bad_type) + @hypothesis.given( + d=hypothesis.strategies.dates(), + iso_formatter=iso_strategies.DATE_ISOFORMATTERS, + ) + @_cross_product_examples( + d=[ + date(2025, 1, 2), + date(2000, 1, 1), + date(1, 1, 1), + date(9999, 12, 31), + ], + iso_formatter=map(IsoFormatter, ["%Y-%m-%d", "%Y%m%d"]), + ) + @_cross_product_examples( + d=[date(2025, 1, 2), date(2025, 12, 31), date(2023, 1, 1)], + iso_formatter=map( + IsoFormatter, ["%G-W%V", "%GW%V", "%G-W%V-%u", "%GW%V%u"] + ), + ) + def test_fromisoformat_dates(self, d, iso_formatter): + if type(d) != self.theclass: + d = self.theclass(d.year, d.month, d.day) + + input_str = iso_formatter.format(d) + actual = self.theclass.fromisoformat(input_str) + expected = iso_formatter.truncate(d) + + self.assertEqual( + actual, + expected, + f"\n{actual} != {expected}\n" + + f"actual = {actual!r}\n" + + f"expected = {expected!r}\n" + + f"input_str = {input_str}\n" + + f"formatter = {iso_formatter!r}", + ) + def 
test_fromisocalendar(self): # For each test case, assert that fromisocalendar is the # inverse of the isocalendar function From 8067af1b3245d0299447a96aaeafca85ef6165dc Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Sun, 1 May 2022 13:01:37 -0600 Subject: [PATCH 10/42] Fix failure to set an error --- Modules/_datetimemodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 7d9f40e37521aa..37dfe5b28e7469 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -5429,7 +5429,7 @@ datetime_fromisoformat(PyObject *cls, PyObject *dtstr) // but it makes the reference counting easier if we still sanitize. PyObject *dtstr_clean = _sanitize_isoformat_str(dtstr); if (dtstr_clean == NULL) { - goto error; + goto invalid_string_error; } Py_ssize_t len; From 7b9bca528fda5f6db29569e92224e7b144db79fd Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Sun, 1 May 2022 13:03:00 -0600 Subject: [PATCH 11/42] First version with time parsing allowed --- Lib/datetime.py | 123 ++++++++++++++++++++++++++++++------- Lib/test/datetimetester.py | 94 ++++++++++++++++++++++++++-- Lib/test/test_datetime.py | 4 +- 3 files changed, 195 insertions(+), 26 deletions(-) diff --git a/Lib/datetime.py b/Lib/datetime.py index 02b7a0d9c1aa0c..ed34e0c9823330 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -262,6 +262,60 @@ def _wrap_strftime(object, format, timetuple): return _time.strftime(newformat, timetuple) # Helpers for parsing the result of isoformat() +def _find_isoformat_separator(dtstr): + # See the comment in _datetimemodule.c:_findisoformat_separator + len_dtstr = len(dtstr) + if len_dtstr == 7: + return 7 + + assert len_dtstr > 7 + date_separator = "-" + week_indicator = "W" + + if dtstr[4] == date_separator: + if dtstr[5] == week_indicator: + if len_dtstr < 8: + raise ValueError("Invalid ISO string") + if len_dtstr > 8 and dtstr[8] == date_separator: + if len_dtstr == 9: + raise 
ValueError("Invalid ISO string") + if len_dtstr > 10 and dtstr[10].isdigit(): + # This is as far as we need to resolve the ambiguity for + # the moment - if we have YYYY-Www-##, the separator is + # either a hyphen at 8 or a number at 10. + # + # We'll assume it's a hyphen at 8 because it's way more + # likely that someone will use a hyphen as a separator than + # a number, but at this point it's really best effort + # because this is an extension of the spec anyway. + # TODO(pganssle): Document this + return 8 + return 10 + else: + # YYYY-Www (8) + return 8 + else: + # YYYY-MM-DD (10) + return 10 + else: + if dtstr[4] == week_indicator: + # YYYYWww (7) or YYYYWwwd (8) + for idx in range(7, len_dtstr): + if not dtstr[idx].isdigit(): + break + if idx < 9: + return idx + + if idx % 2 == 0: + # If the index of the last number is even, it's YYYYWwwd + return 7 + else: + return 8 + else: + # YYYYMMDD (8) + return 8 + + def _parse_isoformat_date(dtstr): # It is assumed that this function will only be called with a # string of length exactly 10, and (though this is not used) ASCII-only @@ -295,11 +349,14 @@ def _parse_isoformat_date(dtstr): pos += has_sep day = int(dtstr[pos:pos + 2]) - return year, month, day + return [year, month, day] + + +_FRACTION_CORRECTION = [100000, 10000, 1000, 100, 10] def _parse_hh_mm_ss_ff(tstr): - # Parses things of the form HH[:MM[:SS[.fff[fff]]]] + # Parses things of the form HH[:?MM[:?SS[{.,}fff[fff]]]] len_str = len(tstr) time_comps = [0, 0, 0, 0] @@ -313,27 +370,36 @@ def _parse_hh_mm_ss_ff(tstr): pos += 2 next_char = tstr[pos:pos+1] + if comp == 0: + has_sep = next_char == ':' + if not next_char or comp >= 2: break - if next_char != ':': + if has_sep and next_char != ':': raise ValueError('Invalid time separator: %c' % next_char) - pos += 1 + pos += has_sep if pos < len_str: - if tstr[pos] != '.': + if tstr[pos] not in '.,': raise ValueError('Invalid microsecond component') else: pos += 1 len_remainder = len_str - pos - if len_remainder 
not in (3, 6): - raise ValueError('Invalid microsecond component') - time_comps[3] = int(tstr[pos:]) - if len_remainder == 3: - time_comps[3] *= 1000 + if len_remainder >= 6: + to_parse = 6 + else: + to_parse = len_remainder + + time_comps[3] = int(tstr[pos:(pos+to_parse)]) + if to_parse < 6: + time_comps[3] *= _FRACTION_CORRECTION[to_parse-1] + if (len_remainder > to_parse + and not tstr[(pos+to_parse):].isdigit()): + raise ValueError('Non-digit values in unparsed fraction') return time_comps @@ -343,25 +409,35 @@ def _parse_isoformat_time(tstr): if len_str < 2: raise ValueError('Isoformat time too short') - # This is equivalent to re.search('[+-]', tstr), but faster - tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1) + # This is equivalent to re.search('[-+Z]', tstr), but faster + tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1 or tstr.find('Z') + 1) timestr = tstr[:tz_pos-1] if tz_pos > 0 else tstr time_comps = _parse_hh_mm_ss_ff(timestr) tzi = None - if tz_pos > 0: + if tz_pos == len_str and tstr[-1] == 'Z': + tzi = timezone.utc + elif tz_pos > 0: tzstr = tstr[tz_pos:] # Valid time zone strings are: + # HH len: 2 + # HHMM len: 4 # HH:MM len: 5 + # HHMMSS len: 6 # HH:MM:SS len: 8 - # HH:MM:SS.ffffff len: 15 + # HH:MM:SS.f+ len: 10+ - if len(tzstr) not in (5, 8, 15): + if (len_tzstr := len(tzstr)) < 10 and (len_tzstr % 2) and len_tzstr != 5: raise ValueError('Malformed time zone string') - tz_comps = _parse_hh_mm_ss_ff(tzstr) + + if tzstr == 'Z': + tz_comps = (0, 0, 0, 0) + else: + tz_comps = _parse_hh_mm_ss_ff(tzstr) + if all(x == 0 for x in tz_comps): tzi = timezone.utc else: @@ -406,7 +482,7 @@ def _isoweek_to_gregorian(year, week, day): day_1 = _isoweek1monday(year) ord_day = day_1 + day_offset - return _ord2ymd(ord_day) + return list(_ord2ymd(ord_day)) # Just raise TypeError if the arg isn't None or a string. 
@@ -1743,11 +1819,15 @@ def fromisoformat(cls, date_string): if not isinstance(date_string, str): raise TypeError('fromisoformat: argument must be str') - # Split this at the separator - dstr = date_string[0:10] - tstr = date_string[11:] + if len(date_string) < 7: + raise ValueError(f'Invalid isoformat string: {date_string!r}') + # Split this at the separator try: + separator_location = _find_isoformat_separator(date_string) + dstr = date_string[0:separator_location] + tstr = date_string[(separator_location+1):] + date_components = _parse_isoformat_date(dstr) except ValueError: raise ValueError(f'Invalid isoformat string: {date_string!r}') @@ -2537,7 +2617,8 @@ def _name_from_offset(delta): _format_time, _format_offset, _index, _is_leap, _isoweek1monday, _math, _ord2ymd, _time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord, _divide_and_round, _parse_isoformat_date, _parse_isoformat_time, - _parse_hh_mm_ss_ff, _IsoCalendarDate) + _parse_hh_mm_ss_ff, _IsoCalendarDate, _isoweek_to_gregorian, + _find_isoformat_separator, _FRACTION_CORRECTION) # XXX Since import * above excludes names that start with _, # docstring does not get overwritten. 
In the future, it may be # appropriate to maintain a single module level docstring and diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index c8f3d46da5af05..ffa96c28d799fa 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -3073,6 +3073,18 @@ def test_fromisoformat_timespecs(self): dt_rt = self.theclass.fromisoformat(dtstr) self.assertEqual(dt, dt_rt) + def test_fromisoformat_examples_datetime(self): + test_cases = [ + ('2009-04-19T03:15:45.2345', self.theclass(2009, 4, 19, 3, 15, 45, 234500)), + ('2009-04-19T03:15:45.1234567', self.theclass(2009, 4, 19, 3, 15, 45, 123456)), + ] + + for input_str, expected in test_cases: + with self.subTest(input_str=input_str): + actual = self.theclass.fromisoformat(input_str) + + self.assertEqual(actual, expected) + def test_fromisoformat_fails_datetime(self): # Test that fromisoformat() fails on invalid values bad_strs = [ @@ -3086,8 +3098,6 @@ def test_fromisoformat_fails_datetime(self): '2009-04-19T03;15:45', # Bad first time separator '2009-04-19T03:15;45', # Bad second time separator '2009-04-19T03:15:4500:00', # Bad time zone separator - '2009-04-19T03:15:45.2345', # Too many digits for milliseconds - '2009-04-19T03:15:45.1234567', # Too many digits for microseconds '2009-04-19T03:15:45.123456+24:30', # Invalid time zone offset '2009-04-19T03:15:45.123456-24:30', # Invalid negative offset '2009-04-10ᛇᛇᛇᛇᛇ12:15', # Too many unicode separators @@ -4032,6 +4042,24 @@ def test_fromisoformat_timespecs(self): t_rt = self.theclass.fromisoformat(tstr) self.assertEqual(t, t_rt) + def test_fromisoformat_fractions(self): + strs = [ + ('12:30:45.1', (12, 30, 45, 100000)), + ('12:30:45.12', (12, 30, 45, 120000)), + ('12:30:45.123', (12, 30, 45, 123000)), + ('12:30:45.1234', (12, 30, 45, 123400)), + ('12:30:45.12345', (12, 30, 45, 123450)), + ('12:30:45.123456', (12, 30, 45, 123456)), + ('12:30:45.1234567', (12, 30, 45, 123456)), + ('12:30:45.12345678', (12, 30, 45, 123456)), + ] + + for 
time_str, time_comps in strs: + expected = self.theclass(*time_comps) + actual = self.theclass.fromisoformat(time_str) + + self.assertEqual(actual, expected) + def test_fromisoformat_fails(self): bad_strs = [ '', # Empty string @@ -4045,15 +4073,17 @@ def test_fromisoformat_fails(self): '1a:30:45.334034', # Invalid character in hours '12:a0:45.334034', # Invalid character in minutes '12:30:a5.334034', # Invalid character in seconds - '12:30:45.1234', # Too many digits for milliseconds - '12:30:45.1234567', # Too many digits for microseconds '12:30:45.123456+24:30', # Invalid time zone offset '12:30:45.123456-24:30', # Invalid negative offset '12:30:45', # Uses full-width unicode colons + '12:30:45.123456a', # Non-numeric data after 6 components + '12:30:45.123456789a', # Non-numeric data after 9 components '12:30:45․123456', # Uses \u2024 in place of decimal point '12:30:45a', # Extra at tend of basic time '12:30:45.123a', # Extra at end of millisecond time '12:30:45.123456a', # Extra at end of microsecond time + '12:30:45.123456-', # Extra at end of microsecond time + '12:30:45.123456+', # Extra at end of microsecond time '12:30:45.123456+12:00:30a', # Extra at end of full time ] @@ -4080,6 +4110,62 @@ class TimeSubclass(self.theclass): self.assertEqual(tsc, tsc_rt) self.assertIsInstance(tsc_rt, TimeSubclass) + @hypothesis.given( + t=hypothesis.strategies.times( + timezones=iso_strategies.FIXED_TIMEZONES | hypothesis.strategies.none() + ), + iso_formatter=iso_strategies.TIME_ISOFORMATTERS, + ) + @_cross_product_examples( + t=[ + time(0, 0), + time(12, 0), + time(23, 59, 59, 999999), + time(12, 0, tzinfo=timezone.utc), + time(12, 0, tzinfo=timezone(timedelta(hours=-5))), + ], + iso_formatter=map( + IsoFormatter, + [ + "%H:%M:%S", + "%H%M%S", + "%H:%M:%S.%(f6)", + "%H%M%S.%(f6)", + "%H:%M:%S.%(f3)", + "%H%M%S.%(f3)", + "%H:%M:%S[TZ:%H:%M]", + "%H:%M:%S[TZ:%H%M]", + ], + ), + ) + @hypothesis.example( + t=time(0, 0, tzinfo=timezone.utc), + 
iso_formatter=IsoFormatter("%H:%M:%S[TZ:Z]"), + ) + @_cross_product_examples( + t=[ + time(0, 0, tzinfo=timezone(timedelta(hours=5, minutes=30))), + ], + iso_formatter=map( + IsoFormatter, ("%H:%M:%S[TZ:%H]", "%H:%M:%S[TZ:%H:%M]") + ), + ) + def test_isoformat_times(self, t, iso_formatter): + input_str = iso_formatter.format(t) + actual = type(t).fromisoformat(input_str) + expected = iso_formatter.truncate(t) + + self.assertEqual( + actual, + expected, + f"\n{actual} != {expected}\n" + + f"actual = {actual!r}\n" + + f"expected = {expected!r} \n" + + f"input_str = {input_str}\n" + + f"formatter = {iso_formatter!r}", + ) + + def test_subclass_timetz(self): class C(self.theclass): diff --git a/Lib/test/test_datetime.py b/Lib/test/test_datetime.py index 7f9094fa7bd4e6..1bfdb248a69663 100644 --- a/Lib/test/test_datetime.py +++ b/Lib/test/test_datetime.py @@ -8,7 +8,9 @@ def load_tests(loader, tests, pattern): try: - pure_tests = import_fresh_module(TESTS, fresh=['datetime', '_strptime'], + pure_tests = import_fresh_module(TESTS, fresh=[ + 'datetime', '_strptime', 'test.isoformat_helpers.isoformatter', + 'test.isoformat_helpers.strategies'], blocked=['_datetime']) fast_tests = import_fresh_module(TESTS, fresh=['datetime', '_datetime', '_strptime']) From 328e781fb3443eab5ff90799d0b35560a5fa943f Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Sun, 1 May 2022 13:29:24 -0600 Subject: [PATCH 12/42] Add support for leading T in time formatters --- Lib/datetime.py | 5 +++++ Lib/test/datetimetester.py | 5 +++++ Modules/_datetimemodule.c | 8 ++++++++ 3 files changed, 18 insertions(+) diff --git a/Lib/datetime.py b/Lib/datetime.py index ed34e0c9823330..b5bccb185e5c58 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -1535,6 +1535,11 @@ def fromisoformat(cls, time_string): if not isinstance(time_string, str): raise TypeError('fromisoformat: argument must be str') + # The spec actually requires that time-only ISO-8601 strings start with + # T, but the extended format allows 
this to be omitted as long as there + # is no ambiguity with date strings. + time_string = time_string.removeprefix('T') + try: return cls(*_parse_isoformat_time(time_string)) except Exception: diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index ffa96c28d799fa..9118081fbd5282 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -4127,6 +4127,9 @@ class TimeSubclass(self.theclass): iso_formatter=map( IsoFormatter, [ + "%H:%M", + "T%H:%M", + "%H%M", "%H:%M:%S", "%H%M%S", "%H:%M:%S.%(f6)", @@ -4135,6 +4138,8 @@ class TimeSubclass(self.theclass): "%H%M%S.%(f3)", "%H:%M:%S[TZ:%H:%M]", "%H:%M:%S[TZ:%H%M]", + "T%H:%M:%S", + "T%H%M%S", ], ), ) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 37dfe5b28e7469..8f63fdc4acedbb 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -4640,6 +4640,14 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) { goto invalid_string_error; } + // The spec actually requires that time-only ISO-8601 strings start with + // T, but the extended format allows this to be omitted as long as there + // is no ambiguity with date strings. 
+ if (*p == 'T') { + p += 1; + len -= 1; + } + int hour = 0, minute = 0, second = 0, microsecond = 0; int tzoffset, tzimicrosecond = 0; int rv = parse_isoformat_time(p, len, From 4d0e3a90f63f5c791128cb3c04d76e472f020a1c Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Sun, 1 May 2022 13:29:52 -0600 Subject: [PATCH 13/42] Fix pure python separator detection in YYYYWwwd --- Lib/datetime.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/datetime.py b/Lib/datetime.py index b5bccb185e5c58..fd7eb30e3c9915 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -300,9 +300,12 @@ def _find_isoformat_separator(dtstr): else: if dtstr[4] == week_indicator: # YYYYWww (7) or YYYYWwwd (8) - for idx in range(7, len_dtstr): + idx = 7 + while idx < len_dtstr: if not dtstr[idx].isdigit(): break + idx += 1 + if idx < 9: return idx From 3e600f2ec7341bd1530702f78654dfc908d27be3 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Sun, 1 May 2022 15:06:22 -0600 Subject: [PATCH 14/42] Version with all tests passing --- Lib/test/datetimetester.py | 84 ++++++++++++++++++++++ Lib/test/isoformat_helpers/isoformatter.py | 7 +- 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 9118081fbd5282..7d506a0ada494d 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -3085,6 +3085,90 @@ def test_fromisoformat_examples_datetime(self): self.assertEqual(actual, expected) + DEFAULT_DT = datetime(2025, 1, 2, 3, 4, 5, 678901) + AWARE_UTC_DT = datetime(2025, 1, 2, 3, 4, 5, 678901, tzinfo=timezone.utc) + AWARE_POS_DT = datetime( + 2025, 1, 2, 3, 5, 6, 678901, tzinfo=timezone(timedelta(hours=3)) + ) + AWARE_NEG_DT = datetime( + 2025, 1, 2, 3, 5, 6, 678901, tzinfo=timezone(-timedelta(hours=3)) + ) + + @hypothesis.given( + dt=hypothesis.strategies.datetimes(timezones=iso_strategies.TIMEZONES), + iso_formatter=iso_strategies.ISOFORMATTERS, + ) + # fmt: off + @hypothesis.example(dt=DEFAULT_DT, 
iso_formatter=IsoFormatter("%Y-%m-%d")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%d")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H:%M")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M%S")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M%S")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S.%(f1)")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S,%(f1)")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) + @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) + @hypothesis.example(dt=AWARE_POS_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) + @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) + @hypothesis.example(dt=AWARE_NEG_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) + @hypothesis.example(dt=AWARE_POS_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) + @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) + @hypothesis.example(dt=AWARE_NEG_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) + @hypothesis.example(dt=AWARE_POS_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) + 
@hypothesis.example(dt=datetime(2000, 1, 1, + tzinfo=timezone(-timedelta(hours=-22, microseconds=1))), + iso_formatter=IsoFormatter("%Y-%m-%dT%H[TZ:%H]")) + @hypothesis.example(dt=AWARE_UTC_DT, + iso_formatter=IsoFormatter("%Y-%m-%d0%H:%M:%S,%(f1)[TZ:%H:%M:%S.%(f2)]")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%G-W%V")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%G-W%V-%u")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%GW%V:%H")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%GW%V5%H")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%GW%V%u5%H")) + @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%G-W%V0%H[TZ:%H]")) + # fmt: on + def test_fromisoformat(self, dt, iso_formatter): + + dt = self.theclass( + dt.year, + dt.month, + dt.day, + dt.hour, + dt.minute, + dt.second, + dt.microsecond, + tzinfo=dt.tzinfo, + fold=dt.fold + ) + + if "%G" in iso_formatter._format_str: + if ( + iso_formatter._format_str.startswith("%G-W%V-%u") + and len(iso_formatter._format_str) > 9 + ): + hypothesis.assume(not iso_formatter._format_str[9].isdigit()) + + input_str = iso_formatter.format(dt) + actual = self.theclass.fromisoformat(input_str) + expected = iso_formatter.truncate(dt) + + self.assertEqual( + actual, + expected, + f"\n{actual} != {expected}\n" + + f"actual = {actual!r}\n" + + f"expected = {expected!r} \n" + + f"input_str = {input_str}", + ) + + def test_fromisoformat_fails_datetime(self): # Test that fromisoformat() fails on invalid values bad_strs = [ diff --git a/Lib/test/isoformat_helpers/isoformatter.py b/Lib/test/isoformat_helpers/isoformatter.py index d5069939a3c784..72a493897e073a 100644 --- a/Lib/test/isoformat_helpers/isoformatter.py +++ b/Lib/test/isoformat_helpers/isoformatter.py @@ -3,6 +3,7 @@ import functools from datetime import date, datetime, time, timedelta, timezone +from typing import Any class IsoFormatter: @@ -77,7 +78,11 @@ def 
__repr__(self): return f"{self.__class__.__name__}('{self._format_str}')" @functools.singledispatchmethod - def format(self, dt: datetime) -> str: + def format(self, dt: Any) -> str: + raise NotImplementedError() + + @format.register + def _(self, dt: datetime) -> str: """Apply the specified ISO8601 format to a datetime.""" return ( f"{format(dt, self._date_str)}{self._sep}" From e26f06f09320e7cc2bdc0bc35f7bbf61dc754005 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 2 May 2022 12:48:10 -0600 Subject: [PATCH 15/42] Migrate fromisoformat tests to their own file --- Lib/test/datetimetester.py | 215 ------ Lib/test/isoformat_helpers/__init__.py | 0 Lib/test/isoformat_helpers/isoformatter.py | 300 --------- Lib/test/isoformat_helpers/strategies.py | 67 -- Lib/test/test_datetime.py | 4 +- Lib/test/test_fromisoformat.py | 726 +++++++++++++++++++++ 6 files changed, 727 insertions(+), 585 deletions(-) delete mode 100644 Lib/test/isoformat_helpers/__init__.py delete mode 100644 Lib/test/isoformat_helpers/isoformatter.py delete mode 100644 Lib/test/isoformat_helpers/strategies.py create mode 100644 Lib/test/test_fromisoformat.py diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 7d506a0ada494d..6836a3e25e51bd 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -21,10 +21,7 @@ from operator import lt, le, gt, ge, eq, ne, truediv, floordiv, mod from test import support -from test.isoformat_helpers.isoformatter import IsoFormatter -from test.isoformat_helpers import strategies as iso_strategies from test.support import is_resource_enabled, ALWAYS_EQ, LARGEST, SMALLEST -from test.support.hypothesis_helper import hypothesis import datetime as datetime_module from datetime import MINYEAR, MAXYEAR @@ -61,36 +58,6 @@ NAN = float("nan") -def _cross_product_examples(**kwargs): - """Adds the cross-product of multiple hypothesis examples. - - This is a helper function to make specifying a bunch of examples less - complicated. 
By example: - - @_cross_product_examples(a=[1, 2], b=["a", "b"]) - def test_x(a, b): - ... - - Is equivalent to this (order not guaranteed): - - @hypothesis.example(a=1, b="a") - @hypothesis.example(a=2, b="a") - @hypothesis.example(a=1, b="b") - @hypothesis.example(a=2, b="b") - def test_x(a, b): - ... - """ - params, values = zip(*kwargs.items()) - - def inner(f): - out = f - for value_set in itertools.product(*values): - out = hypothesis.example(**dict(zip(params, value_set)))(out) - return out - - return inner - - ############################################################################# # module tests @@ -1910,43 +1877,6 @@ def test_fromisoformat_fails_typeerror(self): with self.assertRaises(TypeError): self.theclass.fromisoformat(bad_type) - @hypothesis.given( - d=hypothesis.strategies.dates(), - iso_formatter=iso_strategies.DATE_ISOFORMATTERS, - ) - @_cross_product_examples( - d=[ - date(2025, 1, 2), - date(2000, 1, 1), - date(1, 1, 1), - date(9999, 12, 31), - ], - iso_formatter=map(IsoFormatter, ["%Y-%m-%d", "%Y%m%d"]), - ) - @_cross_product_examples( - d=[date(2025, 1, 2), date(2025, 12, 31), date(2023, 1, 1)], - iso_formatter=map( - IsoFormatter, ["%G-W%V", "%GW%V", "%G-W%V-%u", "%GW%V%u"] - ), - ) - def test_fromisoformat_dates(self, d, iso_formatter): - if type(d) != self.theclass: - d = self.theclass(d.year, d.month, d.day) - - input_str = iso_formatter.format(d) - actual = self.theclass.fromisoformat(input_str) - expected = iso_formatter.truncate(d) - - self.assertEqual( - actual, - expected, - f"\n{actual} != {expected}\n" - + f"actual = {actual!r}\n" - + f"expected = {expected!r}\n" - + f"input_str = {input_str}\n" - + f"formatter = {iso_formatter!r}", - ) - def test_fromisocalendar(self): # For each test case, assert that fromisocalendar is the # inverse of the isocalendar function @@ -3085,90 +3015,6 @@ def test_fromisoformat_examples_datetime(self): self.assertEqual(actual, expected) - DEFAULT_DT = datetime(2025, 1, 2, 3, 4, 5, 678901) - 
AWARE_UTC_DT = datetime(2025, 1, 2, 3, 4, 5, 678901, tzinfo=timezone.utc) - AWARE_POS_DT = datetime( - 2025, 1, 2, 3, 5, 6, 678901, tzinfo=timezone(timedelta(hours=3)) - ) - AWARE_NEG_DT = datetime( - 2025, 1, 2, 3, 5, 6, 678901, tzinfo=timezone(-timedelta(hours=3)) - ) - - @hypothesis.given( - dt=hypothesis.strategies.datetimes(timezones=iso_strategies.TIMEZONES), - iso_formatter=iso_strategies.ISOFORMATTERS, - ) - # fmt: off - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%d")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%d")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H:%M")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M%S")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M%S")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S.%(f1)")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S,%(f1)")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) - @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) - @hypothesis.example(dt=AWARE_POS_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) - @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) - @hypothesis.example(dt=AWARE_NEG_DT, 
iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) - @hypothesis.example(dt=AWARE_POS_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) - @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) - @hypothesis.example(dt=AWARE_NEG_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) - @hypothesis.example(dt=AWARE_POS_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) - @hypothesis.example(dt=datetime(2000, 1, 1, - tzinfo=timezone(-timedelta(hours=-22, microseconds=1))), - iso_formatter=IsoFormatter("%Y-%m-%dT%H[TZ:%H]")) - @hypothesis.example(dt=AWARE_UTC_DT, - iso_formatter=IsoFormatter("%Y-%m-%d0%H:%M:%S,%(f1)[TZ:%H:%M:%S.%(f2)]")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%G-W%V")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%G-W%V-%u")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%GW%V:%H")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%GW%V5%H")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%GW%V%u5%H")) - @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%G-W%V0%H[TZ:%H]")) - # fmt: on - def test_fromisoformat(self, dt, iso_formatter): - - dt = self.theclass( - dt.year, - dt.month, - dt.day, - dt.hour, - dt.minute, - dt.second, - dt.microsecond, - tzinfo=dt.tzinfo, - fold=dt.fold - ) - - if "%G" in iso_formatter._format_str: - if ( - iso_formatter._format_str.startswith("%G-W%V-%u") - and len(iso_formatter._format_str) > 9 - ): - hypothesis.assume(not iso_formatter._format_str[9].isdigit()) - - input_str = iso_formatter.format(dt) - actual = self.theclass.fromisoformat(input_str) - expected = iso_formatter.truncate(dt) - - self.assertEqual( - actual, - expected, - f"\n{actual} != {expected}\n" - + f"actual = {actual!r}\n" - + f"expected = {expected!r} \n" - + f"input_str = {input_str}", - ) - - def test_fromisoformat_fails_datetime(self): # Test that fromisoformat() 
fails on invalid values bad_strs = [ @@ -4194,67 +4040,6 @@ class TimeSubclass(self.theclass): self.assertEqual(tsc, tsc_rt) self.assertIsInstance(tsc_rt, TimeSubclass) - @hypothesis.given( - t=hypothesis.strategies.times( - timezones=iso_strategies.FIXED_TIMEZONES | hypothesis.strategies.none() - ), - iso_formatter=iso_strategies.TIME_ISOFORMATTERS, - ) - @_cross_product_examples( - t=[ - time(0, 0), - time(12, 0), - time(23, 59, 59, 999999), - time(12, 0, tzinfo=timezone.utc), - time(12, 0, tzinfo=timezone(timedelta(hours=-5))), - ], - iso_formatter=map( - IsoFormatter, - [ - "%H:%M", - "T%H:%M", - "%H%M", - "%H:%M:%S", - "%H%M%S", - "%H:%M:%S.%(f6)", - "%H%M%S.%(f6)", - "%H:%M:%S.%(f3)", - "%H%M%S.%(f3)", - "%H:%M:%S[TZ:%H:%M]", - "%H:%M:%S[TZ:%H%M]", - "T%H:%M:%S", - "T%H%M%S", - ], - ), - ) - @hypothesis.example( - t=time(0, 0, tzinfo=timezone.utc), - iso_formatter=IsoFormatter("%H:%M:%S[TZ:Z]"), - ) - @_cross_product_examples( - t=[ - time(0, 0, tzinfo=timezone(timedelta(hours=5, minutes=30))), - ], - iso_formatter=map( - IsoFormatter, ("%H:%M:%S[TZ:%H]", "%H:%M:%S[TZ:%H:%M]") - ), - ) - def test_isoformat_times(self, t, iso_formatter): - input_str = iso_formatter.format(t) - actual = type(t).fromisoformat(input_str) - expected = iso_formatter.truncate(t) - - self.assertEqual( - actual, - expected, - f"\n{actual} != {expected}\n" - + f"actual = {actual!r}\n" - + f"expected = {expected!r} \n" - + f"input_str = {input_str}\n" - + f"formatter = {iso_formatter!r}", - ) - - def test_subclass_timetz(self): class C(self.theclass): diff --git a/Lib/test/isoformat_helpers/__init__.py b/Lib/test/isoformat_helpers/__init__.py deleted file mode 100644 index e69de29bb2d1d6..00000000000000 diff --git a/Lib/test/isoformat_helpers/isoformatter.py b/Lib/test/isoformat_helpers/isoformatter.py deleted file mode 100644 index 72a493897e073a..00000000000000 --- a/Lib/test/isoformat_helpers/isoformatter.py +++ /dev/null @@ -1,300 +0,0 @@ -import re -import itertools -import 
functools - -from datetime import date, datetime, time, timedelta, timezone -from typing import Any - - -class IsoFormatter: - """Helper class to make it possible to round-trip a given ISO 8601 format. - - The main problem this solves is that many ISO 8601 formats are lossy, e.g.:: - - >>> datetime(2022, 5, 19, 12, 30, 15).isoformat(timespec="hours") - 2022-05-19T12 - - This prevents us from easily writing tests that take arbitrary input - datetimes, serializes them to an arbitrary ISO 8601 format and ensures that - the same thing comes back when we try and parse it. - - This class allows you to specify an ISO 8601 format and generate both the - ISO 8601 string and the truncated datetime, like so: - - >>> formatter = IsoFormatter("%Y-%m-%dT%H") - >>> dt = datetime(2022, 5, 19, 12, 30, 15) - >>> formatter.format(dt) - "2022-05-19T12" - >>> formatter.truncate(dt) - datetime.datetime(2022, 5, 19, 12, 0) - """ - - _TZ_RE = re.compile(r"\[TZ:(?P[^\]]+)\]$") - _FLOAT_RE = re.compile(r"%\(f(?P\d+)\)$") - - # Create instances of these unit values for convenience and performance. 
- _MICROSECOND = timedelta(microseconds=1) - _SECOND = timedelta(seconds=1) - _MINUTE = timedelta(minutes=1) - _HOUR = timedelta(hours=1) - _ZERO = timedelta(0) - - def __init__(self, format_str): - self._format_str = format_str - - if (m := self._TZ_RE.search(format_str)) is not None: - self._tz_str = m.group("fmt") - format_str = format_str[: m.start()] - else: - self._tz_str = None - - try: - time_str_start = format_str.index("%H") - except ValueError: - time_str_start = None - - if time_str_start is not None: - self._time_str = format_str[time_str_start:] - self._sep = format_str[time_str_start - 1] - self._date_str = format_str[: time_str_start - 1] - else: - self._time_str = None - self._sep = "" - self._date_str = format_str - - self._date_str = self._date_str.replace("%Y", "%4Y").replace( - "%G", "%4G" - ) - - self._populate_time() - self._populate_tz() - - if "W" in self._date_str: - expected_components = ("%4G", "%V") - else: - expected_components = ("%4Y", "%m", "%d") - - def __repr__(self): - return f"{self.__class__.__name__}('{self._format_str}')" - - @functools.singledispatchmethod - def format(self, dt: Any) -> str: - raise NotImplementedError() - - @format.register - def _(self, dt: datetime) -> str: - """Apply the specified ISO8601 format to a datetime.""" - return ( - f"{format(dt, self._date_str)}{self._sep}" - + f"{self._time_formatter(dt)}{self._tz_formatter(dt)}" - ) - - @format.register - def _(self, dt: date) -> str: - return f"{format(dt, self._date_str)}" - - @format.register - def _(self, dt: time) -> str: - return f"{self._time_formatter(dt)}{self._tz_formatter(dt)}" - - def truncate(self, dt): - """Truncate a datetime to the precision level of the format.""" - truncator = {} - if "W" in self._date_str and "%u" not in self._date_str: - iso_year, week, weekday = dt.isocalendar() - if weekday != 1: - truncated_dt = datetime.fromisocalendar(iso_year, week, 1) - for comp in ("year", "month", "day"): - if getattr(dt, comp) != ( - new_comp := 
getattr(truncated_dt, comp) - ): - truncator[comp] = new_comp - - if isinstance(dt, (datetime, time)): - truncator.update(self._time_truncator(dt)) - truncator.update(self._tz_truncator(dt)) - - if truncator: - return dt.replace(**truncator) - else: - return dt - - def _populate_time(self): - if self._time_str is not None: - time_formatter, time_truncation = self._make_timelike_formatter( - self._time_str - ) - self._time_formatter = time_formatter - self._time_truncator = self._make_time_truncator(time_truncation) - else: - self._time_formatter = self._null_formatter - self._time_truncator = self._make_time_truncator(timedelta(days=1)) - - def _populate_tz(self): - if self._tz_str is not None: - if self._tz_str == "Z": - self._tz_formatter = self._tz_z_formatter - self._tz_truncator = self._make_tz_truncator(None) - else: - base_formatter, tz_truncation = self._make_timelike_formatter( - self._tz_str - ) - - self._tz_formatter = self._make_tz_formatter(base_formatter) - self._tz_truncator = self._make_tz_truncator(tz_truncation) - else: - self._tz_formatter = self._null_formatter - self._tz_truncator = self._remove_tzinfo_truncator - - def _make_timelike_formatter(self, time_str): - time_elements = ("%(f", "%S", "%M", "%H") - truncation_elements = (None, self._SECOND, self._MINUTE, self._HOUR) - - truncation = None - for i, elem in enumerate(time_elements): - if elem in time_str: - assert self._all_in( - time_str, time_elements[(i + 1) :] - ), f"Invalid time str: {time_str}" - truncation = truncation_elements[i] - break - else: - assert False, f"Invalid time str: {time_str}" - - if (m := self._FLOAT_RE.search(time_str)) is not None: - time_str = time_str[: m.start()] - - precision = int(m.group("prec")) - assert precision > 0, "0 and negative precision is not supported" - - truncation = timedelta(microseconds=10 ** (6 - min(6, precision))) - - def format_time(dt, *, time_str=time_str, precision=precision): - if precision < 7: - return ( - format(dt, time_str) - + 
f"{dt.microsecond:06d}"[0:precision] - ) - else: - return ( - format(dt, time_str) - + f"{dt.microsecond:06d}" - + "0" * (precision - 6) - ) - - else: - - def format_time(dt, *, time_str=time_str): - return format(dt, time_str) - - return format_time, truncation - - _ARBITRARY_DT = datetime(2000, 1, 1) - - def _make_tz_formatter(self, base_formatter): - def tz_formatter(dt, *, _self=self, _base_formatter=base_formatter): - if dt.tzinfo is None: - return "" - utcoffset = dt.utcoffset() - - t = self._ARBITRARY_DT + abs(utcoffset) - - sign = "+" if utcoffset >= _self._ZERO else "-" - - return sign + _base_formatter(t) - - return tz_formatter - - def _make_time_truncator(self, truncation): - if truncation is None: - - def time_truncator(dt): - return {} - - else: - - def time_truncator(dt, *, _time_truncation=truncation): - time_as_td = timedelta( - hours=dt.hour, - minutes=dt.minute, - seconds=dt.second, - microseconds=dt.microsecond, - ) - truncated = _time_truncation * (time_as_td // _time_truncation) - - if truncated == time_as_td: - return {} - - td_as_datetime = datetime(1970, 1, 1) + truncated - return { - component: getattr(td_as_datetime, component) - for component in ("hour", "minute", "second", "microsecond") - } - - return time_truncator - - def _make_tz_truncator(self, truncation): - if truncation is None: - - def tz_truncator(dt): - return {} - - else: - - def tz_truncator(dt, *, _tz_truncation=truncation): - if dt.tzinfo is None: - return {} - - offset = dt.utcoffset() - sign = -1 if offset < self._ZERO else 1 - - tmp, remainder = divmod(abs(offset), _tz_truncation) - if not remainder: - return {} - - new_offset = tmp * _tz_truncation - new_tzinfo = timezone(sign * new_offset) - return {"tzinfo": new_tzinfo} - - return tz_truncator - - def _null_formatter(self, dt): - return "" - - def _remove_tzinfo_truncator(self, dt): - if dt.tzinfo is not None: - return {"tzinfo": None} - return {} - - def _tz_z_formatter(self, dt): - if dt.tzinfo is None: - return 
"" - - utcoffset = dt.utcoffset() - - if utcoffset == timedelta(0): - return "Z" - - hours, rem = divmod(utcoffset, timedelta(hours=1)) - - rv = f"{hours:+03d}" - if not rem: - return rv - - minutes, rem = divmod(rem, timedelta(minutes=1)) - rv += f":{rem.total_seconds():02f}" - if not rem: - return rv - - microseconds = rem // timedelta(microseconds=1) - rv += f".{microseconds:06d}" - return rv - - @staticmethod - def _all_in(string, substrings): - for substring in substrings: - if substring not in string: - return False - return True - - diff --git a/Lib/test/isoformat_helpers/strategies.py b/Lib/test/isoformat_helpers/strategies.py deleted file mode 100644 index ca3ceb348edb86..00000000000000 --- a/Lib/test/isoformat_helpers/strategies.py +++ /dev/null @@ -1,67 +0,0 @@ -from datetime import date, datetime, time, timedelta, timezone -import itertools - -from test.support.hypothesis_helper import hypothesis - -from .isoformatter import IsoFormatter - -def _valid_date_formats(): - return ("%Y-%m-%d", "%Y%m%d", "%G-W%V", "%GW%V", "%G-W%V-%u", "%GW%V%u") - - -def _valid_time_formats(max_precision=9): - subsecond_format_tuples = itertools.product( - ("%H:%M:%S", "%H%M%S"), - (f"%(f{prec})" for prec in range(1, max_precision)), - ) - subsecond_formats = ( - (".".join(comps), ",".join(comps)) for comps in subsecond_format_tuples - ) - time_formats = ("%H", "%H:%M", "%H:%M:%S", "%H%M", "%H%M%S") + tuple( - itertools.chain.from_iterable(subsecond_formats) - ) - - tz_formats = ("",) + tuple( - (f"[TZ:{tz_fmt}]" for tz_fmt in time_formats + ("Z",)) - ) - - return tuple(map("".join, itertools.product(time_formats, tz_formats))) - -def _make_isoformatter_strategy(): - time_format = hypothesis.strategies.one_of( - hypothesis.strategies.just(()), # No time format - hypothesis.strategies.tuples( - hypothesis.strategies.one_of( - hypothesis.strategies.just("T"), # Shrink towards T and space - hypothesis.strategies.just(" "), - hypothesis.strategies.characters(), - ), - 
hypothesis.strategies.sampled_from(VALID_TIME_FORMATS), - ), - ) - - return hypothesis.strategies.tuples( - hypothesis.strategies.sampled_from(VALID_DATE_FORMATS), time_format - ).map(lambda x: IsoFormatter("".join((x[0],) + x[1]))) - - - -VALID_DATE_FORMATS = _valid_date_formats() -VALID_TIME_FORMATS = _valid_time_formats() - -DATE_ISOFORMATTERS = hypothesis.strategies.sampled_from(VALID_DATE_FORMATS).map( - IsoFormatter -) -TIME_ISOFORMATTERS = hypothesis.strategies.sampled_from(VALID_TIME_FORMATS).map( - IsoFormatter -) -ISOFORMATTERS = _make_isoformatter_strategy() -FIXED_TIMEZONES = hypothesis.strategies.timedeltas( - min_value=timedelta(hours=-23, minutes=59, seconds=59, microseconds=999999), - max_value=timedelta(hours=23, minutes=59, seconds=59, microseconds=999999), -).map(timezone) -TIMEZONES = hypothesis.strategies.one_of( - hypothesis.strategies.none(), - FIXED_TIMEZONES, - hypothesis.strategies.timezones(), -) diff --git a/Lib/test/test_datetime.py b/Lib/test/test_datetime.py index 1bfdb248a69663..7f9094fa7bd4e6 100644 --- a/Lib/test/test_datetime.py +++ b/Lib/test/test_datetime.py @@ -8,9 +8,7 @@ def load_tests(loader, tests, pattern): try: - pure_tests = import_fresh_module(TESTS, fresh=[ - 'datetime', '_strptime', 'test.isoformat_helpers.isoformatter', - 'test.isoformat_helpers.strategies'], + pure_tests = import_fresh_module(TESTS, fresh=['datetime', '_strptime'], blocked=['_datetime']) fast_tests = import_fresh_module(TESTS, fresh=['datetime', '_datetime', '_strptime']) diff --git a/Lib/test/test_fromisoformat.py b/Lib/test/test_fromisoformat.py new file mode 100644 index 00000000000000..a7a655d163279c --- /dev/null +++ b/Lib/test/test_fromisoformat.py @@ -0,0 +1,726 @@ +import functools +import itertools +import re +import sys +import unittest +import zoneinfo +from datetime import date, datetime, time, timedelta, timezone +from test.support.hypothesis_helper import hypothesis +from test.support.import_helper import import_fresh_module +from 
typing import Any + + +def _get_modules(): + import datetime as c_datetime + import zoneinfo as c_zoneinfo + + py_datetime = import_fresh_module( + "datetime", fresh=["datetime", "_strptime"], blocked=["_datetime"] + ) + + return c_datetime, py_datetime + + +(c_datetime, py_datetime) = _get_modules() + + +@functools.lru_cache +def make_timedelta(module, *args, **kwargs): + return module.timedelta(*args, **kwargs) + + +@functools.lru_cache +def make_cached_datetime(module, *args, **kwargs): + return module.datetime(*args, **kwargs) + + +class IsoFormatter: + """Helper class to make it possible to round-trip a given ISO 8601 format. + + The main problem this solves is that many ISO 8601 formats are lossy, e.g.:: + + >>> datetime(2022, 5, 19, 12, 30, 15).isoformat(timespec="hours") + 2022-05-19T12 + + This prevents us from easily writing tests that take arbitrary input + datetimes, serializes them to an arbitrary ISO 8601 format and ensures that + the same thing comes back when we try and parse it. + + This class allows you to specify an ISO 8601 format and generate both the + ISO 8601 string and the truncated datetime, like so: + + >>> formatter = IsoFormatter("%Y-%m-%dT%H") + >>> dt = datetime(2022, 5, 19, 12, 30, 15) + >>> formatter.format_datetime(dt) + "2022-05-19T12" + >>> formatter.truncate(dt) + datetime.datetime(2022, 5, 19, 12, 0) + """ + + _TZ_RE = re.compile(r"\[TZ:(?P[^\]]+)\]$") + _FLOAT_RE = re.compile(r"%\(f(?P\d+)\)$") + + def __init__(self, format_str, datetime_module=c_datetime): + self._format_str = format_str + self._module = datetime_module + + # Create instances of these unit values for convenience and performance. 
+ self._MICROSECOND = make_timedelta(self._module, microseconds=1) + self._SECOND = make_timedelta(self._module, seconds=1) + self._MINUTE = make_timedelta(self._module, minutes=1) + self._HOUR = make_timedelta(self._module, hours=1) + self._ZERO = make_timedelta(self._module, 0) + self._ARBITRARY_DT = make_cached_datetime(self._module, 2000, 1, 1) + + if (m := self._TZ_RE.search(format_str)) is not None: + self._tz_str = m.group("fmt") + format_str = format_str[: m.start()] + else: + self._tz_str = None + + try: + time_str_start = format_str.index("%H") + except ValueError: + time_str_start = None + + if time_str_start is not None: + self._time_str = format_str[time_str_start:] + self._sep = format_str[time_str_start - 1] + self._date_str = format_str[: time_str_start - 1] + else: + self._time_str = None + self._sep = "" + self._date_str = format_str + + self._date_str = self._date_str.replace("%Y", "%4Y").replace( + "%G", "%4G" + ) + + self._populate_time() + self._populate_tz() + + if "W" in self._date_str: + expected_components = ("%4G", "%V") + else: + expected_components = ("%4Y", "%m", "%d") + + def __repr__(self): + return f"{self.__class__.__name__}('{self._format_str}')" + + def with_module(self, module): + if self._module is module: + return self + return self.__class__(self._format_str, datetime_module=module) + + def format_datetime(self, dt) -> str: + """Apply the specified ISO8601 format to a datetime.""" + return ( + f"{format(dt, self._date_str)}{self._sep}" + + f"{self._time_formatter(dt)}{self._tz_formatter(dt)}" + ) + + def format_date(self, dt) -> str: + return f"{format(dt, self._date_str)}" + + def format_time(self, dt) -> str: + return f"{self._time_formatter(dt)}{self._tz_formatter(dt)}" + + def truncate(self, dt): + """Truncate a datetime to the precision level of the format.""" + truncator = {} + if "W" in self._date_str and "%u" not in self._date_str: + iso_year, week, weekday = dt.isocalendar() + if weekday != 1: + truncated_dt = 
self._module.datetime.fromisocalendar( + iso_year, week, 1 + ) + for comp in ("year", "month", "day"): + if getattr(dt, comp) != ( + new_comp := getattr(truncated_dt, comp) + ): + truncator[comp] = new_comp + + if hasattr(dt, "tzinfo"): + truncator.update(self._time_truncator(dt)) + truncator.update(self._tz_truncator(dt)) + + if truncator: + return dt.replace(**truncator) + else: + return dt + + def _populate_time(self): + if self._time_str is not None: + time_formatter, time_truncation = self._make_timelike_formatter( + self._time_str + ) + self._time_formatter = time_formatter + self._time_truncator = self._make_time_truncator(time_truncation) + else: + self._time_formatter = self._null_formatter + self._time_truncator = self._make_time_truncator( + self._module.timedelta(days=1) + ) + + def _populate_tz(self): + if self._tz_str is not None: + if self._tz_str == "Z": + self._tz_formatter = self._tz_z_formatter + self._tz_truncator = self._make_tz_truncator(None) + else: + base_formatter, tz_truncation = self._make_timelike_formatter( + self._tz_str + ) + + self._tz_formatter = self._make_tz_formatter(base_formatter) + self._tz_truncator = self._make_tz_truncator(tz_truncation) + else: + self._tz_formatter = self._null_formatter + self._tz_truncator = self._remove_tzinfo_truncator + + def _make_timelike_formatter(self, time_str): + time_elements = ("%(f", "%S", "%M", "%H") + truncation_elements = (None, self._SECOND, self._MINUTE, self._HOUR) + + truncation = None + for i, elem in enumerate(time_elements): + if elem in time_str: + assert self._all_in( + time_str, time_elements[(i + 1) :] + ), f"Invalid time str: {time_str}" + truncation = truncation_elements[i] + break + else: + assert False, f"Invalid time str: {time_str}" + + if (m := self._FLOAT_RE.search(time_str)) is not None: + time_str = time_str[: m.start()] + + precision = int(m.group("prec")) + assert precision > 0, "0 and negative precision is not supported" + + truncation = self._module.timedelta( + 
microseconds=10 ** (6 - min(6, precision)) + ) + + def format_time(dt, *, time_str=time_str, precision=precision): + if precision < 7: + return ( + format(dt, time_str) + + f"{dt.microsecond:06d}"[0:precision] + ) + else: + return ( + format(dt, time_str) + + f"{dt.microsecond:06d}" + + "0" * (precision - 6) + ) + + else: + + def format_time(dt, *, time_str=time_str): + return format(dt, time_str) + + return format_time, truncation + + def _make_tz_formatter(self, base_formatter): + def tz_formatter(dt, *, _self=self, _base_formatter=base_formatter): + if dt.tzinfo is None: + return "" + utcoffset = dt.utcoffset() + + t = self._ARBITRARY_DT + abs(utcoffset) + + sign = "+" if utcoffset >= self._ZERO else "-" + + return sign + _base_formatter(t) + + return tz_formatter + + def _make_time_truncator(self, truncation): + if truncation is None: + + def time_truncator(dt): + return {} + + else: + + def time_truncator(dt, *, _time_truncation=truncation): + time_as_td = self._module.timedelta( + hours=dt.hour, + minutes=dt.minute, + seconds=dt.second, + microseconds=dt.microsecond, + ) + truncated = _time_truncation * (time_as_td // _time_truncation) + + if truncated == time_as_td: + return {} + + td_as_datetime = self._ARBITRARY_DT + truncated + return { + component: getattr(td_as_datetime, component) + for component in ("hour", "minute", "second", "microsecond") + } + + return time_truncator + + def _make_tz_truncator(self, truncation): + if truncation is None: + + def tz_truncator(dt): + return {} + + else: + + def tz_truncator(dt, *, _tz_truncation=truncation): + if dt.tzinfo is None: + return {} + + offset = dt.utcoffset() + sign = -1 if offset < self._ZERO else 1 + + tmp, remainder = divmod(abs(offset), _tz_truncation) + if not remainder: + return {} + + new_offset = tmp * _tz_truncation + new_tzinfo = self._module.timezone(sign * new_offset) + return {"tzinfo": new_tzinfo} + + return tz_truncator + + def _null_formatter(self, dt): + return "" + + def 
_remove_tzinfo_truncator(self, dt): + if dt.tzinfo is not None: + return {"tzinfo": None} + return {} + + def _tz_z_formatter(self, dt): + if dt.tzinfo is None: + return "" + + utcoffset = dt.utcoffset() + + if utcoffset == self._ZERO: + return "Z" + + if utcoffset < self._ZERO: + rv = "-" + else: + rv = "+" + + utcoffset = abs(utcoffset) + + hours, rem = divmod(utcoffset, self._HOUR) + + rv += f"{hours:02d}" + if not rem: + return rv + + minutes, rem = divmod(rem, self._MINUTE) + rv += f":{minutes:02d}" + if not rem: + return rv + + seconds, rem = divmod(rem, self._SECOND) + rv += f":{seconds:02d}" + if not rem: + return rv + + microseconds = rem // self._MICROSECOND + rv += f".{microseconds:06d}" + return rv + + @staticmethod + def _all_in(string, substrings): + for substring in substrings: + if substring not in string: + return False + return True + + +def _cross_product_examples(**kwargs): + """Adds the cross-product of multiple hypothesis examples. + + This is a helper function to make specifying a bunch of examples less + complicated. By example: + + @_cross_product_examples(a=[1, 2], b=["a", "b"]) + def test_x(a, b): + ... + + Is equivalent to this (order not guaranteed): + + @hypothesis.example(a=1, b="a") + @hypothesis.example(a=2, b="a") + @hypothesis.example(a=1, b="b") + @hypothesis.example(a=2, b="b") + def test_x(a, b): + ... 
+ """ + params, values = zip(*kwargs.items()) + + def inner(f): + out = f + for value_set in itertools.product(*values): + out = hypothesis.example(**dict(zip(params, value_set)))(out) + return out + + return inner + + +################ +# Hypothesis strategies +def _valid_date_formats(): + return ("%Y-%m-%d", "%Y%m%d", "%G-W%V", "%GW%V", "%G-W%V-%u", "%GW%V%u") + + +def _valid_time_formats(max_precision=9): + subsecond_format_tuples = itertools.product( + ("%H:%M:%S", "%H%M%S"), + (f"%(f{prec})" for prec in range(1, max_precision)), + ) + subsecond_formats = ( + (".".join(comps), ",".join(comps)) for comps in subsecond_format_tuples + ) + time_formats = ("%H", "%H:%M", "%H:%M:%S", "%H%M", "%H%M%S") + tuple( + itertools.chain.from_iterable(subsecond_formats) + ) + + tz_formats = ("",) + tuple( + (f"[TZ:{tz_fmt}]" for tz_fmt in time_formats + ("Z",)) + ) + + return tuple(map("".join, itertools.product(time_formats, tz_formats))) + + +def _make_isoformatter_strategy(): + time_format = hypothesis.strategies.one_of( + hypothesis.strategies.just(()), # No time format + hypothesis.strategies.tuples( + hypothesis.strategies.one_of( + hypothesis.strategies.just("T"), # Shrink towards T and space + hypothesis.strategies.just(" "), + hypothesis.strategies.characters(), + ), + hypothesis.strategies.sampled_from(VALID_TIME_FORMATS), + ), + ) + + return hypothesis.strategies.tuples( + hypothesis.strategies.sampled_from(VALID_DATE_FORMATS), time_format + ).map(lambda x: IsoFormatter("".join((x[0],) + x[1]))) + + +VALID_DATE_FORMATS = _valid_date_formats() +VALID_TIME_FORMATS = _valid_time_formats() + +DATE_ISOFORMATTERS = hypothesis.strategies.sampled_from(VALID_DATE_FORMATS).map( + IsoFormatter +) +TIME_ISOFORMATTERS = hypothesis.strategies.sampled_from(VALID_TIME_FORMATS).map( + IsoFormatter +) +ISOFORMATTERS = _make_isoformatter_strategy() +FIXED_TIMEZONES = hypothesis.strategies.timedeltas( + min_value=timedelta(hours=-23, minutes=59, seconds=59, microseconds=999999), + 
max_value=timedelta(hours=23, minutes=59, seconds=59, microseconds=999999), +).map(timezone) +TIMEZONES = hypothesis.strategies.one_of( + hypothesis.strategies.none(), + FIXED_TIMEZONES, + hypothesis.strategies.timezones(), +) + +################ +# Constants +DEFAULT_DT = datetime(2025, 1, 2, 3, 4, 5, 678901) +AWARE_UTC_DT = datetime(2025, 1, 2, 3, 4, 5, 678901, tzinfo=timezone.utc) +AWARE_POS_DT = datetime( + 2025, 1, 2, 3, 5, 6, 678901, tzinfo=timezone(timedelta(hours=3)) +) +AWARE_NEG_DT = datetime( + 2025, 1, 2, 3, 5, 6, 678901, tzinfo=timezone(-timedelta(hours=3)) +) + + +################ +# Tests + + +class FromIsoformatDateTest_Base(unittest.TestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.klass = cls.module.date + + +class FromIsoformatDateTest_Fast(FromIsoformatDateTest_Base): + module = c_datetime + + @hypothesis.given( + d=hypothesis.strategies.dates(), + iso_formatter=DATE_ISOFORMATTERS, + ) + @_cross_product_examples( + d=[ + date(2025, 1, 2), + date(2000, 1, 1), + date(1, 1, 1), + date(9999, 12, 31), + ], + iso_formatter=map(IsoFormatter, ["%Y-%m-%d", "%Y%m%d"]), + ) + @_cross_product_examples( + d=[date(2025, 1, 2), date(2025, 12, 31), date(2023, 1, 1)], + iso_formatter=map( + IsoFormatter, ["%G-W%V", "%GW%V", "%G-W%V-%u", "%GW%V%u"] + ), + ) + def test_fromisoformat_dates(self, d, iso_formatter): + iso_formatter = iso_formatter.with_module(self.module) + + if type(d) != self.klass: + d = self.klass(d.year, d.month, d.day) + + input_str = iso_formatter.format_date(d) + actual = self.klass.fromisoformat(input_str) + expected = iso_formatter.truncate(d) + + self.assertEqual( + actual, + expected, + f"\n{actual} != {expected}\n" + + f"actual = {actual!r}\n" + + f"expected = {expected!r}\n" + + f"input_str = {input_str}\n" + + f"formatter = {iso_formatter!r}", + ) + + +class FromIsoformatDateTest_Pure(FromIsoformatDateTest_Fast): + module = py_datetime + + +class FromIsoformatDateTimeTest_Fast(FromIsoformatDateTest_Fast): + 
module = c_datetime + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.klass = cls.module.datetime + + @hypothesis.given( + dt=hypothesis.strategies.datetimes(timezones=TIMEZONES), + iso_formatter=ISOFORMATTERS, + ) + # fmt: off + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%d")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%d")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H:%M")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M%S")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M%S")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S.%(f1)")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S,%(f1)")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) + @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) + @hypothesis.example(dt=AWARE_POS_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) + @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) + @hypothesis.example(dt=AWARE_NEG_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) + @hypothesis.example(dt=AWARE_POS_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) + 
@hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) + @hypothesis.example(dt=AWARE_NEG_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) + @hypothesis.example(dt=AWARE_POS_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) + @hypothesis.example(dt=datetime(2000, 1, 1, + tzinfo=timezone(-timedelta(hours=-22, microseconds=1))), + iso_formatter=IsoFormatter("%Y-%m-%dT%H[TZ:%H]")) + @hypothesis.example(dt=AWARE_UTC_DT, + iso_formatter=IsoFormatter("%Y-%m-%d0%H:%M:%S,%(f1)[TZ:%H:%M:%S.%(f2)]")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%G-W%V")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%G-W%V-%u")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%GW%V:%H")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%GW%V5%H")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%GW%V%u5%H")) + @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%G-W%V0%H[TZ:%H]")) + @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S,%(f3)[TZ:Z]")) + @hypothesis.example(dt=DEFAULT_DT.replace(tzinfo=timezone(timedelta(seconds=10))), iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S,%(f3)[TZ:Z]")) + @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S,%(f1)[TZ:%H%M%S.%(f2)]")) + @_cross_product_examples( + dt=[ + datetime(2020, 1, 1, 3, 5, 7, 123457, tzinfo=zoneinfo.ZoneInfo("America/New_York")), + datetime(2020, 6, 1, 4, 5, 6, 111111, tzinfo=zoneinfo.ZoneInfo("America/New_York")), + datetime(2021, 10, 31, 1, 30, tzinfo=zoneinfo.ZoneInfo("Europe/London")), + ], + iso_formatter=[ + IsoFormatter("%Y-%m-%dT%H:%M:%S.%(f6)[TZ:%H:%M]"), + IsoFormatter("%Y-%m-%dT%H:%M:%S.%(f6)[TZ:%H%M]"), + ]) + # fmt: on + def test_fromisoformat(self, dt, iso_formatter): + iso_formatter = iso_formatter.with_module(self.module) + + if dt.tzinfo is None or isinstance(dt.tzinfo, self.module.timezone): 
+ new_tzinfo = dt.tzinfo + else: + new_offset = self.module.timedelta( + seconds=dt.utcoffset().total_seconds() + ) + new_tzinfo = self.module.timezone(new_offset, dt.tzname()) + + if not isinstance(dt, self.module.datetime): + dt = self.klass( + dt.year, + dt.month, + dt.day, + dt.hour, + dt.minute, + dt.second, + dt.microsecond, + tzinfo=new_tzinfo, + fold=dt.fold, + ) + elif dt.tzinfo is not new_tzinfo: + dt = dt.astimezone(new_tzinfo) + + if "%G" in iso_formatter._format_str: + if ( + iso_formatter._format_str.startswith("%G-W%V-%u") + and len(iso_formatter._format_str) > 9 + ): + hypothesis.assume(not iso_formatter._format_str[9].isdigit()) + + input_str = iso_formatter.format_datetime(dt) + actual = self.klass.fromisoformat(input_str) + expected = iso_formatter.truncate(dt) + + self.assertEqual( + actual, + expected, + f"\n{actual} != {expected}\n" + + f"actual = {actual!r}\n" + + f"expected = {expected!r} \n" + + f"input_str = {input_str}", + ) + + +class FromIsoformatDateTimeTest_Pure(FromIsoformatDateTimeTest_Fast): + module = py_datetime + + +class FromIsoformatTimeTest_Base(unittest.TestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.klass = cls.module.time + + +class FromIsoformatTimeTest_Fast(FromIsoformatTimeTest_Base): + module = c_datetime + + @hypothesis.given( + t=hypothesis.strategies.times( + timezones=FIXED_TIMEZONES | hypothesis.strategies.none() + ), + iso_formatter=TIME_ISOFORMATTERS, + ) + @_cross_product_examples( + t=[ + time(0, 0), + time(12, 0), + time(23, 59, 59, 999999), + time(12, 0, tzinfo=timezone.utc), + time(12, 0, tzinfo=timezone(timedelta(hours=-5))), + ], + iso_formatter=map( + IsoFormatter, + [ + "%H:%M", + "T%H:%M", + "%H%M", + "%H:%M:%S", + "%H%M%S", + "%H:%M:%S.%(f6)", + "%H%M%S.%(f6)", + "%H:%M:%S.%(f3)", + "%H%M%S.%(f3)", + "%H:%M:%S[TZ:%H:%M]", + "%H:%M:%S[TZ:%H%M]", + "T%H:%M:%S", + "T%H%M%S", + ], + ), + ) + @hypothesis.example( + t=time(0, 0, tzinfo=timezone.utc), + 
iso_formatter=IsoFormatter("%H:%M:%S[TZ:Z]"), + ) + @_cross_product_examples( + t=[ + time(0, 0, tzinfo=timezone(timedelta(hours=5, minutes=30))), + ], + iso_formatter=map( + IsoFormatter, ("%H:%M:%S[TZ:%H]", "%H:%M:%S[TZ:%H:%M]") + ), + ) + @hypothesis.example( + t=time( + 0, + 0, + tzinfo=timezone( + -timedelta( + hours=23, minutes=59, seconds=59, microseconds=999999 + ) + ), + ), + iso_formatter=IsoFormatter("%H:%M:%S,%(f3)[TZ:Z]"), + ) + def test_fromisoformat_times(self, t, iso_formatter): + iso_formatter = iso_formatter.with_module(self.module) + + if t.tzinfo is None or isinstance(t.tzinfo, self.module.timezone): + new_tzinfo = t.tzinfo + else: + new_offset = self.module.timedelta( + seconds=t.utcoffset().total_seconds() + ) + new_tzinfo = self.module.timezone(new_offset, t.tzname()) + + if not isinstance(t, self.module.time): + t = self.klass( + hour=t.hour, + minute=t.minute, + second=t.second, + microsecond=t.microsecond, + tzinfo=new_tzinfo, + fold=t.fold, + ) + elif t.tzinfo is not new_tzinfo: + t = t.replace(tzinfo=new_tzinfo) + + input_str = iso_formatter.format_time(t) + actual = self.klass.fromisoformat(input_str) + expected = iso_formatter.truncate(t) + + self.assertEqual( + actual, + expected, + f"\n{actual} != {expected}\n" + + f"actual = {actual!r}\n" + + f"expected = {expected!r} \n" + + f"input_str = {input_str}\n" + + f"formatter = {iso_formatter!r}", + ) + + +class FromIsoformatTimeTest_Pure(FromIsoformatTimeTest_Fast): + module = py_datetime From 1ea0cd1c32330640da1ef425099c342988367132 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 2 May 2022 12:48:30 -0600 Subject: [PATCH 16/42] Fix bug in time parsing logic --- Lib/datetime.py | 4 ++-- Lib/test/test_fromisoformat.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/datetime.py b/Lib/datetime.py index fd7eb30e3c9915..8dc31bca34c245 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -429,13 +429,13 @@ def _parse_isoformat_time(tstr): # HHMM len: 4 # HH:MM 
len: 5 # HHMMSS len: 6 + # HHMMSS.f+ len: 7+ # HH:MM:SS len: 8 # HH:MM:SS.f+ len: 10+ - if (len_tzstr := len(tzstr)) < 10 and (len_tzstr % 2) and len_tzstr != 5: + if len(tzstr) in (1, 3): raise ValueError('Malformed time zone string') - if tzstr == 'Z': tz_comps = (0, 0, 0, 0) else: diff --git a/Lib/test/test_fromisoformat.py b/Lib/test/test_fromisoformat.py index a7a655d163279c..a41a5c0eb8ca4c 100644 --- a/Lib/test/test_fromisoformat.py +++ b/Lib/test/test_fromisoformat.py @@ -652,6 +652,7 @@ class FromIsoformatTimeTest_Fast(FromIsoformatTimeTest_Base): "%H:%M:%S.%(f6)", "%H%M%S.%(f6)", "%H:%M:%S.%(f3)", + "%H%M%S.%(f1)", "%H%M%S.%(f3)", "%H:%M:%S[TZ:%H:%M]", "%H:%M:%S[TZ:%H%M]", From 1e3577f67d9a347ced82fbe39b4c76ef4cb2bc8c Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 2 May 2022 16:02:01 -0600 Subject: [PATCH 17/42] s/ssize_t/size_t --- Modules/_datetimemodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 8f63fdc4acedbb..219bd6d62de410 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -5394,7 +5394,7 @@ _find_isoformat_separator(const char *dtstr, Py_ssize_t len, unsigned char* mode if (dtstr[4] == week_indicator) { *mode = MODE_ISOCALENDAR; // YYYYWww (7) or YYYYWwwd (8) - ssize_t idx = 7; + size_t idx = 7; for (; idx < len; ++idx) { // Keep going until we run out of digits. 
if (!is_digit(dtstr[idx])) { From 64227998e2b55304dc3542ba89fb2956c51e0cbf Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 2 May 2022 17:09:23 -0600 Subject: [PATCH 18/42] Add fromisoformat example tests --- Lib/test/datetimetester.py | 212 +++++++++++++++++++++++++++++++++++-- 1 file changed, 206 insertions(+), 6 deletions(-) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 6836a3e25e51bd..526f683f17fb22 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -1841,6 +1841,35 @@ def test_fromisoformat(self): self.assertEqual(dt, dt_rt) + def test_fromisoformat_date_examples(self): + examples = [ + ('00010101', self.theclass(1, 1, 1)), + ('20000101', self.theclass(2000, 1, 1)), + ('20250102', self.theclass(2025, 1, 2)), + ('99991231', self.theclass(9999, 12, 31)), + ('0001-01-01', self.theclass(1, 1, 1)), + ('2000-01-01', self.theclass(2000, 1, 1)), + ('2025-01-02', self.theclass(2025, 1, 2)), + ('9999-12-31', self.theclass(9999, 12, 31)), + ('2025W01', self.theclass(2024, 12, 30)), + ('2025-W01', self.theclass(2024, 12, 30)), + ('2025W014', self.theclass(2025, 1, 2)), + ('2025-W01-4', self.theclass(2025, 1, 2)), + ('2026W01', self.theclass(2025, 12, 29)), + ('2026-W01', self.theclass(2025, 12, 29)), + ('2026W013', self.theclass(2025, 12, 31)), + ('2026-W01-3', self.theclass(2025, 12, 31)), + ('2022W52', self.theclass(2022, 12, 26)), + ('2022-W52', self.theclass(2022, 12, 26)), + ('2022W527', self.theclass(2023, 1, 1)), + ('2022-W52-7', self.theclass(2023, 1, 1)), + ] + + for input_str, expected in examples: + with self.subTest(input_str=input_str): + actual = self.theclass.fromisoformat(input_str) + self.assertEqual(actual, expected) + def test_fromisoformat_subclass(self): class DateSubclass(self.theclass): pass @@ -3003,16 +3032,135 @@ def test_fromisoformat_timespecs(self): dt_rt = self.theclass.fromisoformat(dtstr) self.assertEqual(dt, dt_rt) - def test_fromisoformat_examples_datetime(self): - test_cases = [ 
- ('2009-04-19T03:15:45.2345', self.theclass(2009, 4, 19, 3, 15, 45, 234500)), - ('2009-04-19T03:15:45.1234567', self.theclass(2009, 4, 19, 3, 15, 45, 123456)), + def test_fromisoformat_datetime_examples(self): + UTC = timezone.utc + BST = timezone(timedelta(hours=1), 'BST') + EST = timezone(timedelta(hours=-5), 'EST') + EDT = timezone(timedelta(hours=-4), 'EDT') + examples = [ + ('2025-01-02', self.theclass(2025, 1, 2, 0, 0)), + ('2025-01-02T03', self.theclass(2025, 1, 2, 3, 0)), + ('2025-01-02T03:04', self.theclass(2025, 1, 2, 3, 4)), + ('2025-01-02T0304', self.theclass(2025, 1, 2, 3, 4)), + ('2025-01-02T03:04:05', self.theclass(2025, 1, 2, 3, 4, 5)), + ('2025-01-02T030405', self.theclass(2025, 1, 2, 3, 4, 5)), + ('2025-01-02T03:04:05.6', + self.theclass(2025, 1, 2, 3, 4, 5, 600000)), + ('2025-01-02T03:04:05,6', + self.theclass(2025, 1, 2, 3, 4, 5, 600000)), + ('2025-01-02T03:04:05.678', + self.theclass(2025, 1, 2, 3, 4, 5, 678000)), + ('2025-01-02T03:04:05.678901', + self.theclass(2025, 1, 2, 3, 4, 5, 678901)), + ('2025-01-02T03:04:05,678901', + self.theclass(2025, 1, 2, 3, 4, 5, 678901)), + ('2025-01-02T030405.678901', + self.theclass(2025, 1, 2, 3, 4, 5, 678901)), + ('2025-01-02T030405,678901', + self.theclass(2025, 1, 2, 3, 4, 5, 678901)), + ('2025-01-02T03:04:05.6789010', + self.theclass(2025, 1, 2, 3, 4, 5, 678901)), + ('2009-04-19T03:15:45.2345', + self.theclass(2009, 4, 19, 3, 15, 45, 234500)), + ('2009-04-19T03:15:45.1234567', + self.theclass(2009, 4, 19, 3, 15, 45, 123456)), + ('2025-01-02T03:04:05,678', + self.theclass(2025, 1, 2, 3, 4, 5, 678000)), + ('20250102', self.theclass(2025, 1, 2, 0, 0)), + ('20250102T03', self.theclass(2025, 1, 2, 3, 0)), + ('20250102T03:04', self.theclass(2025, 1, 2, 3, 4)), + ('20250102T03:04:05', self.theclass(2025, 1, 2, 3, 4, 5)), + ('20250102T030405', self.theclass(2025, 1, 2, 3, 4, 5)), + ('20250102T03:04:05.6', + self.theclass(2025, 1, 2, 3, 4, 5, 600000)), + ('20250102T03:04:05,6', + self.theclass(2025, 1, 2, 3, 4, 
5, 600000)), + ('20250102T03:04:05.678', + self.theclass(2025, 1, 2, 3, 4, 5, 678000)), + ('20250102T03:04:05,678', + self.theclass(2025, 1, 2, 3, 4, 5, 678000)), + ('20250102T03:04:05.678901', + self.theclass(2025, 1, 2, 3, 4, 5, 678901)), + ('20250102T030405.678901', + self.theclass(2025, 1, 2, 3, 4, 5, 678901)), + ('20250102T030405,678901', + self.theclass(2025, 1, 2, 3, 4, 5, 678901)), + ('20250102T030405.6789010', + self.theclass(2025, 1, 2, 3, 4, 5, 678901)), + ('2022W52520', self.theclass(2022, 12, 26, 20, 0)), + ('2022W527520', self.theclass(2023, 1, 1, 20, 0)), + ('2026W01516', self.theclass(2025, 12, 29, 16, 0)), + ('2026W013516', self.theclass(2025, 12, 31, 16, 0)), + ('2025W01503', self.theclass(2024, 12, 30, 3, 0)), + ('2025W014503', self.theclass(2025, 1, 2, 3, 0)), + ('2025W01512', self.theclass(2024, 12, 30, 12, 0)), + ('2025W014512', self.theclass(2025, 1, 2, 12, 0)), + ('2025W014T121431', self.theclass(2025, 1, 2, 12, 14, 31)), + ('2026W013T162100', self.theclass(2025, 12, 31, 16, 21)), + ('2026W013 162100', self.theclass(2025, 12, 31, 16, 21)), + ('2022W527T202159', self.theclass(2023, 1, 1, 20, 21, 59)), + ('2022W527 202159', self.theclass(2023, 1, 1, 20, 21, 59)), + ('2025W014 121431', self.theclass(2025, 1, 2, 12, 14, 31)), + ('2025W014T030405', self.theclass(2025, 1, 2, 3, 4, 5)), + ('2025W014 030405', self.theclass(2025, 1, 2, 3, 4, 5)), + ('2025-W01-4T03:04:05', self.theclass(2025, 1, 2, 3, 4, 5)), + ('2025-W01-4T03:04:05.678901', + self.theclass(2025, 1, 2, 3, 4, 5, 678901)), + ('2025-W01-4T12:14:31', self.theclass(2025, 1, 2, 12, 14, 31)), + ('2025-W01-4T12:14:31.012345', + self.theclass(2025, 1, 2, 12, 14, 31, 12345)), + ('2026-W01-3T16:21:00', self.theclass(2025, 12, 31, 16, 21)), + ('2026-W01-3T16:21:00.000000', self.theclass(2025, 12, 31, 16, 21)), + ('2022-W52-7T20:21:59', + self.theclass(2023, 1, 1, 20, 21, 59)), + ('2022-W52-7T20:21:59.999999', + self.theclass(2023, 1, 1, 20, 21, 59, 999999)), + ('2025-W01003+00', + 
self.theclass(2024, 12, 30, 3, 0, tzinfo=UTC)), + ('2025-01-02T03:04:05+00', + self.theclass(2025, 1, 2, 3, 4, 5, tzinfo=UTC)), + ('2025-01-02T03:04:05Z', + self.theclass(2025, 1, 2, 3, 4, 5, tzinfo=UTC)), + ('2025-01-02003:04:05,6+00:00:00.00', + self.theclass(2025, 1, 2, 3, 4, 5, 600000, tzinfo=UTC)), + ('2000-01-01T00+21', + self.theclass(2000, 1, 1, 0, 0, tzinfo=timezone(timedelta(hours=21)))), + ('2025-01-02T03:05:06+0300', + self.theclass(2025, 1, 2, 3, 5, 6, + tzinfo=timezone(timedelta(hours=3)))), + ('2025-01-02T03:05:06-0300', + self.theclass(2025, 1, 2, 3, 5, 6, + tzinfo=timezone(timedelta(hours=-3)))), + ('2025-01-02T03:04:05+0000', + self.theclass(2025, 1, 2, 3, 4, 5, tzinfo=UTC)), + ('2025-01-02T03:05:06+03', + self.theclass(2025, 1, 2, 3, 5, 6, + tzinfo=timezone(timedelta(hours=3)))), + ('2025-01-02T03:05:06-03', + self.theclass(2025, 1, 2, 3, 5, 6, + tzinfo=timezone(timedelta(hours=-3)))), + ('2020-01-01T03:05:07.123457-05:00', + self.theclass(2020, 1, 1, 3, 5, 7, 123457, tzinfo=EST)), + ('2020-01-01T03:05:07.123457-0500', + self.theclass(2020, 1, 1, 3, 5, 7, 123457, tzinfo=EST)), + ('2020-06-01T04:05:06.111111-04:00', + self.theclass(2020, 6, 1, 4, 5, 6, 111111, tzinfo=EDT)), + ('2020-06-01T04:05:06.111111-0400', + self.theclass(2020, 6, 1, 4, 5, 6, 111111, tzinfo=EDT)), + ('2021-10-31T01:30:00.000000+01:00', + self.theclass(2021, 10, 31, 1, 30, tzinfo=BST)), + ('2021-10-31T01:30:00.000000+0100', + self.theclass(2021, 10, 31, 1, 30, tzinfo=BST)), + ('2025-01-02T03:04:05,6+000000.00', + self.theclass(2025, 1, 2, 3, 4, 5, 600000, tzinfo=UTC)), + ('2025-01-02T03:04:05,678+00:00:10', + self.theclass(2025, 1, 2, 3, 4, 5, 678000, + tzinfo=timezone(timedelta(seconds=10)))), ] - for input_str, expected in test_cases: + for input_str, expected in examples: with self.subTest(input_str=input_str): actual = self.theclass.fromisoformat(input_str) - self.assertEqual(actual, expected) def test_fromisoformat_fails_datetime(self): @@ -3990,6 +4138,58 @@ def 
test_fromisoformat_fractions(self): self.assertEqual(actual, expected) + def test_fromisoformat_time_examples(self): + examples = [ + ('0000', self.theclass(0, 0)), + ('00:00', self.theclass(0, 0)), + ('000000', self.theclass(0, 0)), + ('00:00:00', self.theclass(0, 0)), + ('000000.0', self.theclass(0, 0)), + ('00:00:00.0', self.theclass(0, 0)), + ('000000.000', self.theclass(0, 0)), + ('00:00:00.000', self.theclass(0, 0)), + ('000000.000000', self.theclass(0, 0)), + ('00:00:00.000000', self.theclass(0, 0)), + ('1200', self.theclass(12, 0)), + ('12:00', self.theclass(12, 0)), + ('120000', self.theclass(12, 0)), + ('12:00:00', self.theclass(12, 0)), + ('120000.0', self.theclass(12, 0)), + ('12:00:00.0', self.theclass(12, 0)), + ('120000.000', self.theclass(12, 0)), + ('12:00:00.000', self.theclass(12, 0)), + ('120000.000000', self.theclass(12, 0)), + ('12:00:00.000000', self.theclass(12, 0)), + ('2359', self.theclass(23, 59)), + ('23:59', self.theclass(23, 59)), + ('235959', self.theclass(23, 59, 59)), + ('23:59:59', self.theclass(23, 59, 59)), + ('235959.9', self.theclass(23, 59, 59, 900000)), + ('23:59:59.9', self.theclass(23, 59, 59, 900000)), + ('235959.999', self.theclass(23, 59, 59, 999000)), + ('23:59:59.999', self.theclass(23, 59, 59, 999000)), + ('235959.999999', self.theclass(23, 59, 59, 999999)), + ('23:59:59.999999', self.theclass(23, 59, 59, 999999)), + ('00:00:00Z', self.theclass(0, 0, tzinfo=timezone.utc)), + ('12:00:00+0000', self.theclass(12, 0, tzinfo=timezone.utc)), + ('12:00:00+00:00', self.theclass(12, 0, tzinfo=timezone.utc)), + ('00:00:00+05', + self.theclass(0, 0, tzinfo=timezone(timedelta(hours=5)))), + ('00:00:00+05:30', + self.theclass(0, 0, tzinfo=timezone(timedelta(hours=5, minutes=30)))), + ('12:00:00-05:00', + self.theclass(12, 0, tzinfo=timezone(timedelta(hours=-5)))), + ('12:00:00-0500', + self.theclass(12, 0, tzinfo=timezone(timedelta(hours=-5)))), + ('00:00:00,000-23:59:59.999999', + self.theclass(0, 0, 
tzinfo=timezone(-timedelta(hours=23, minutes=59, seconds=59, microseconds=999999)))), + ] + + for input_str, expected in examples: + with self.subTest(input_str=input_str): + actual = self.theclass.fromisoformat(input_str) + self.assertEqual(actual, expected) + def test_fromisoformat_fails(self): bad_strs = [ '', # Empty string From 3d24a1585e01c0a68a920e0f2e095da34f9d3d84 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 2 May 2022 17:16:34 -0600 Subject: [PATCH 19/42] Try to be consistent about use of double quotes in error messages --- Lib/datetime.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Lib/datetime.py b/Lib/datetime.py index 8dc31bca34c245..03dbb2f1b83cec 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -336,7 +336,7 @@ def _parse_isoformat_date(dtstr): dayno = 1 if len(dtstr) > pos: if (dtstr[pos:pos + 1] == '-') != has_sep: - raise ValueError('Inconsistent use of dash separator') + raise ValueError("Inconsistent use of dash separator") pos += has_sep @@ -347,7 +347,7 @@ def _parse_isoformat_date(dtstr): month = int(dtstr[pos:pos + 2]) pos += 2 if (dtstr[pos:pos + 1] == "-") != has_sep: - raise ValueError('Inconsistent use of dash separator') + raise ValueError("Inconsistent use of dash separator") pos += has_sep day = int(dtstr[pos:pos + 2]) @@ -366,7 +366,7 @@ def _parse_hh_mm_ss_ff(tstr): pos = 0 for comp in range(0, 3): if (len_str - pos) < 2: - raise ValueError('Incomplete time component') + raise ValueError("Incomplete time component") time_comps[comp] = int(tstr[pos:pos+2]) @@ -380,13 +380,13 @@ def _parse_hh_mm_ss_ff(tstr): break if has_sep and next_char != ':': - raise ValueError('Invalid time separator: %c' % next_char) + raise ValueError("Invalid time separator: %c" % next_char) pos += has_sep if pos < len_str: if tstr[pos] not in '.,': - raise ValueError('Invalid microsecond component') + raise ValueError("Invalid microsecond component") else: pos += 1 @@ -402,7 +402,7 @@ def 
_parse_hh_mm_ss_ff(tstr): time_comps[3] *= _FRACTION_CORRECTION[to_parse-1] if (len_remainder > to_parse and not tstr[(pos+to_parse):].isdigit()): - raise ValueError('Non-digit values in unparsed fraction') + raise ValueError("Non-digit values in unparsed fraction") return time_comps @@ -410,7 +410,7 @@ def _parse_isoformat_time(tstr): # Format supported is HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]] len_str = len(tstr) if len_str < 2: - raise ValueError('Isoformat time too short') + raise ValueError("Isoformat time too short") # This is equivalent to re.search('[+-Z]', tstr), but faster tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1 or tstr.find('Z') + 1) @@ -434,7 +434,7 @@ def _parse_isoformat_time(tstr): # HH:MM:SS.f+ len: 10+ if len(tzstr) in (1, 3): - raise ValueError('Malformed time zone string') + raise ValueError("Malformed time zone string") if tzstr == 'Z': tz_comps = (0, 0, 0, 0) From 661b1b014fdc7ba3223b069187f4afc3f2e50320 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 2 May 2022 18:01:32 -0600 Subject: [PATCH 20/42] Update documentation --- Doc/library/datetime.rst | 82 +++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 35 deletions(-) diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index ca17dc880cfb34..e7b088e99ceb71 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -526,18 +526,20 @@ Other constructors, all class methods: .. classmethod:: date.fromisoformat(date_string) - Return a :class:`date` corresponding to a *date_string* given in the format - ``YYYY-MM-DD``:: + Return a :class:`date` corresponding to a *date_string* given in any valid + ISO-8601 format: >>> from datetime import date >>> date.fromisoformat('2019-12-04') datetime.date(2019, 12, 4) - - This is the inverse of :meth:`date.isoformat`. It only supports the format - ``YYYY-MM-DD``. 
+ >>> date.fromisoformat('20191204') + datetime.date(2019, 12, 4) + >>> date.fromisoformat('2021-W01-1') + datetime.date(2021, 1, 4) .. versionadded:: 3.7 - + .. versionchanged:: 3.11 + Previously, this method only supported the format ``YYYY-MM-DD``. .. classmethod:: date.fromisocalendar(year, week, day) @@ -710,8 +712,6 @@ Instance methods: >>> date(2002, 12, 4).isoformat() '2002-12-04' - This is the inverse of :meth:`date.fromisoformat`. - .. method:: date.__str__() For a date *d*, ``str(d)`` is equivalent to ``d.isoformat()``. @@ -994,31 +994,31 @@ Other constructors, all class methods: .. classmethod:: datetime.fromisoformat(date_string) - Return a :class:`.datetime` corresponding to a *date_string* in one of the - formats emitted by :meth:`date.isoformat` and :meth:`datetime.isoformat`. - - Specifically, this function supports strings in the format: - - .. code-block:: none - - YYYY-MM-DD[*HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]]] - - where ``*`` can match any single character. - - .. caution:: + Return a :class:`.datetime` corresponding to a *date_string* in any valid + ISO-8601 format, with the following exceptions: - This does *not* support parsing arbitrary ISO 8601 strings - it is only intended - as the inverse operation of :meth:`datetime.isoformat`. A more full-featured - ISO 8601 parser, ``dateutil.parser.isoparse`` is available in the third-party package - `dateutil `__. + 1. Time zone offsets may have fractional seconds. + 2. The ``T`` separator may be replaced by any single Unicode character. + 3. You may *not* omit the separator character. + 4. Fractional seconds may have any number of digits (anything beyond 6 will + be truncated). + 5. Fractional hours and minutes are not supported.
Examples:: >>> from datetime import datetime >>> datetime.fromisoformat('2011-11-04') datetime.datetime(2011, 11, 4, 0, 0) + >>> datetime.fromisoformat('20111104') + datetime.datetime(2011, 11, 4, 0, 0) >>> datetime.fromisoformat('2011-11-04T00:05:23') datetime.datetime(2011, 11, 4, 0, 5, 23) + >>> datetime.fromisoformat('2011-11-04T00:05:23Z') + datetime.datetime(2011, 11, 4, 0, 5, 23, tzinfo=datetime.timezone.utc) + >>> datetime.fromisoformat('20111104T000523') + datetime.datetime(2011, 11, 4, 0, 5, 23) + >>> datetime.fromisoformat('2011-W01-2T00:05:23.283') + datetime.datetime(2011, 1, 4, 0, 5, 23, 283000) >>> datetime.fromisoformat('2011-11-04 00:05:23.283') datetime.datetime(2011, 11, 4, 0, 5, 23, 283000) >>> datetime.fromisoformat('2011-11-04 00:05:23.283+00:00') @@ -1028,6 +1028,10 @@ Other constructors, all class methods: tzinfo=datetime.timezone(datetime.timedelta(seconds=14400))) .. versionadded:: 3.7 + .. versionchanged:: 3.11 + Previously, this method only supported formats that could be emitted by + :meth:`date.isoformat()` or :meth:`datetime.isoformat()`. + .. classmethod:: datetime.fromisocalendar(year, week, day) @@ -1763,30 +1767,38 @@ Other constructor: .. classmethod:: time.fromisoformat(time_string) - Return a :class:`.time` corresponding to a *time_string* in one of the - formats emitted by :meth:`time.isoformat`. Specifically, this function supports - strings in the format: - - .. code-block:: none - - HH[:MM[:SS[.fff[fff]]]][+HH:MM[:SS[.ffffff]]] + Return a :class:`.time` corresponding to a *time_string* in any valid + ISO-8601 format, with the following exceptions: - .. caution:: - - This does *not* support parsing arbitrary ISO 8601 strings. It is only - intended as the inverse operation of :meth:`time.isoformat`. + 1. Time zone offsets may have fractional seconds. + 2. The leading ``T``, normally required in cases where there may be ambiguity between + a date and a time, is not required. + 3.
Fractional seconds may have any number of digits (anything beyond 6 will + be truncated). + 4. Fractional hours and minutes are not supported. Examples:: >>> from datetime import time >>> time.fromisoformat('04:23:01') datetime.time(4, 23, 1) + >>> time.fromisoformat('T04:23:01') + datetime.time(4, 23, 1) + >>> time.fromisoformat('T042301') + datetime.time(4, 23, 1) >>> time.fromisoformat('04:23:01.000384') datetime.time(4, 23, 1, 384) + >>> time.fromisoformat('04:23:01,000384') + datetime.time(4, 23, 1, 384) >>> time.fromisoformat('04:23:01+04:00') datetime.time(4, 23, 1, tzinfo=datetime.timezone(datetime.timedelta(seconds=14400))) + >>> time.fromisoformat('04:23:01Z') + datetime.time(4, 23, 1, tzinfo=datetime.timezone.utc) .. versionadded:: 3.7 + .. versionchanged:: 3.11 + Previously, this method only supported formats that could be emitted by + :meth:`time.isoformat()`. Instance methods: From 1defa1df9cad97baad854f6b19e6306113cfb2bf Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Mon, 2 May 2022 18:04:48 -0600 Subject: [PATCH 21/42] Remove isoformatter --- isoformatter.py | 247 ------------------------------------------------ 1 file changed, 247 deletions(-) delete mode 100644 isoformatter.py diff --git a/isoformatter.py b/isoformatter.py deleted file mode 100644 index 6530c3e39813fa..00000000000000 --- a/isoformatter.py +++ /dev/null @@ -1,247 +0,0 @@ -import functools -import itertools - -from datetime import date, datetime, time, timedelta, timezone - -# import hypothesis -from test.support.hypothesis_helper import hypothesis - -from test.isoformat_helper import IsoFormatter - -import unittest - - -def _valid_date_formats(): - return ("%Y-%m-%d", "%Y%m%d", "%G-W%V", "%GW%V", "%G-W%V-%u", "%GW%V%u") - - -def _valid_time_formats(max_precision=9): - subsecond_format_tuples = itertools.product( - ("%H:%M:%S", "%H%M%S"), - (f"%(f{prec})" for prec in range(1, max_precision)), - ) - subsecond_formats = ( - (".".join(comps), ",".join(comps)) for comps in
subsecond_format_tuples - ) - time_formats = ("%H", "%H:%M", "%H:%M:%S", "%H%M", "%H%M%S") + tuple( - itertools.chain.from_iterable(subsecond_formats) - ) - - tz_formats = ("",) + tuple( - (f"[TZ:{tz_fmt}]" for tz_fmt in time_formats + ("Z",)) - ) - - return tuple(map("".join, itertools.product(time_formats, tz_formats))) - - -VALID_DATE_FORMATS = _valid_date_formats() -VALID_TIME_FORMATS = _valid_time_formats() - - -def _make_isoformatter_strategy(): - time_format = hypothesis.strategies.one_of( - hypothesis.strategies.just(()), # No time format - hypothesis.strategies.tuples( - hypothesis.strategies.one_of( - hypothesis.strategies.just("T"), # Shrink towards T and space - hypothesis.strategies.just(" "), - hypothesis.strategies.characters(), - ), - hypothesis.strategies.sampled_from(VALID_TIME_FORMATS), - ), - ) - - return hypothesis.strategies.tuples( - hypothesis.strategies.sampled_from(VALID_DATE_FORMATS), time_format - ).map(lambda x: IsoFormatter("".join((x[0],) + x[1]))) - - -DATE_ISOFORMATTERS = hypothesis.strategies.sampled_from(VALID_DATE_FORMATS).map( - IsoFormatter -) -TIME_ISOFORMATTERS = hypothesis.strategies.sampled_from(VALID_TIME_FORMATS).map( - IsoFormatter -) -ISOFORMATTERS = _make_isoformatter_strategy() -FIXED_TIMEZONES = hypothesis.strategies.timedeltas( - min_value=timedelta(hours=-23, minutes=59, seconds=59, microseconds=999999), - max_value=timedelta(hours=23, minutes=59, seconds=59, microseconds=999999), -).map(timezone) -TIMEZONES = hypothesis.strategies.one_of( - hypothesis.strategies.none(), - FIXED_TIMEZONES, - hypothesis.strategies.timezones(), -) - - -DEFAULT_D = date(2025, 1, 2) -DEFAULT_DT = datetime(2025, 1, 2, 3, 4, 5, 678901) -AWARE_UTC_DT = datetime(2025, 1, 2, 3, 4, 5, 678901, tzinfo=timezone.utc) -AWARE_POS_DT = datetime( - 2025, 1, 2, 3, 5, 6, 678901, tzinfo=timezone(timedelta(hours=3)) -) -AWARE_NEG_DT = datetime( - 2025, 1, 2, 3, 5, 6, 678901, tzinfo=timezone(-timedelta(hours=3)) -) - - -def 
_cross_product_examples(**kwargs): - params, values = zip(*kwargs.items()) - - example_stack = [] - for value_set in itertools.product(*values): - example_stack.append(hypothesis.example(**dict(zip(params, value_set)))) - - return functools.reduce(lambda a, b: a(b), example_stack) - - -class IsoFormatTest(unittest.TestCase): - @hypothesis.given( - d=hypothesis.strategies.dates(), - iso_formatter=DATE_ISOFORMATTERS, - ) - @_cross_product_examples( - d=[ - date(2025, 1, 2), - date(2000, 1, 1), - date(1, 1, 1), - date(9999, 12, 31), - ], - iso_formatter=map(IsoFormatter, ["%Y-%m-%d", "%Y%m%d"]), - ) - @_cross_product_examples( - d=[date(2025, 1, 2), date(2025, 12, 31), date(2023, 1, 1)], - iso_formatter=map( - IsoFormatter, ["%G-W%V", "%GW%V", "%G-W%V-%u", "%GW%V%u"] - ), - ) - def test_dates(self, d, iso_formatter): - input_str = iso_formatter.format(d) - actual = type(d).fromisoformat(input_str) - expected = iso_formatter.truncate(d) - - self.assertEqual( - actual, - expected, - f"\n{actual} != {expected}\n" - + f"actual = {actual!r}\n" - + f"expected = {expected!r}\n" - + f"input_str = {input_str}\n" - + f"formatter = {iso_formatter!r}", - ) - - @hypothesis.given( - t=hypothesis.strategies.times( - timezones=FIXED_TIMEZONES | hypothesis.strategies.none() - ), - iso_formatter=TIME_ISOFORMATTERS, - ) - @_cross_product_examples( - t=[ - time(0, 0), - time(12, 0), - time(23, 59, 59, 999999), - time(12, 0, tzinfo=timezone.utc), - time(12, 0, tzinfo=timezone(timedelta(hours=-5))), - ], - iso_formatter=map( - IsoFormatter, - [ - "%H:%M:%S", - "%H%M%S", - "%H:%M:%S.%f", - "%H%M%S.%f", - "%H:%M:%S[TZ:%H:%M]", - "%H:%M:%S[TZ:%H%M]", - ], - ), - ) - @hypothesis.example( - t=time(0, 0, tzinfo=timezone.utc), - iso_formatter=IsoFormatter("%H:%M:%S[TZ:Z]"), - ) - @_cross_product_examples( - t=[ - time(0, 0, tzinfo=timezone(timedelta(hours=5, minutes=30))), - ], - iso_formatter=map( - IsoFormatter, ("%H:%M:%S[TZ:%H]", "%H:%M:%S[TZ:%H:%M]") - ), - ) - def test_times(self, t, 
iso_formatter): - input_str = iso_formatter.format(t) - actual = type(t).fromisoformat(input_str) - expected = iso_formatter.truncate(t) - - self.assertEqual( - actual, - expected, - f"\n{actual} != {expected}\n" - + f"actual = {actual!r}\n" - + f"expected = {expected!r} \n" - + f"input_str = {input_str}\n" - + f"formatter = {iso_formatter!r}", - ) - - @unittest.skip("Broken atm") - @hypothesis.given( - dt=hypothesis.strategies.datetimes(timezones=TIMEZONES), - iso_formatter=ISOFORMATTERS, - ) - # fmt: off - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%d")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%d")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H:%M")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M%S")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M%S")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S.%(f1)")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S,%(f1)")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) - @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) - @hypothesis.example(dt=AWARE_POS_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) - @hypothesis.example(dt=AWARE_UTC_DT, 
iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) - @hypothesis.example(dt=AWARE_NEG_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) - @hypothesis.example(dt=AWARE_POS_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) - @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) - @hypothesis.example(dt=AWARE_NEG_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) - @hypothesis.example(dt=AWARE_POS_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) - @hypothesis.example(dt=datetime(2000, 1, 1, - tzinfo=timezone(-timedelta(hours=-22, microseconds=1))), - iso_formatter=IsoFormatter("%Y-%m-%dT%H[TZ:%H]")) - @hypothesis.example(dt=AWARE_UTC_DT, - iso_formatter=IsoFormatter("%Y-%m-%d0%H:%M:%S,%(f1)[TZ:%H:%M:%S.%(f2)]")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%G-W%V")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%G-W%V-%u")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%GW%V:%H")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%GW%V5%H")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%GW%V%u5%H")) - @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%G-W%V0%H[TZ:%H]")) - # fmt: on - def test_fromisoformat(self, dt, iso_formatter): - - if "%G" in iso_formatter._format_str: - if ( - iso_formatter._format_str.startswith("%G-W%V-%u") - and len(iso_formatter._format_str) > 9 - ): - hypothesis.assume(not iso_formatter._format_str[9].isdigit()) - - input_str = iso_formatter.format(dt) - actual = datetime.fromisoformat(input_str) - expected = iso_formatter.truncate(dt) - - self.assertEqual( - actual, - expected, - f"\n{actual} != {expected}\n" - + f"actual = {actual!r}\n" - + f"expected = {expected!r} \n" - + f"input_str = {input_str}", - ) From 75de7a4b3c154e53868b2e7fe626631d4e53ee0e Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 3 May 2022 11:44:39 -0600 Subject: [PATCH 
22/42] Update out-of-date comment --- Lib/datetime.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/datetime.py b/Lib/datetime.py index 03dbb2f1b83cec..b9ec9c6dc25aed 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -320,8 +320,8 @@ def _find_isoformat_separator(dtstr): def _parse_isoformat_date(dtstr): - # It is assumed that this function will only be called with a - # string of length exactly 10, and (though this is not used) ASCII-only + # It is assumed that this is an ASCII-only string of lengths 7, 8 or 10, + # see the comment on Modules/_datetimemodule.c:_find_isoformat_separator assert len(dtstr) in (7, 8, 10) year = int(dtstr[0:4]) has_sep = dtstr[4] == '-' From 07ee419cfd3c34568fa354751b8fa8c0adc7ac9b Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 3 May 2022 11:47:37 -0600 Subject: [PATCH 23/42] Only one space --- Lib/datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/datetime.py b/Lib/datetime.py index b9ec9c6dc25aed..297cce51fe80b1 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -413,7 +413,7 @@ def _parse_isoformat_time(tstr): raise ValueError("Isoformat time too short") # This is equivalent to re.search('[+-Z]', tstr), but faster - tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1 or tstr.find('Z') + 1) + tz_pos = (tstr.find('-') + 1 or tstr.find('+') + 1 or tstr.find('Z') + 1) timestr = tstr[:tz_pos-1] if tz_pos > 0 else tstr time_comps = _parse_hh_mm_ss_ff(timestr) From 3d0fb7a751f1f8932fd3005c0bdf109c617e6c45 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 3 May 2022 11:47:56 -0600 Subject: [PATCH 24/42] Explicitly handle 0-length tzstr --- Lib/datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/datetime.py b/Lib/datetime.py index 297cce51fe80b1..855f63434cdffe 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -433,7 +433,7 @@ def _parse_isoformat_time(tstr): # HH:MM:SS len: 8 # HH:MM:SS.f+ len: 10+ - if len(tzstr) in (1, 3): + if 
len(tzstr) in (0, 1, 3): raise ValueError("Malformed time zone string") if tzstr == 'Z': From cc8c737f6c3d0b81eb5c4835da24f18b4d1e8af6 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 3 May 2022 11:48:11 -0600 Subject: [PATCH 25/42] Raise exceptions from None --- Lib/datetime.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Lib/datetime.py b/Lib/datetime.py index 855f63434cdffe..0aec73580f5741 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -1838,13 +1838,15 @@ def fromisoformat(cls, date_string): date_components = _parse_isoformat_date(dstr) except ValueError: - raise ValueError(f'Invalid isoformat string: {date_string!r}') + raise ValueError( + f'Invalid isoformat string: {date_string!r}') from None if tstr: try: time_components = _parse_isoformat_time(tstr) except ValueError: - raise ValueError(f'Invalid isoformat string: {date_string!r}') + raise ValueError( + f'Invalid isoformat string: {date_string!r}') from None else: time_components = [0, 0, 0, 0, None] From 31bf63eeac6f8028845de37f631123f6bd35fa34 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 3 May 2022 11:48:29 -0600 Subject: [PATCH 26/42] Add test cases around week 53 --- Lib/test/datetimetester.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 526f683f17fb22..c33f60f4849f26 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -1863,6 +1863,12 @@ def test_fromisoformat_date_examples(self): ('2022-W52', self.theclass(2022, 12, 26)), ('2022W527', self.theclass(2023, 1, 1)), ('2022-W52-7', self.theclass(2023, 1, 1)), + ('2015W534', self.theclass(2015, 12, 31)), # Has week 53 + ('2015-W53-4', self.theclass(2015, 12, 31)), # Has week 53 + ('2015-W53-5', self.theclass(2016, 1, 1)), + ('2020W531', self.theclass(2020, 12, 28)), # Leap year + ('2020-W53-1', self.theclass(2020, 12, 28)), # Leap year + ('2020-W53-6', self.theclass(2021, 1, 2)), ] for input_str, expected 
in examples: @@ -1892,6 +1898,8 @@ def test_fromisoformat_fails(self): '2009-12-0a', # Invalid character in day '2009-01-32', # Invalid day '2009-02-29', # Invalid leap day + '2019-W53-1', # No week 53 in 2019 + '2020-W54-1', # No week 54 '2009\ud80002\ud80028', # Separators are surrogate codepoints ] @@ -3103,6 +3111,8 @@ def test_fromisoformat_datetime_examples(self): ('2025W014 121431', self.theclass(2025, 1, 2, 12, 14, 31)), ('2025W014T030405', self.theclass(2025, 1, 2, 3, 4, 5)), ('2025W014 030405', self.theclass(2025, 1, 2, 3, 4, 5)), + ('2020-W53-6T03:04:05', self.theclass(2021, 1, 2, 3, 4, 5)), + ('2020W537 03:04:05', self.theclass(2021, 1, 3, 3, 4, 5)), ('2025-W01-4T03:04:05', self.theclass(2025, 1, 2, 3, 4, 5)), ('2025-W01-4T03:04:05.678901', self.theclass(2025, 1, 2, 3, 4, 5, 678901)), From 5bfb3fce6c5379edfab3c307991c5dafc16ac36d Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 3 May 2022 11:48:50 -0600 Subject: [PATCH 27/42] Add examples around week 53 --- Lib/test/test_fromisoformat.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_fromisoformat.py b/Lib/test/test_fromisoformat.py index a41a5c0eb8ca4c..a8bc1dd47f06a2 100644 --- a/Lib/test/test_fromisoformat.py +++ b/Lib/test/test_fromisoformat.py @@ -475,7 +475,8 @@ class FromIsoformatDateTest_Fast(FromIsoformatDateTest_Base): iso_formatter=map(IsoFormatter, ["%Y-%m-%d", "%Y%m%d"]), ) @_cross_product_examples( - d=[date(2025, 1, 2), date(2025, 12, 31), date(2023, 1, 1)], + d=[date(2025, 1, 2), date(2025, 12, 31), date(2023, 1, 1), + date(2020, 12, 29), date(2021, 1, 1), date(2015, 12, 31)], iso_formatter=map( IsoFormatter, ["%G-W%V", "%GW%V", "%G-W%V-%u", "%GW%V%u"] ), From 4879a47d34bf812111b1b9dbae63b363400f3f71 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 3 May 2022 11:57:28 -0600 Subject: [PATCH 28/42] Update docstrings --- Lib/datetime.py | 6 +++--- Modules/_datetimemodule.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git 
a/Lib/datetime.py b/Lib/datetime.py index 0aec73580f5741..612dbbb253f596 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -978,7 +978,7 @@ def fromordinal(cls, n): @classmethod def fromisoformat(cls, date_string): - """Construct a date from the output of date.isoformat().""" + """Construct a date from a string in ISO 8601 format.""" if not isinstance(date_string, str): raise TypeError('fromisoformat: argument must be str') @@ -1534,7 +1534,7 @@ def isoformat(self, timespec='auto'): @classmethod def fromisoformat(cls, time_string): - """Construct a time from the output of isoformat().""" + """Construct a time from a string in one of the ISO 8601 formats.""" if not isinstance(time_string, str): raise TypeError('fromisoformat: argument must be str') @@ -1823,7 +1823,7 @@ def combine(cls, date, time, tzinfo=True): @classmethod def fromisoformat(cls, date_string): - """Construct a datetime from the output of datetime.isoformat().""" + """Construct a datetime from a string in one of the ISO 8601 formats.""" if not isinstance(date_string, str): raise TypeError('fromisoformat: argument must be str') diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 219bd6d62de410..bbd5e5ecd70410 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -3565,7 +3565,7 @@ static PyMethodDef date_methods[] = { {"fromisoformat", (PyCFunction)date_fromisoformat, METH_O | METH_CLASS, - PyDoc_STR("str -> Construct a date from the output of date.isoformat()")}, + PyDoc_STR("str -> Construct a date from a string in ISO 8601 format.")}, {"fromisocalendar", _PyCFunction_CAST(date_fromisocalendar), METH_VARARGS | METH_KEYWORDS | METH_CLASS, @@ -4755,7 +4755,7 @@ static PyMethodDef time_methods[] = { PyDoc_STR("Return time with new specified fields.")}, {"fromisoformat", (PyCFunction)time_fromisoformat, METH_O | METH_CLASS, - PyDoc_STR("string -> time from time.isoformat() output")}, + PyDoc_STR("string -> time from a string in ISO 8601 format")}, 
{"__reduce_ex__", (PyCFunction)time_reduce_ex, METH_VARARGS, PyDoc_STR("__reduce_ex__(proto) -> (cls, state)")}, @@ -6543,7 +6543,7 @@ static PyMethodDef datetime_methods[] = { {"fromisoformat", (PyCFunction)datetime_fromisoformat, METH_O | METH_CLASS, - PyDoc_STR("string -> datetime from datetime.isoformat() output")}, + PyDoc_STR("string -> datetime from a string in most ISO 8601 formats")}, /* Instance methods: */ From 3cd657f2cabf27f5bfb2c76df1365dabee33ff14 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 3 May 2022 12:11:47 -0600 Subject: [PATCH 29/42] Add news entry --- .../next/Library/2022-05-03-12-11-27.gh-issue-80010.yG54RE.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2022-05-03-12-11-27.gh-issue-80010.yG54RE.rst diff --git a/Misc/NEWS.d/next/Library/2022-05-03-12-11-27.gh-issue-80010.yG54RE.rst b/Misc/NEWS.d/next/Library/2022-05-03-12-11-27.gh-issue-80010.yG54RE.rst new file mode 100644 index 00000000000000..bbcef471d2c0db --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-05-03-12-11-27.gh-issue-80010.yG54RE.rst @@ -0,0 +1,3 @@ +Add support for generalized ISO 8601 parsing to +:meth:`datetime.datetime.fromisoformat`, :meth:`datetime.date.fromisoformat` +and :meth:`datetime.time.fromisoformat`. Patch by Paul Ganssle. From 763d5bbaefae246019a049c38147f10d5735a50a Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 3 May 2022 12:29:03 -0600 Subject: [PATCH 30/42] Add what's new entry --- Doc/whatsnew/3.11.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index 2f32b56423de79..6bc0c23678b411 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -420,6 +420,14 @@ asyncio existing stream-based connections to TLS. (Contributed by Ian Good in :issue:`34975`.) 
+datetime +-------- + +* :meth:`datetime.date.fromisoformat`, :meth:`datetime.time.fromisoformat` and + :meth:`datetime.datetime.fromisoformat` can now be used to parse most ISO 8601 + formats (barring only those that support fractional hours and minutes). + (Contributed by Paul Ganssle in :gh:`80010`.) + fractions --------- From 3a06505549c75127d619874f3b3ed0f6aed125d9 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Tue, 3 May 2022 16:31:42 -0600 Subject: [PATCH 31/42] Be consistent about ISO 8601 --- Doc/library/datetime.rst | 6 +++--- Lib/datetime.py | 2 +- Modules/_datetimemodule.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index e7b088e99ceb71..80781fbd3b9754 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -527,7 +527,7 @@ Other constructors, all class methods: .. classmethod:: date.fromisoformat(date_string) Return a :class:`date` corresponding to a *date_string* given in any valid - ISO-8601 format: + ISO 8601 format: >>> from datetime import date >>> date.fromisoformat('2019-12-04') @@ -995,7 +995,7 @@ Other constructors, all class methods: .. classmethod:: datetime.fromisoformat(date_string) Return a :class:`.datetime` corresponding to a *date_string* in any valid - ISO-8601 format, with the following exceptions: + ISO 8601 format, with the following exceptions: 1. Time zone offsets may have fractional seconds. 2. The `T` separator may be replaced by any single unicode character. @@ -1768,7 +1768,7 @@ Other constructor: .. classmethod:: time.fromisoformat(time_string) Return a :class:`.time` corresponding to a *time_string* in any valid - ISO-8601 format, with the following exceptions: + ISO 8601 format, with the following exceptions: 1. Time zone offsets may have fractional seconds. 2. 
The leading `T`, normally required in cases where there may be ambiguity between diff --git a/Lib/datetime.py b/Lib/datetime.py index 612dbbb253f596..a06d6dda871ebc 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -1538,7 +1538,7 @@ def fromisoformat(cls, time_string): if not isinstance(time_string, str): raise TypeError('fromisoformat: argument must be str') - # The spec actually requires that time-only ISO-8601 strings start with + # The spec actually requires that time-only ISO 8601 strings start with # T, but the extended format allows this to be omitted as long as there # is no ambiguity with date strings. time_string = time_string.removeprefix('T') diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index bbd5e5ecd70410..2c3d71c9476a42 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -4640,7 +4640,7 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) { goto invalid_string_error; } - // The spec actually requires that time-only ISO-8601 strings start with + // The spec actually requires that time-only ISO 8601 strings start with // T, but the extended format allows this to be omitted as long as there // is no ambiguity with date strings. 
if (*p == 'T') { @@ -5269,7 +5269,7 @@ static PyObject * _sanitize_isoformat_str(PyObject *dtstr) { Py_ssize_t len = PyUnicode_GetLength(dtstr); - if (len < 7) { // All valid ISO8601 strings are at least 7 characters long + if (len < 7) { // All valid ISO 8601 strings are at least 7 characters long return NULL; } From e643f02372af955b881b41b2cfb6c65c53ad1951 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Thu, 5 May 2022 07:31:56 -0600 Subject: [PATCH 32/42] Change name of isoformat separator detection function --- Lib/datetime.py | 10 +++++----- Modules/_datetimemodule.c | 7 +++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/Lib/datetime.py b/Lib/datetime.py index a06d6dda871ebc..db0187866420cc 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -262,8 +262,8 @@ def _wrap_strftime(object, format, timetuple): return _time.strftime(newformat, timetuple) # Helpers for parsing the result of isoformat() -def _find_isoformat_separator(dtstr): - # See the comment in _datetimemodule.c:_findisoformat_separator +def _find_isoformat_datetime_separator(dtstr): + # See the comment in _datetimemodule.c:_find_isoformat_datetime_separator len_dtstr = len(dtstr) if len_dtstr == 7: return 7 @@ -321,7 +321,7 @@ def _find_isoformat_separator(dtstr): def _parse_isoformat_date(dtstr): # It is assumed that this is an ASCII-only string of lengths 7, 8 or 10, - # see the comment on Modules/_datetimemodule.c:_find_isoformat_separator + # see the comment on Modules/_datetimemodule.c:_find_isoformat_datetime_separator assert len(dtstr) in (7, 8, 10) year = int(dtstr[0:4]) has_sep = dtstr[4] == '-' @@ -1832,7 +1832,7 @@ def fromisoformat(cls, date_string): # Split this at the separator try: - separator_location = _find_isoformat_separator(date_string) + separator_location = _find_isoformat_datetime_separator(date_string) dstr = date_string[0:separator_location] tstr = date_string[(separator_location+1):] @@ -2628,7 +2628,7 @@ def _name_from_offset(delta): _ord2ymd, 
_time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord, _divide_and_round, _parse_isoformat_date, _parse_isoformat_time, _parse_hh_mm_ss_ff, _IsoCalendarDate, _isoweek_to_gregorian, - _find_isoformat_separator, _FRACTION_CORRECTION) + _find_isoformat_datetime_separator, _FRACTION_CORRECTION) # XXX Since import * above excludes names that start with _, # docstring does not get overwritten. In the future, it may be # appropriate to maintain a single module level docstring and diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 2c3d71c9476a42..1aae2b63c3ac46 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -5324,7 +5324,7 @@ _sanitize_isoformat_str(PyObject *dtstr) #define MODE_AMBIGUOUS 2 static Py_ssize_t -_find_isoformat_separator(const char *dtstr, Py_ssize_t len, unsigned char* mode) { +_find_isoformat_datetime_separator(const char *dtstr, Py_ssize_t len) { // The valid date formats can all be distinguished by characters 4 and 5 // and further narrowed down by character // which tells us where to look for the separator character. 
@@ -5453,9 +5453,8 @@ datetime_fromisoformat(PyObject *cls, PyObject *dtstr) } } - unsigned char mode; - const Py_ssize_t separator_location = _find_isoformat_separator( - dt_ptr, len, &mode); + const Py_ssize_t separator_location = _find_isoformat_datetime_separator( + dt_ptr, len); const char *p = dt_ptr; From 504680908fb285e69fe31ecf5ea51839eabcba16 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Thu, 5 May 2022 07:32:26 -0600 Subject: [PATCH 33/42] Remove 'mode' logic and update comments --- Modules/_datetimemodule.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 1aae2b63c3ac46..fb2a24a2b484ec 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -5319,9 +5319,6 @@ _sanitize_isoformat_str(PyObject *dtstr) return str_out; } -#define MODE_STANDARD 0 -#define MODE_ISOCALENDAR 1 -#define MODE_AMBIGUOUS 2 static Py_ssize_t _find_isoformat_datetime_separator(const char *dtstr, Py_ssize_t len) { @@ -5357,16 +5354,18 @@ _find_isoformat_datetime_separator(const char *dtstr, Py_ssize_t len) { assert(len > 7); - *mode = MODE_STANDARD; if (dtstr[4] == date_separator) { + // YYYY-??? + if (dtstr[5] == week_indicator) { - *mode = MODE_ISOCALENDAR; + // YYYY-W?? + if (len < 8) { return -1; } - // YYYY-Www-D (10) or YYYY-Www-HH (8) if (len > 8 && dtstr[8] == date_separator) { + // YYYY-Www-D (10) or YYYY-Www-HH (8) if (len == 9) { return -1; } if (len > 10 && is_digit(dtstr[10])) { // This is as far as we'll try to go to resolve the @@ -5377,7 +5376,6 @@ _find_isoformat_datetime_separator(const char *dtstr, Py_ssize_t len) { // likely that someone will use a hyphen as a separator // than a number, but at this point it's really best effort // because this is an extension of the spec anyway. - *mode = *mode | MODE_AMBIGUOUS; return 8; } @@ -5391,8 +5389,8 @@ _find_isoformat_datetime_separator(const char *dtstr, Py_ssize_t len) { return 10; } } else { + // YYYY??? 
if (dtstr[4] == week_indicator) { - *mode = MODE_ISOCALENDAR; // YYYYWww (7) or YYYYWwwd (8) size_t idx = 7; for (; idx < len; ++idx) { @@ -5407,7 +5405,7 @@ _find_isoformat_datetime_separator(const char *dtstr, Py_ssize_t len) { } if (idx % 2 == 0) { - // If the index of the last number is even, it's YYYYWwwd + // If the index of the last number is even, it's YYYYWww return 7; } else { return 8; From 90093bff9a4bcc44425c30acdd732e4b56d54db0 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Thu, 5 May 2022 07:34:29 -0600 Subject: [PATCH 34/42] Fix segfault case --- Lib/test/datetimetester.py | 1 + Modules/_datetimemodule.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index c33f60f4849f26..0495362b3f3693 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -3095,6 +3095,7 @@ def test_fromisoformat_datetime_examples(self): self.theclass(2025, 1, 2, 3, 4, 5, 678901)), ('20250102T030405.6789010', self.theclass(2025, 1, 2, 3, 4, 5, 678901)), + ('2022W01', self.theclass(2022, 1, 3)), ('2022W52520', self.theclass(2022, 12, 26, 20, 0)), ('2022W527520', self.theclass(2023, 1, 1, 20, 0)), ('2026W01516', self.theclass(2025, 12, 29, 16, 0)), diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index fb2a24a2b484ec..5506a28a908667 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -5352,7 +5352,9 @@ _find_isoformat_datetime_separator(const char *dtstr, Py_ssize_t len) { static const char date_separator = '-'; static const char week_indicator = 'W'; - assert(len > 7); + if (len == 7) { + return 7; + } if (dtstr[4] == date_separator) { // YYYY-??? 
From 6fc8157840b94feb9469a2332545321a17e27209 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Thu, 5 May 2022 07:34:43 -0600 Subject: [PATCH 35/42] Explicitly cast signed to unsigned --- Modules/_datetimemodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 5506a28a908667..7ce67f81372f9f 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -5395,7 +5395,7 @@ _find_isoformat_datetime_separator(const char *dtstr, Py_ssize_t len) { if (dtstr[4] == week_indicator) { // YYYYWww (7) or YYYYWwwd (8) size_t idx = 7; - for (; idx < len; ++idx) { + for (; idx < (size_t)len; ++idx) { // Keep going until we run out of digits. if (!is_digit(dtstr[idx])) { break; From d9a766b9eee6bb96330112c68efee2952693feca Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Thu, 5 May 2022 07:51:24 -0600 Subject: [PATCH 36/42] Document that ordinal dates are not supported --- Doc/library/datetime.rst | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index 80781fbd3b9754..196a6a3d2a7ff0 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -527,7 +527,7 @@ Other constructors, all class methods: .. classmethod:: date.fromisoformat(date_string) Return a :class:`date` corresponding to a *date_string* given in any valid - ISO 8601 format: + ISO 8601 format, except ordinal dates (e.g. ``YYYY-DDD``). >>> from datetime import date >>> date.fromisoformat('2019-12-04') @@ -999,10 +999,8 @@ Other constructors, all class methods: 1. Time zone offsets may have fractional seconds. 2. The `T` separator may be replaced by any single unicode character. - 3. You may *not* omit the separator character. - 4. Fractional seconds may have any number of digits (anything beyond 6 will - be truncated). - 5. Fractional hours and minutes are not supported. + 3. Ordinal dates are not currently supported. + 4. 
Fractional hours and minutes are not supported. Examples:: From 04ed787edc56b5ff16e20c7c3979558c14c723dc Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Thu, 5 May 2022 07:51:39 -0600 Subject: [PATCH 37/42] Remove dead code --- Lib/datetime.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Lib/datetime.py b/Lib/datetime.py index db0187866420cc..56454babaaabbd 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -436,10 +436,7 @@ def _parse_isoformat_time(tstr): if len(tzstr) in (0, 1, 3): raise ValueError("Malformed time zone string") - if tzstr == 'Z': - tz_comps = (0, 0, 0, 0) - else: - tz_comps = _parse_hh_mm_ss_ff(tzstr) + tz_comps = _parse_hh_mm_ss_ff(tzstr) if all(x == 0 for x in tz_comps): tzi = timezone.utc From 6da3e901321c2cf462ec38af47b4da9686456293 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Thu, 5 May 2022 08:16:09 -0600 Subject: [PATCH 38/42] Various fixes --- Lib/datetime.py | 18 ++++++++++-------- Modules/_datetimemodule.c | 36 +++++++++++++++++++----------------- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/Lib/datetime.py b/Lib/datetime.py index 56454babaaabbd..afbb6fed2ecb68 100644 --- a/Lib/datetime.py +++ b/Lib/datetime.py @@ -262,6 +262,9 @@ def _wrap_strftime(object, format, timetuple): return _time.strftime(newformat, timetuple) # Helpers for parsing the result of isoformat() +def _is_ascii_digit(c): + return c in "0123456789" + def _find_isoformat_datetime_separator(dtstr): # See the comment in _datetimemodule.c:_find_isoformat_datetime_separator len_dtstr = len(dtstr) @@ -279,7 +282,7 @@ def _find_isoformat_datetime_separator(dtstr): if len_dtstr > 8 and dtstr[8] == date_separator: if len_dtstr == 9: raise ValueError("Invalid ISO string") - if len_dtstr > 10 and dtstr[10].isdigit(): + if len_dtstr > 10 and _is_ascii_digit(dtstr[10]): # This is as far as we need to resolve the ambiguity for # the moment - if we have YYYY-Www-##, the separator is # either a hyphen at 8 or a number at 10. 
@@ -302,7 +305,7 @@ def _find_isoformat_datetime_separator(dtstr): # YYYYWww (7) or YYYYWwwd (8) idx = 7 while idx < len_dtstr: - if not dtstr[idx].isdigit(): + if not _is_ascii_digit(dtstr[idx]): break idx += 1 @@ -342,7 +345,7 @@ def _parse_isoformat_date(dtstr): dayno = int(dtstr[pos:pos + 1]) - return _isoweek_to_gregorian(year, weekno, dayno) + return list(_isoweek_to_gregorian(year, weekno, dayno)) else: month = int(dtstr[pos:pos + 2]) pos += 2 @@ -401,7 +404,7 @@ def _parse_hh_mm_ss_ff(tstr): if to_parse < 6: time_comps[3] *= _FRACTION_CORRECTION[to_parse-1] if (len_remainder > to_parse - and not tstr[(pos+to_parse):].isdigit()): + and not all(map(_is_ascii_digit, tstr[(pos+to_parse):]))): raise ValueError("Non-digit values in unparsed fraction") return time_comps @@ -482,7 +485,7 @@ def _isoweek_to_gregorian(year, week, day): day_1 = _isoweek1monday(year) ord_day = day_1 + day_offset - return list(_ord2ymd(ord_day)) + return _ord2ymd(ord_day) # Just raise TypeError if the arg isn't None or a string. @@ -994,8 +997,6 @@ def fromisocalendar(cls, year, week, day): This is the inverse of the date.isocalendar() function""" return cls(*_isoweek_to_gregorian(year, week, day)) - return cls(*_ord2ymd(ord_day)) - # Conversions to string def __repr__(self): @@ -2625,7 +2626,8 @@ def _name_from_offset(delta): _ord2ymd, _time, _time_class, _tzinfo_class, _wrap_strftime, _ymd2ord, _divide_and_round, _parse_isoformat_date, _parse_isoformat_time, _parse_hh_mm_ss_ff, _IsoCalendarDate, _isoweek_to_gregorian, - _find_isoformat_datetime_separator, _FRACTION_CORRECTION) + _find_isoformat_datetime_separator, _FRACTION_CORRECTION, + _is_ascii_digit) # XXX Since import * above excludes names that start with _, # docstring does not get overwritten. 
In the future, it may be # appropriate to maintain a single module level docstring and diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 7ce67f81372f9f..efb5278038f2f4 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -742,6 +742,9 @@ parse_isoformat_date(const char *dtstr, const size_t len, int *year, int *month, * 0: Success * -1: Failed to parse date component * -2: Inconsistent date separator usage + * -3: Failed to parse ISO week. + * -4: Failed to parse ISO day. + * -5, -6: Failure in iso_to_ymd */ const char *p = dtstr; p = parse_digits(p, year, 4); @@ -781,7 +784,7 @@ parse_isoformat_date(const char *dtstr, const size_t len, int *year, int *month, int rv = iso_to_ymd(*year, iso_week, iso_day, year, month, day); if (rv) { - return 3 - rv; + return -3 + rv; } else { return 0; } @@ -792,10 +795,8 @@ parse_isoformat_date(const char *dtstr, const size_t len, int *year, int *month, return -1; } - if (uses_separator) { - if (*(p++) != '-') { - return -2; - } + if (uses_separator && *(p++) != '-') { + return -2; } p = parse_digits(p, day, 2); if (p == NULL) { @@ -808,9 +809,11 @@ static int parse_hh_mm_ss_ff(const char *tstr, const char *tstr_end, int *hour, int *minute, int *second, int *microsecond) { + *hour = *minute = *second = *microsecond = 0; const char *p = tstr; const char *p_end = tstr_end; int *vals[3] = {hour, minute, second}; + // This is initialized to satisfy an erroneous compiler warning. 
unsigned char has_separator = 1; // Parse [HH[:?MM[:?SS]]] @@ -852,7 +855,7 @@ parse_hh_mm_ss_ff(const char *tstr, const char *tstr_end, int *hour, return -3; } - static int correction[5] = { + static int correction[] = { 100000, 10000, 1000, 100, 10 }; @@ -860,11 +863,8 @@ parse_hh_mm_ss_ff(const char *tstr, const char *tstr_end, int *hour, *microsecond *= correction[to_parse-1]; } - for (size_t i = 0; i < len_remains - 6; ++i) { - if (!is_digit(*p)) { - break; - } - p++; + while (is_digit(*p)){ + ++p; // skip truncated digits } // Return 1 if it's not the end of the string @@ -918,7 +918,7 @@ parse_isoformat_time(const char *dtstr, size_t dtlen, int *hour, int *minute, *tzmicrosecond = 0; if (*(tzinfo_pos + 1) != '\0') { - return -6; + return -5; } else { return 1; } @@ -933,7 +933,7 @@ parse_isoformat_time(const char *dtstr, size_t dtlen, int *hour, int *minute, *tzoffset = tzsign * ((tzhour * 3600) + (tzminute * 60) + tzsecond); *tzmicrosecond *= tzsign; - return rv ? -7 : 1; + return rv ? -5 : 1; } /* --------------------------------------------------------------------------- @@ -3120,7 +3120,7 @@ date_fromisocalendar(PyObject *cls, PyObject *args, PyObject *kw) } int month; - Py_ssize_t rv = iso_to_ymd(year, week, day, &year, &month, &day); + int rv = iso_to_ymd(year, week, day, &year, &month, &day); if (rv == -2) { @@ -4644,7 +4644,7 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) { // T, but the extended format allows this to be omitted as long as there // is no ambiguity with date strings. if (*p == 'T') { - p += 1; + ++p; len -= 1; } @@ -5286,10 +5286,12 @@ _sanitize_isoformat_str(PyObject *dtstr) // in positions 7, 8 or 10. We'll check each of these for a surrogate and // if we find one, replace it with `T`. If there is more than one surrogate, // we don't have to bother sanitizing it, because the function will later - // fail when we try to convert the function into unicode characters. + // fail when we try to encode the string as ASCII. 
static const size_t potential_separators[3] = {7, 8, 10}; size_t surrogate_separator = 0; - for(size_t idx = 0; idx < 3; ++idx) { + for(size_t idx = 0; + idx < sizeof(potential_separators) / sizeof(*potential_separators); + ++idx) { size_t pos = potential_separators[idx]; if (pos > (size_t)len) { break; From 92cc0bee3593a8534a2cf221870db50384699a98 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Thu, 5 May 2022 08:19:26 -0600 Subject: [PATCH 39/42] Fix example --- Doc/library/datetime.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index 196a6a3d2a7ff0..1ce66322052387 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -1015,7 +1015,7 @@ Other constructors, all class methods: datetime.datetime(2011, 11, 4, 0, 5, 23, tzinfo=datetime.timezone.utc) >>> datetime.fromisoformat('20111104T000523') datetime.datetime(2011, 11, 4, 0, 5, 23) - >>> datetime.fromisoformat('2021-W01-2T00:05:23.283') + >>> datetime.fromisoformat('2011-W01-2T00:05:23.283') datetime.datetime(2011, 1, 4, 0, 5, 23, 283000) >>> datetime.fromisoformat('2011-11-04 00:05:23.283') datetime.datetime(2011, 11, 4, 0, 5, 23, 283000) From bec0bee0bccc337efb99f70e5f53a6ba9fc40e22 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Thu, 5 May 2022 08:22:17 -0600 Subject: [PATCH 40/42] Add example for time.fromisoformat --- Doc/library/datetime.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index 1ce66322052387..7ade628cb34fcb 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -1792,6 +1792,9 @@ Other constructor: datetime.time(4, 23, 1, tzinfo=datetime.timezone(datetime.timedelta(seconds=14400))) >>> time.fromisoformat('04:23:01Z') datetime.time(4, 23, 1, tzinfo=datetime.timezone.utc) + >>> time.fromisoformat('04:23:01+00:00') + datetime.time(4, 23, 1, tzinfo=datetime.timezone.utc) + .. versionadded:: 3.7 .. 
versionchanged:: 3.11 From aad6011717954904ca8e9ec173dde55b02723204 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Thu, 5 May 2022 08:22:51 -0600 Subject: [PATCH 41/42] Fix trailing colon --- Doc/library/datetime.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index 7ade628cb34fcb..e0b28d7cb978d9 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -527,7 +527,7 @@ Other constructors, all class methods: .. classmethod:: date.fromisoformat(date_string) Return a :class:`date` corresponding to a *date_string* given in any valid - ISO 8601 format, except ordinal dates (e.g. ``YYYY-DDD``). + ISO 8601 format, except ordinal dates (e.g. ``YYYY-DDD``):: >>> from datetime import date >>> date.fromisoformat('2019-12-04') From a33d776ddcb0ae11362d19057ae842a2d1f10226 Mon Sep 17 00:00:00 2001 From: Paul Ganssle Date: Thu, 5 May 2022 11:03:09 -0600 Subject: [PATCH 42/42] Remove fromisoformat property test --- Lib/test/test_fromisoformat.py | 728 --------------------------------- 1 file changed, 728 deletions(-) delete mode 100644 Lib/test/test_fromisoformat.py diff --git a/Lib/test/test_fromisoformat.py b/Lib/test/test_fromisoformat.py deleted file mode 100644 index a8bc1dd47f06a2..00000000000000 --- a/Lib/test/test_fromisoformat.py +++ /dev/null @@ -1,728 +0,0 @@ -import functools -import itertools -import re -import sys -import unittest -import zoneinfo -from datetime import date, datetime, time, timedelta, timezone -from test.support.hypothesis_helper import hypothesis -from test.support.import_helper import import_fresh_module -from typing import Any - - -def _get_modules(): - import datetime as c_datetime - import zoneinfo as c_zoneinfo - - py_datetime = import_fresh_module( - "datetime", fresh=["datetime", "_strptime"], blocked=["_datetime"] - ) - - return c_datetime, py_datetime - - -(c_datetime, py_datetime) = _get_modules() - - -@functools.lru_cache -def 
make_timedelta(module, *args, **kwargs): - return module.timedelta(*args, **kwargs) - - -@functools.lru_cache -def make_cached_datetime(module, *args, **kwargs): - return module.datetime(*args, **kwargs) - - -class IsoFormatter: - """Helper class to make it possible to round-trip a given ISO 8601 format. - - The main problem this solves is that many ISO 8601 formats are lossy, e.g.:: - - >>> datetime(2022, 5, 19, 12, 30, 15).isoformat(timespec="hours") - 2022-05-19T12 - - This prevents us from easily writing tests that take arbitrary input - datetimes, serializes them to an arbitrary ISO 8601 format and ensures that - the same thing comes back when we try and parse it. - - This class allows you to specify an ISO 8601 format and generate both the - ISO 8601 string and the truncated datetime, like so: - - >>> formatter = IsoFormatter("%Y-%m-%dT%H") - >>> dt = datetime(2022, 5, 19, 12, 30, 15) - >>> formatter.format_datetime(dt) - "2022-05-19T12" - >>> formatter.truncate(dt) - datetime.datetime(2022, 5, 19, 12, 0) - """ - - _TZ_RE = re.compile(r"\[TZ:(?P[^\]]+)\]$") - _FLOAT_RE = re.compile(r"%\(f(?P\d+)\)$") - - def __init__(self, format_str, datetime_module=c_datetime): - self._format_str = format_str - self._module = datetime_module - - # Create instances of these unit values for convenience and performance. 
- self._MICROSECOND = make_timedelta(self._module, microseconds=1) - self._SECOND = make_timedelta(self._module, seconds=1) - self._MINUTE = make_timedelta(self._module, minutes=1) - self._HOUR = make_timedelta(self._module, hours=1) - self._ZERO = make_timedelta(self._module, 0) - self._ARBITRARY_DT = make_cached_datetime(self._module, 2000, 1, 1) - - if (m := self._TZ_RE.search(format_str)) is not None: - self._tz_str = m.group("fmt") - format_str = format_str[: m.start()] - else: - self._tz_str = None - - try: - time_str_start = format_str.index("%H") - except ValueError: - time_str_start = None - - if time_str_start is not None: - self._time_str = format_str[time_str_start:] - self._sep = format_str[time_str_start - 1] - self._date_str = format_str[: time_str_start - 1] - else: - self._time_str = None - self._sep = "" - self._date_str = format_str - - self._date_str = self._date_str.replace("%Y", "%4Y").replace( - "%G", "%4G" - ) - - self._populate_time() - self._populate_tz() - - if "W" in self._date_str: - expected_components = ("%4G", "%V") - else: - expected_components = ("%4Y", "%m", "%d") - - def __repr__(self): - return f"{self.__class__.__name__}('{self._format_str}')" - - def with_module(self, module): - if self._module is module: - return self - return self.__class__(self._format_str, datetime_module=module) - - def format_datetime(self, dt) -> str: - """Apply the specified ISO8601 format to a datetime.""" - return ( - f"{format(dt, self._date_str)}{self._sep}" - + f"{self._time_formatter(dt)}{self._tz_formatter(dt)}" - ) - - def format_date(self, dt) -> str: - return f"{format(dt, self._date_str)}" - - def format_time(self, dt) -> str: - return f"{self._time_formatter(dt)}{self._tz_formatter(dt)}" - - def truncate(self, dt): - """Truncate a datetime to the precision level of the format.""" - truncator = {} - if "W" in self._date_str and "%u" not in self._date_str: - iso_year, week, weekday = dt.isocalendar() - if weekday != 1: - truncated_dt = 
self._module.datetime.fromisocalendar( - iso_year, week, 1 - ) - for comp in ("year", "month", "day"): - if getattr(dt, comp) != ( - new_comp := getattr(truncated_dt, comp) - ): - truncator[comp] = new_comp - - if hasattr(dt, "tzinfo"): - truncator.update(self._time_truncator(dt)) - truncator.update(self._tz_truncator(dt)) - - if truncator: - return dt.replace(**truncator) - else: - return dt - - def _populate_time(self): - if self._time_str is not None: - time_formatter, time_truncation = self._make_timelike_formatter( - self._time_str - ) - self._time_formatter = time_formatter - self._time_truncator = self._make_time_truncator(time_truncation) - else: - self._time_formatter = self._null_formatter - self._time_truncator = self._make_time_truncator( - self._module.timedelta(days=1) - ) - - def _populate_tz(self): - if self._tz_str is not None: - if self._tz_str == "Z": - self._tz_formatter = self._tz_z_formatter - self._tz_truncator = self._make_tz_truncator(None) - else: - base_formatter, tz_truncation = self._make_timelike_formatter( - self._tz_str - ) - - self._tz_formatter = self._make_tz_formatter(base_formatter) - self._tz_truncator = self._make_tz_truncator(tz_truncation) - else: - self._tz_formatter = self._null_formatter - self._tz_truncator = self._remove_tzinfo_truncator - - def _make_timelike_formatter(self, time_str): - time_elements = ("%(f", "%S", "%M", "%H") - truncation_elements = (None, self._SECOND, self._MINUTE, self._HOUR) - - truncation = None - for i, elem in enumerate(time_elements): - if elem in time_str: - assert self._all_in( - time_str, time_elements[(i + 1) :] - ), f"Invalid time str: {time_str}" - truncation = truncation_elements[i] - break - else: - assert False, f"Invalid time str: {time_str}" - - if (m := self._FLOAT_RE.search(time_str)) is not None: - time_str = time_str[: m.start()] - - precision = int(m.group("prec")) - assert precision > 0, "0 and negative precision is not supported" - - truncation = self._module.timedelta( - 
microseconds=10 ** (6 - min(6, precision)) - ) - - def format_time(dt, *, time_str=time_str, precision=precision): - if precision < 7: - return ( - format(dt, time_str) - + f"{dt.microsecond:06d}"[0:precision] - ) - else: - return ( - format(dt, time_str) - + f"{dt.microsecond:06d}" - + "0" * (precision - 6) - ) - - else: - - def format_time(dt, *, time_str=time_str): - return format(dt, time_str) - - return format_time, truncation - - def _make_tz_formatter(self, base_formatter): - def tz_formatter(dt, *, _self=self, _base_formatter=base_formatter): - if dt.tzinfo is None: - return "" - utcoffset = dt.utcoffset() - - t = self._ARBITRARY_DT + abs(utcoffset) - - sign = "+" if utcoffset >= self._ZERO else "-" - - return sign + _base_formatter(t) - - return tz_formatter - - def _make_time_truncator(self, truncation): - if truncation is None: - - def time_truncator(dt): - return {} - - else: - - def time_truncator(dt, *, _time_truncation=truncation): - time_as_td = self._module.timedelta( - hours=dt.hour, - minutes=dt.minute, - seconds=dt.second, - microseconds=dt.microsecond, - ) - truncated = _time_truncation * (time_as_td // _time_truncation) - - if truncated == time_as_td: - return {} - - td_as_datetime = self._ARBITRARY_DT + truncated - return { - component: getattr(td_as_datetime, component) - for component in ("hour", "minute", "second", "microsecond") - } - - return time_truncator - - def _make_tz_truncator(self, truncation): - if truncation is None: - - def tz_truncator(dt): - return {} - - else: - - def tz_truncator(dt, *, _tz_truncation=truncation): - if dt.tzinfo is None: - return {} - - offset = dt.utcoffset() - sign = -1 if offset < self._ZERO else 1 - - tmp, remainder = divmod(abs(offset), _tz_truncation) - if not remainder: - return {} - - new_offset = tmp * _tz_truncation - new_tzinfo = self._module.timezone(sign * new_offset) - return {"tzinfo": new_tzinfo} - - return tz_truncator - - def _null_formatter(self, dt): - return "" - - def 
_remove_tzinfo_truncator(self, dt): - if dt.tzinfo is not None: - return {"tzinfo": None} - return {} - - def _tz_z_formatter(self, dt): - if dt.tzinfo is None: - return "" - - utcoffset = dt.utcoffset() - - if utcoffset == self._ZERO: - return "Z" - - if utcoffset < self._ZERO: - rv = "-" - else: - rv = "+" - - utcoffset = abs(utcoffset) - - hours, rem = divmod(utcoffset, self._HOUR) - - rv += f"{hours:02d}" - if not rem: - return rv - - minutes, rem = divmod(rem, self._MINUTE) - rv += f":{minutes:02d}" - if not rem: - return rv - - seconds, rem = divmod(rem, self._SECOND) - rv += f":{seconds:02d}" - if not rem: - return rv - - microseconds = rem // self._MICROSECOND - rv += f".{microseconds:06d}" - return rv - - @staticmethod - def _all_in(string, substrings): - for substring in substrings: - if substring not in string: - return False - return True - - -def _cross_product_examples(**kwargs): - """Adds the cross-product of multiple hypothesis examples. - - This is a helper function to make specifying a bunch of examples less - complicated. By example: - - @_cross_product_examples(a=[1, 2], b=["a", "b"]) - def test_x(a, b): - ... - - Is equivalent to this (order not guaranteed): - - @hypothesis.example(a=1, b="a") - @hypothesis.example(a=2, b="a") - @hypothesis.example(a=1, b="b") - @hypothesis.example(a=2, b="b") - def test_x(a, b): - ... 
- """ - params, values = zip(*kwargs.items()) - - def inner(f): - out = f - for value_set in itertools.product(*values): - out = hypothesis.example(**dict(zip(params, value_set)))(out) - return out - - return inner - - -################ -# Hypothesis strategies -def _valid_date_formats(): - return ("%Y-%m-%d", "%Y%m%d", "%G-W%V", "%GW%V", "%G-W%V-%u", "%GW%V%u") - - -def _valid_time_formats(max_precision=9): - subsecond_format_tuples = itertools.product( - ("%H:%M:%S", "%H%M%S"), - (f"%(f{prec})" for prec in range(1, max_precision)), - ) - subsecond_formats = ( - (".".join(comps), ",".join(comps)) for comps in subsecond_format_tuples - ) - time_formats = ("%H", "%H:%M", "%H:%M:%S", "%H%M", "%H%M%S") + tuple( - itertools.chain.from_iterable(subsecond_formats) - ) - - tz_formats = ("",) + tuple( - (f"[TZ:{tz_fmt}]" for tz_fmt in time_formats + ("Z",)) - ) - - return tuple(map("".join, itertools.product(time_formats, tz_formats))) - - -def _make_isoformatter_strategy(): - time_format = hypothesis.strategies.one_of( - hypothesis.strategies.just(()), # No time format - hypothesis.strategies.tuples( - hypothesis.strategies.one_of( - hypothesis.strategies.just("T"), # Shrink towards T and space - hypothesis.strategies.just(" "), - hypothesis.strategies.characters(), - ), - hypothesis.strategies.sampled_from(VALID_TIME_FORMATS), - ), - ) - - return hypothesis.strategies.tuples( - hypothesis.strategies.sampled_from(VALID_DATE_FORMATS), time_format - ).map(lambda x: IsoFormatter("".join((x[0],) + x[1]))) - - -VALID_DATE_FORMATS = _valid_date_formats() -VALID_TIME_FORMATS = _valid_time_formats() - -DATE_ISOFORMATTERS = hypothesis.strategies.sampled_from(VALID_DATE_FORMATS).map( - IsoFormatter -) -TIME_ISOFORMATTERS = hypothesis.strategies.sampled_from(VALID_TIME_FORMATS).map( - IsoFormatter -) -ISOFORMATTERS = _make_isoformatter_strategy() -FIXED_TIMEZONES = hypothesis.strategies.timedeltas( - min_value=timedelta(hours=-23, minutes=59, seconds=59, microseconds=999999), - 
max_value=timedelta(hours=23, minutes=59, seconds=59, microseconds=999999), -).map(timezone) -TIMEZONES = hypothesis.strategies.one_of( - hypothesis.strategies.none(), - FIXED_TIMEZONES, - hypothesis.strategies.timezones(), -) - -################ -# Constants -DEFAULT_DT = datetime(2025, 1, 2, 3, 4, 5, 678901) -AWARE_UTC_DT = datetime(2025, 1, 2, 3, 4, 5, 678901, tzinfo=timezone.utc) -AWARE_POS_DT = datetime( - 2025, 1, 2, 3, 5, 6, 678901, tzinfo=timezone(timedelta(hours=3)) -) -AWARE_NEG_DT = datetime( - 2025, 1, 2, 3, 5, 6, 678901, tzinfo=timezone(-timedelta(hours=3)) -) - - -################ -# Tests - - -class FromIsoformatDateTest_Base(unittest.TestCase): - @classmethod - def setUpClass(cls): - super().setUpClass() - cls.klass = cls.module.date - - -class FromIsoformatDateTest_Fast(FromIsoformatDateTest_Base): - module = c_datetime - - @hypothesis.given( - d=hypothesis.strategies.dates(), - iso_formatter=DATE_ISOFORMATTERS, - ) - @_cross_product_examples( - d=[ - date(2025, 1, 2), - date(2000, 1, 1), - date(1, 1, 1), - date(9999, 12, 31), - ], - iso_formatter=map(IsoFormatter, ["%Y-%m-%d", "%Y%m%d"]), - ) - @_cross_product_examples( - d=[date(2025, 1, 2), date(2025, 12, 31), date(2023, 1, 1), - date(2020, 12, 29), date(2021, 1, 1), date(2015, 12, 31)], - iso_formatter=map( - IsoFormatter, ["%G-W%V", "%GW%V", "%G-W%V-%u", "%GW%V%u"] - ), - ) - def test_fromisoformat_dates(self, d, iso_formatter): - iso_formatter = iso_formatter.with_module(self.module) - - if type(d) != self.klass: - d = self.klass(d.year, d.month, d.day) - - input_str = iso_formatter.format_date(d) - actual = self.klass.fromisoformat(input_str) - expected = iso_formatter.truncate(d) - - self.assertEqual( - actual, - expected, - f"\n{actual} != {expected}\n" - + f"actual = {actual!r}\n" - + f"expected = {expected!r}\n" - + f"input_str = {input_str}\n" - + f"formatter = {iso_formatter!r}", - ) - - -class FromIsoformatDateTest_Pure(FromIsoformatDateTest_Fast): - module = py_datetime - - -class 
FromIsoformatDateTimeTest_Fast(FromIsoformatDateTest_Fast): - module = c_datetime - - @classmethod - def setUpClass(cls): - super().setUpClass() - cls.klass = cls.module.datetime - - @hypothesis.given( - dt=hypothesis.strategies.datetimes(timezones=TIMEZONES), - iso_formatter=ISOFORMATTERS, - ) - # fmt: off - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%d")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%d")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y%m%dT%H:%M")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M%S")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H%M%S")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S.%(f1)")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S,%(f1)")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) - @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) - @hypothesis.example(dt=AWARE_POS_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:Z]")) - @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) - @hypothesis.example(dt=AWARE_NEG_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) - @hypothesis.example(dt=AWARE_POS_DT, 
iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H]")) - @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) - @hypothesis.example(dt=AWARE_NEG_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) - @hypothesis.example(dt=AWARE_POS_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S[TZ:%H%M]")) - @hypothesis.example(dt=datetime(2000, 1, 1, - tzinfo=timezone(-timedelta(hours=-22, microseconds=1))), - iso_formatter=IsoFormatter("%Y-%m-%dT%H[TZ:%H]")) - @hypothesis.example(dt=AWARE_UTC_DT, - iso_formatter=IsoFormatter("%Y-%m-%d0%H:%M:%S,%(f1)[TZ:%H:%M:%S.%(f2)]")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%G-W%V")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%G-W%V-%u")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%GW%V:%H")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%GW%V5%H")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%GW%V%u5%H")) - @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%G-W%V0%H[TZ:%H]")) - @hypothesis.example(dt=DEFAULT_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S,%(f3)[TZ:Z]")) - @hypothesis.example(dt=DEFAULT_DT.replace(tzinfo=timezone(timedelta(seconds=10))), iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S,%(f3)[TZ:Z]")) - @hypothesis.example(dt=AWARE_UTC_DT, iso_formatter=IsoFormatter("%Y-%m-%dT%H:%M:%S,%(f1)[TZ:%H%M%S.%(f2)]")) - @_cross_product_examples( - dt=[ - datetime(2020, 1, 1, 3, 5, 7, 123457, tzinfo=zoneinfo.ZoneInfo("America/New_York")), - datetime(2020, 6, 1, 4, 5, 6, 111111, tzinfo=zoneinfo.ZoneInfo("America/New_York")), - datetime(2021, 10, 31, 1, 30, tzinfo=zoneinfo.ZoneInfo("Europe/London")), - ], - iso_formatter=[ - IsoFormatter("%Y-%m-%dT%H:%M:%S.%(f6)[TZ:%H:%M]"), - IsoFormatter("%Y-%m-%dT%H:%M:%S.%(f6)[TZ:%H%M]"), - ]) - # fmt: on - def test_fromisoformat(self, dt, iso_formatter): - iso_formatter = iso_formatter.with_module(self.module) - - if 
dt.tzinfo is None or isinstance(dt.tzinfo, self.module.timezone): - new_tzinfo = dt.tzinfo - else: - new_offset = self.module.timedelta( - seconds=dt.utcoffset().total_seconds() - ) - new_tzinfo = self.module.timezone(new_offset, dt.tzname()) - - if not isinstance(dt, self.module.datetime): - dt = self.klass( - dt.year, - dt.month, - dt.day, - dt.hour, - dt.minute, - dt.second, - dt.microsecond, - tzinfo=new_tzinfo, - fold=dt.fold, - ) - elif dt.tzinfo is not new_tzinfo: - dt = dt.astimezone(new_tzinfo) - - if "%G" in iso_formatter._format_str: - if ( - iso_formatter._format_str.startswith("%G-W%V-%u") - and len(iso_formatter._format_str) > 9 - ): - hypothesis.assume(not iso_formatter._format_str[9].isdigit()) - - input_str = iso_formatter.format_datetime(dt) - actual = self.klass.fromisoformat(input_str) - expected = iso_formatter.truncate(dt) - - self.assertEqual( - actual, - expected, - f"\n{actual} != {expected}\n" - + f"actual = {actual!r}\n" - + f"expected = {expected!r} \n" - + f"input_str = {input_str}", - ) - - -class FromIsoformatDateTimeTest_Pure(FromIsoformatDateTimeTest_Fast): - module = py_datetime - - -class FromIsoformatTimeTest_Base(unittest.TestCase): - @classmethod - def setUpClass(cls): - super().setUpClass() - cls.klass = cls.module.time - - -class FromIsoformatTimeTest_Fast(FromIsoformatTimeTest_Base): - module = c_datetime - - @hypothesis.given( - t=hypothesis.strategies.times( - timezones=FIXED_TIMEZONES | hypothesis.strategies.none() - ), - iso_formatter=TIME_ISOFORMATTERS, - ) - @_cross_product_examples( - t=[ - time(0, 0), - time(12, 0), - time(23, 59, 59, 999999), - time(12, 0, tzinfo=timezone.utc), - time(12, 0, tzinfo=timezone(timedelta(hours=-5))), - ], - iso_formatter=map( - IsoFormatter, - [ - "%H:%M", - "T%H:%M", - "%H%M", - "%H:%M:%S", - "%H%M%S", - "%H:%M:%S.%(f6)", - "%H%M%S.%(f6)", - "%H:%M:%S.%(f3)", - "%H%M%S.%(f1)", - "%H%M%S.%(f3)", - "%H:%M:%S[TZ:%H:%M]", - "%H:%M:%S[TZ:%H%M]", - "T%H:%M:%S", - "T%H%M%S", - ], - ), - ) - 
@hypothesis.example( - t=time(0, 0, tzinfo=timezone.utc), - iso_formatter=IsoFormatter("%H:%M:%S[TZ:Z]"), - ) - @_cross_product_examples( - t=[ - time(0, 0, tzinfo=timezone(timedelta(hours=5, minutes=30))), - ], - iso_formatter=map( - IsoFormatter, ("%H:%M:%S[TZ:%H]", "%H:%M:%S[TZ:%H:%M]") - ), - ) - @hypothesis.example( - t=time( - 0, - 0, - tzinfo=timezone( - -timedelta( - hours=23, minutes=59, seconds=59, microseconds=999999 - ) - ), - ), - iso_formatter=IsoFormatter("%H:%M:%S,%(f3)[TZ:Z]"), - ) - def test_fromisoformat_times(self, t, iso_formatter): - iso_formatter = iso_formatter.with_module(self.module) - - if t.tzinfo is None or isinstance(t.tzinfo, self.module.timezone): - new_tzinfo = t.tzinfo - else: - new_offset = self.module.timedelta( - seconds=t.utcoffset().total_seconds() - ) - new_tzinfo = self.module.timezone(new_offset, t.tzname()) - - if not isinstance(t, self.module.time): - t = self.klass( - hour=t.hour, - minute=t.minute, - second=t.second, - microsecond=t.microsecond, - tzinfo=new_tzinfo, - fold=t.fold, - ) - elif t.tzinfo is not new_tzinfo: - t = t.replace(tzinfo=new_tzinfo) - - input_str = iso_formatter.format_time(t) - actual = self.klass.fromisoformat(input_str) - expected = iso_formatter.truncate(t) - - self.assertEqual( - actual, - expected, - f"\n{actual} != {expected}\n" - + f"actual = {actual!r}\n" - + f"expected = {expected!r} \n" - + f"input_str = {input_str}\n" - + f"formatter = {iso_formatter!r}", - ) - - -class FromIsoformatTimeTest_Pure(FromIsoformatTimeTest_Fast): - module = py_datetime