diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index c81408b344968d..47a2df1290b6ef 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -2955,6 +2955,16 @@ def test_more_strftime(self): except UnicodeEncodeError: pass + def test_strftime_embedded_nul(self): + # gh-124531: The null character should not terminate the format string. + t = self.theclass(2004, 12, 31, 6, 22, 33, 47) + self.assertEqual(t.strftime('\0'), '\0') + self.assertEqual(t.strftime('\0'*1000), '\0'*1000) + s1 = t.strftime('%c') + s2 = t.strftime('%x') + self.assertEqual(t.strftime('\0%c\0%x'), f'\0{s1}\0{s2}') + self.assertEqual(t.strftime('\0%c\0%x\0'), f'\0{s1}\0{s2}\0') + def test_extract(self): dt = self.theclass(2002, 3, 4, 18, 45, 3, 1234) self.assertEqual(dt.date(), date(2002, 3, 4)) @@ -3736,6 +3746,16 @@ def test_strftime(self): # gh-85432: The parameter was named "fmt" in the pure-Python impl. t.strftime(format="%f") + def test_strftime_embedded_nul(self): + # gh-124531: The null character should not terminate the format string. + t = self.theclass(1, 2, 3, 4) + self.assertEqual(t.strftime('\0'), '\0') + self.assertEqual(t.strftime('\0'*1000), '\0'*1000) + s1 = t.strftime('%Z') + s2 = t.strftime('%X') + self.assertEqual(t.strftime('\0%Z\0%X'), f'\0{s1}\0{s2}') + self.assertEqual(t.strftime('\0%Z\0%X\0'), f'\0{s1}\0{s2}\0') + def test_format(self): t = self.theclass(1, 2, 3, 4) self.assertEqual(t.__format__(''), str(t)) diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py index 530c317a852e77..caaa20443989e9 100644 --- a/Lib/test/test_time.py +++ b/Lib/test/test_time.py @@ -182,8 +182,16 @@ def test_strftime(self): self.fail('conversion specifier: %r failed.' % format) self.assertRaises(TypeError, time.strftime, b'%S', tt) - # embedded null character - self.assertRaises(ValueError, time.strftime, '%S\0', tt) + + def test_strftime_embedded_nul(self): + # gh-124531: The null character should not terminate the format string. + tt = time.gmtime(self.t) + self.assertEqual(time.strftime('\0', tt), '\0') + self.assertEqual(time.strftime('\0'*1000, tt), '\0'*1000) + s1 = time.strftime('%c', tt) + s2 = time.strftime('%x', tt) + self.assertEqual(time.strftime('\0%c\0%x', tt), f'\0{s1}\0{s2}') + self.assertEqual(time.strftime('\0%c\0%x\0', tt), f'\0{s1}\0{s2}\0') def _bounds_checking(self, func): # Make sure that strftime() checks the bounds of the various parts diff --git a/Misc/NEWS.d/next/Library/2024-10-05-16-42-33.gh-issue-124531.1ZDOwp.rst b/Misc/NEWS.d/next/Library/2024-10-05-16-42-33.gh-issue-124531.1ZDOwp.rst new file mode 100644 index 00000000000000..eb7d2fc7674b33 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-05-16-42-33.gh-issue-124531.1ZDOwp.rst @@ -0,0 +1,4 @@ +Fix :func:`time.strftime`, the :meth:`~datetime.datetime.strftime` method of +the :mod:`datetime` classes :class:`~datetime.datetime`, +:class:`~datetime.date` and :class:`~datetime.time` and formatting of these +classes with format strings containing embedded null characters. diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 90527d2a3e0350..0138a90a9f981a 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -1837,6 +1837,7 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple, PyObject *freplacement = NULL; /* py string, replacement for %f */ const char *pin; /* pointer to next char in input format */ + const char *pend; /* pointer past the end of input format */ Py_ssize_t flen; /* length of input format */ char ch; /* next char in input format */ @@ -1886,22 +1887,15 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple, pnew = PyBytes_AsString(newfmt); usednew = 0; - while ((ch = *pin++) != '\0') { - if (ch != '%') { - ptoappend = pin - 1; - ntoappend = 1; - } - else if ((ch = *pin++) == '\0') { - /* Null byte follows %, copy only '%'. - * - * Back the pin up one char so that we catch the null check - * the next time through the loop.*/ - pin--; + pend = pin + flen; + while (pin != pend) { + ch = *pin++; + if (ch != '%' || pin == pend) { ptoappend = pin - 1; ntoappend = 1; } /* A % has been seen and ch is the character after it. */ - else if (ch == 'z') { + else if ((ch = *pin++) == 'z') { /* %z -> +HHMM */ if (zreplacement == NULL) { zreplacement = make_somezreplacement(object, "", tzinfoarg); @@ -2035,12 +2029,10 @@ wrap_strftime(PyObject *object, PyObject *format, PyObject *timetuple, assert(usednew <= totalnew); } /* end while() */ - if (_PyBytes_Resize(&newfmt, usednew) < 0) - goto Done; { PyObject *format; - format = PyUnicode_FromString(PyBytes_AS_STRING(newfmt)); + format = PyUnicode_FromStringAndSize(PyBytes_AS_STRING(newfmt), usednew); if (format != NULL) { result = PyObject_CallFunctionObjArgs(strftime, format, timetuple, NULL); diff --git a/Modules/timemodule.c b/Modules/timemodule.c index 9720c201a184a8..e6b57f075288e2 100644 --- a/Modules/timemodule.c +++ b/Modules/timemodule.c @@ -787,16 +787,14 @@ time_strftime(PyObject *module, PyObject *args) PyObject *format; #endif PyObject *format_arg; + Py_ssize_t fmtsize; size_t fmtlen, buflen; time_char *outbuf = NULL; - size_t i; + size_t outsize, outpos; PyObject *ret = NULL; memset((void *) &buf, '\0', sizeof(buf)); - /* Will always expect a unicode string to be passed as format. - Given that there's no str type anymore in py3k this seems safe. - */ if (!PyArg_ParseTuple(args, "U|O:strftime", &format_arg, &tup)) return NULL; @@ -835,7 +833,7 @@ time_strftime(PyObject *module, PyObject *args) buf.tm_isdst = 1; #ifdef HAVE_WCSFTIME - format = PyUnicode_AsWideCharString(format_arg, NULL); + format = PyUnicode_AsWideCharString(format_arg, &fmtsize); if (format == NULL) return NULL; fmt = format; @@ -845,19 +843,20 @@ time_strftime(PyObject *module, PyObject *args) if (format == NULL) return NULL; fmt = PyBytes_AS_STRING(format); + fmtsize = PyBytes_GET_SIZE(format); #endif #if defined(MS_WINDOWS) && !defined(HAVE_WCSFTIME) /* check that the format string contains only valid directives */ - for (outbuf = strchr(fmt, '%'); - outbuf != NULL; - outbuf = strchr(outbuf+2, '%')) + for (const time_char *f = memchr(fmt, '%', fmtsize); + f != NULL; + f = memchr(f + 2, '%', fmtsize - (f + 2 - fmt))) { - if (outbuf[1] == '#') - ++outbuf; /* not documented by python, */ - if (outbuf[1] == '\0') + if (f[1] == '#') + ++f; /* not documented by python, */ + if (f + 1 >= fmt + fmtsize) break; - if ((outbuf[1] == 'y') && buf.tm_year < 0) { + if ((f[1] == 'y') && buf.tm_year < 0) { PyErr_SetString(PyExc_ValueError, "format %y requires year >= 1900 on Windows"); Py_DECREF(format); @@ -865,15 +864,15 @@ time_strftime(PyObject *module, PyObject *args) } } #elif (defined(_AIX) || (defined(__sun) && defined(__SVR4))) && defined(HAVE_WCSFTIME) - for (outbuf = wcschr(fmt, '%'); - outbuf != NULL; - outbuf = wcschr(outbuf+2, '%')) + for (const time_char *f = wmemchr(fmt, '%', fmtsize); + f != NULL; + f = wmemchr(f + 2, '%', fmtsize - (f + 2 - fmt))) { - if (outbuf[1] == L'\0') + if (f + 1 >= fmt + fmtsize) break; /* Issue #19634: On AIX, wcsftime("y", (1899, 1, 1, 0, 0, 0, 0, 0, 0)) returns "0/" instead of "99" */ - if (outbuf[1] == L'y' && buf.tm_year < 0) { + if (f[1] == L'y' && buf.tm_year < 0) { PyErr_SetString(PyExc_ValueError, "format %y requires year >= 1900 on AIX"); PyMem_Free(format); @@ -882,47 +881,71 @@ time_strftime(PyObject *module, PyObject *args) } #endif - fmtlen = time_strlen(fmt); - /* I hate these functions that presume you know how big the output * will be ahead of time... */ - for (i = 1024; ; i += i) { - outbuf = (time_char *)PyMem_Malloc(i*sizeof(time_char)); + outsize = fmtsize + 128; + outpos = 0; + fmtlen = time_strlen(fmt); + while (1) { + outbuf = (time_char *)PyMem_Realloc(outbuf, outsize*sizeof(time_char)); if (outbuf == NULL) { PyErr_NoMemory(); break; } + if (fmtlen == 0) { + /* Empty format string or leading or trailing NUL, + or consequent NULs. + strftime() on macOS does not work well with empty format string. + */ + if (outpos == outsize) { + outsize += outsize; + continue; + } + outbuf[outpos] = 0; + } + else { #if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__) - errno = 0; + errno = 0; #endif - _Py_BEGIN_SUPPRESS_IPH - buflen = format_time(outbuf, i, fmt, &buf); - _Py_END_SUPPRESS_IPH + _Py_BEGIN_SUPPRESS_IPH + buflen = format_time(outbuf + outpos, outsize - outpos, fmt, &buf); + _Py_END_SUPPRESS_IPH #if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__) - /* VisualStudio .NET 2005 does this properly */ - if (buflen == 0 && errno == EINVAL) { - PyErr_SetString(PyExc_ValueError, "Invalid format string"); - PyMem_Free(outbuf); - break; - } + /* VisualStudio .NET 2005 does this properly */ + if (buflen == 0 && errno == EINVAL) { + PyErr_SetString(PyExc_ValueError, "Invalid format string"); + break; + } #endif - if (buflen > 0 || i >= 256 * fmtlen) { + if (buflen == 0 && outsize - outpos < 256 * fmtlen) { + outsize += outsize; + continue; + } /* If the buffer is 256 times as long as the format, it's probably not failing for lack of room! More likely, the format yields an empty result, e.g. an empty format, or %Z when the timezone is unknown. */ + outpos += buflen; + } + if (fmtlen < (size_t)fmtsize) { + /* It was not terminating NUL, but an embedded NUL. + Skip the NUL and continue. */ + outpos++; + fmt += fmtlen + 1; + fmtsize -= fmtlen + 1; + fmtlen = time_strlen(fmt); + continue; + } #ifdef HAVE_WCSFTIME - ret = PyUnicode_FromWideChar(outbuf, buflen); + ret = PyUnicode_FromWideChar(outbuf, outpos); #else - ret = PyUnicode_DecodeLocaleAndSize(outbuf, buflen, "surrogateescape"); + ret = PyUnicode_DecodeLocaleAndSize(outbuf, outpos, "surrogateescape"); #endif - PyMem_Free(outbuf); - break; - } - PyMem_Free(outbuf); + break; } + PyMem_Free(outbuf); #ifdef HAVE_WCSFTIME PyMem_Free(format); #else