From 5cca9a482102c6c2801bd17a129a2505beb4cd1b Mon Sep 17 00:00:00 2001
From: Victor Stinner
Date: Wed, 4 Oct 2023 16:33:37 +0200
Subject: [PATCH] Add PyUnicode_EqualToUTF8() function

Add PyUnicode_EqualToUTF8() and PyUnicode_EqualToUTF8AndSize()
functions.
---
 docs/api.rst                        |  8 ++++
 docs/changelog.rst                  |  5 ++
 pythoncapi_compat.h                 | 78 +++++++++++++++++++++++++++++
 tests/test_pythoncapi_compat_cext.c | 37 ++++++++++++++
 4 files changed, 128 insertions(+)

diff --git a/docs/api.rst b/docs/api.rst
index 9204d9a..3725b4e 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -113,6 +113,14 @@ Python 3.13
 
    Available on Python 3.5.2 and newer.
 
+.. c:function:: int PyUnicode_EqualToUTF8(PyObject *unicode, const char *str)
+
+   See `PyUnicode_EqualToUTF8() documentation <https://docs.python.org/dev/c-api/unicode.html#c.PyUnicode_EqualToUTF8>`__.
+
+.. c:function:: int PyUnicode_EqualToUTF8AndSize(PyObject *unicode, const char *str, Py_ssize_t size)
+
+   See `PyUnicode_EqualToUTF8AndSize() documentation <https://docs.python.org/dev/c-api/unicode.html#c.PyUnicode_EqualToUTF8AndSize>`__.
+
 Python 3.12
 -----------
 
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 9fd3d06..4147a4a 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1,6 +1,11 @@
 Changelog
 =========
 
+* 2023-10-04: Add functions:
+
+  * ``PyUnicode_EqualToUTF8()``
+  * ``PyUnicode_EqualToUTF8AndSize()``
+
 * 2023-10-03: Add functions:
 
   * ``PyObject_VisitManagedDict()``
diff --git a/pythoncapi_compat.h b/pythoncapi_compat.h
index dcd97ff..f5b1b78 100644
--- a/pythoncapi_compat.h
+++ b/pythoncapi_compat.h
@@ -939,6 +939,84 @@ PyThreadState_GetUnchecked(void)
 }
 #endif
 
+// gh-110289 added PyUnicode_EqualToUTF8() and PyUnicode_EqualToUTF8AndSize()
+// to Python 3.13.0a1
+#if PY_VERSION_HEX < 0x030D00A1
+// Return 1 if the UTF-8 encoding of 'unicode' is exactly the 'str_len' bytes
+// at 'str', 0 otherwise (including when the encoding cannot be obtained).
+// The exception set on entry, if any, is preserved.
+static inline int
+PyUnicode_EqualToUTF8AndSize(PyObject *unicode, const char *str, Py_ssize_t str_len)
+{
+    Py_ssize_t len;
+    const void *utf8;
+    PyObject *exc_type, *exc_value, *exc_tb;
+    int res;
+
+    // API cannot report errors so save/restore the exception
+    PyErr_Fetch(&exc_type, &exc_value, &exc_tb);
+
+    // Python 3.3.0a1 added PyUnicode_AsUTF8AndSize()
+#if PY_VERSION_HEX >= 0x030300A1
+    if (PyUnicode_IS_ASCII(unicode)) {
+        utf8 = PyUnicode_DATA(unicode);
+        len = PyUnicode_GET_LENGTH(unicode);
+    }
+    else {
+        utf8 = PyUnicode_AsUTF8AndSize(unicode, &len);
+        if (utf8 == NULL) {
+            // Memory allocation failure. The API cannot report error,
+            // so ignore the exception and return 0.
+            res = 0;
+            goto done;
+        }
+    }
+
+    if (len != str_len) {
+        res = 0;
+        goto done;
+    }
+    res = (memcmp(utf8, str, (size_t)len) == 0);
+#else
+    PyObject *bytes = PyUnicode_AsUTF8String(unicode);
+    if (bytes == NULL) {
+        // Memory allocation failure. The API cannot report error,
+        // so ignore the exception and return 0.
+        res = 0;
+        goto done;
+    }
+
+#if PY_VERSION_HEX >= 0x03000000
+    len = PyBytes_GET_SIZE(bytes);
+    utf8 = PyBytes_AS_STRING(bytes);
+#else
+    len = PyString_GET_SIZE(bytes);
+    utf8 = PyString_AS_STRING(bytes);
+#endif
+    if (len != str_len) {
+        Py_DECREF(bytes);
+        res = 0;
+        goto done;
+    }
+
+    res = (memcmp(utf8, str, (size_t)len) == 0);
+    Py_DECREF(bytes);
+#endif
+
+done:
+    PyErr_Restore(exc_type, exc_value, exc_tb);
+    return res;
+}
+
+// Same as PyUnicode_EqualToUTF8AndSize(), with 'str' being a NUL-terminated
+// string whose length is computed with strlen().
+static inline int
+PyUnicode_EqualToUTF8(PyObject *unicode, const char *str)
+{
+    return PyUnicode_EqualToUTF8AndSize(unicode, str, (Py_ssize_t)strlen(str));
+}
+#endif
+
 
 #ifdef __cplusplus
 }
diff --git a/tests/test_pythoncapi_compat_cext.c b/tests/test_pythoncapi_compat_cext.c
index 78ee1c8..8d8ef38 100644
--- a/tests/test_pythoncapi_compat_cext.c
+++ b/tests/test_pythoncapi_compat_cext.c
@@ -1361,6 +1361,42 @@ test_managed_dict(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
 #endif // PY_VERSION_HEX >= 0x030B00A3
 
 
+static PyObject *
+test_unicode(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args))
+{
+    PyObject *abc = PyUnicode_FromString("abc");
+    if (abc == NULL) {
+        return NULL;
+    }
+
+    // String with an embedded null character
+    PyObject *abc0def = PyUnicode_FromStringAndSize("abc\0def", 7);
+    if (abc0def == NULL) {
+        Py_DECREF(abc);
+        return NULL;
+    }
+
+    // PyUnicode_EqualToUTF8() and PyUnicode_EqualToUTF8AndSize() can be called
+    // with an exception raised and they must not clear the current exception.
+    PyErr_NoMemory();
+
+    assert(PyUnicode_EqualToUTF8AndSize(abc, "abc", 3) == 1);
+    assert(PyUnicode_EqualToUTF8AndSize(abc, "Python", 6) == 0);
+    assert(PyUnicode_EqualToUTF8AndSize(abc0def, "abc\0def", 7) == 1);
+
+    assert(PyUnicode_EqualToUTF8(abc, "abc") == 1);
+    assert(PyUnicode_EqualToUTF8(abc, "Python") == 0);
+    assert(PyUnicode_EqualToUTF8(abc0def, "abc\0def") == 0);
+
+    assert(PyErr_ExceptionMatches(PyExc_MemoryError));
+    PyErr_Clear();
+
+    Py_DECREF(abc);
+    Py_DECREF(abc0def);
+    Py_RETURN_NONE;
+}
+
+
 static struct PyMethodDef methods[] = {
     {"test_object", test_object, METH_NOARGS, _Py_NULL},
     {"test_py_is", test_py_is, METH_NOARGS, _Py_NULL},
@@ -1390,6 +1426,7 @@ static struct PyMethodDef methods[] = {
 #ifdef TEST_MANAGED_DICT
     {"test_managed_dict", test_managed_dict, METH_NOARGS, _Py_NULL},
 #endif
+    {"test_unicode", test_unicode, METH_NOARGS, _Py_NULL},
     {_Py_NULL, _Py_NULL, 0, _Py_NULL}
 };
 