From a9b1c0cc1f599500ca8026f8232bf65f4ee7aced Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:35:35 +0200 Subject: [PATCH 01/11] fix OOB in `UnicodeError.__str__` --- Objects/exceptions.c | 119 ++++++++++++++++++++++--------------------- 1 file changed, 60 insertions(+), 59 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index b3910855165494..0d1fd205068642 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2994,52 +2994,53 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds) static PyObject * UnicodeEncodeError_str(PyObject *self) { - PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self; - PyObject *result = NULL; - PyObject *reason_str = NULL; - PyObject *encoding_str = NULL; + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - if (!uself->object) + if (exc->object == NULL) { /* Not properly initialized. */ return PyUnicode_FromString(""); + } /* Get reason and encoding as strings, which they might not be if they've been modified after we were constructed. */ - reason_str = PyObject_Str(uself->reason); - if (reason_str == NULL) - goto done; - encoding_str = PyObject_Str(uself->encoding); - if (encoding_str == NULL) - goto done; + PyObject *reason = PyObject_Str(exc->reason); + if (reason == NULL) { + return NULL; + } + PyObject *encoding = PyObject_Str(exc->encoding); + if (encoding == NULL) { + Py_DECREF(reason); + return NULL; + } - if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) { - Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start); + PyObject *res; + ssize_t len = PyUnicode_GET_LENGTH(exc->object); + ssize_t start = exc->start, end = exc->end; + + if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) { + Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start); const char *fmt; - if (badchar <= 0xff) + if (badchar <= 0xff) { fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U"; - else if (badchar <= 0xffff) + } + else if (badchar <= 0xffff) { fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U"; - else + } + else { fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U"; - result = PyUnicode_FromFormat( - fmt, - encoding_str, - (int)badchar, - uself->start, - reason_str); + } + res = PyUnicode_FromFormat(fmt, encoding, (int)badchar, start, reason); } else { - result = PyUnicode_FromFormat( + res = PyUnicode_FromFormat( "'%U' codec can't encode characters in position %zd-%zd: %U", - encoding_str, - uself->start, - uself->end-1, - reason_str); + encoding, start, end - 1, reason + ); } -done: - Py_XDECREF(reason_str); - Py_XDECREF(encoding_str); - return result; + + Py_DECREF(reason); + Py_DECREF(encoding); + return res; } static PyTypeObject _PyExc_UnicodeEncodeError = { @@ -3107,46 +3108,46 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) static PyObject * UnicodeDecodeError_str(PyObject *self) { - PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self; - PyObject *result = NULL; - PyObject *reason_str = NULL; - PyObject *encoding_str = NULL; + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - if (!uself->object) + if (!exc->object) { /* Not properly initialized. */ return PyUnicode_FromString(""); + } /* Get reason and encoding as strings, which they might not be if they've been modified after we were constructed. */ - reason_str = PyObject_Str(uself->reason); - if (reason_str == NULL) - goto done; - encoding_str = PyObject_Str(uself->encoding); - if (encoding_str == NULL) - goto done; + PyObject *reason = PyObject_Str(exc->reason); + if (reason == NULL) { + return NULL; + } + PyObject *encoding = PyObject_Str(exc->encoding); + if (encoding == NULL) { + Py_DECREF(reason); + return NULL; + } - if (uself->start < PyBytes_GET_SIZE(uself->object) && uself->end == uself->start+1) { - int byte = (int)(PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[uself->start]&0xff); - result = PyUnicode_FromFormat( + PyObject *res; + ssize_t len = PyBytes_GET_SIZE(exc->object); + ssize_t start = exc->start, end = exc->end; + + if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) { + int badbyte = (int)(PyBytes_AS_STRING(exc->object)[start] & 0xff); + res = PyUnicode_FromFormat( "'%U' codec can't decode byte 0x%02x in position %zd: %U", - encoding_str, - byte, - uself->start, - reason_str); + encoding, badbyte, start, reason + ); } else { - result = PyUnicode_FromFormat( + res = PyUnicode_FromFormat( "'%U' codec can't decode bytes in position %zd-%zd: %U", - encoding_str, - uself->start, - uself->end-1, - reason_str - ); + encoding, start, end - 1, reason + ); } -done: - Py_XDECREF(reason_str); - Py_XDECREF(encoding_str); - return result; + + Py_DECREF(reason); + Py_DECREF(encoding); + return res; } static PyTypeObject _PyExc_UnicodeDecodeError = { From 356742648151df9dc6d88c36c581a965566364b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:39:46 +0200 Subject: [PATCH 02/11] blurb --- .../2024-10-03-14-39-41.gh-issue-123378.dCxANf.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-03-14-39-41.gh-issue-123378.dCxANf.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-03-14-39-41.gh-issue-123378.dCxANf.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-03-14-39-41.gh-issue-123378.dCxANf.rst new file mode 100644 index 00000000000000..5cd34535d674d3 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-03-14-39-41.gh-issue-123378.dCxANf.rst @@ -0,0 +1,3 @@ +Fix a crash in the :meth:`~object.__str__` method of :exc:`UnicodeError` +objects when the :attr:`UnicodeError.start` and :attr:`UnicodeError.end` +values are invalid or out-of-range. Patch by Bénédikt Tran. From 43141c0bfd3f4ef4d96dd935e258def44e8fe975 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:43:29 +0200 Subject: [PATCH 03/11] add tests --- Lib/test/test_exceptions.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index ba858c49400911..cf81acd29a8e9e 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -8,6 +8,7 @@ import weakref import errno from codecs import BOM_UTF8 +from itertools import product from textwrap import dedent from test.support import (captured_stderr, check_impl_detail, @@ -1336,6 +1337,22 @@ def test_unicode_errors_no_object(self): for klass in klasses: self.assertEqual(str(klass.__new__(klass)), "") + def test_unicode_error_str_gh_123378(self): + for meth, start, end in product( + (str, repr), + range(-5, 5), + range(-5, 5), + ): + for obj in ('', 'a'): + with self.subTest(meth, obj=obj, start=start, end=end): + res = meth(UnicodeEncodeError('utf-8', obj, start, end, '')) + self.assertIsInstance(res, str) + + for obj in (b'', b'a'): + with self.subTest(meth, obj=obj, start=start, end=end): + res = meth(UnicodeDecodeError('utf-8', obj, start, end, '')) + self.assertIsInstance(res, str) + @no_tracing def test_badisinstance(self): # Bug #2542: if issubclass(e, MyException) raises an exception, From 0c613750ac03f6016d99bb015d347378dffff8b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:52:31 +0200 Subject: [PATCH 04/11] small fixup --- Objects/exceptions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 0d1fd205068642..fa5dd4be3af0a0 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -3110,7 +3110,7 @@ UnicodeDecodeError_str(PyObject *self) { PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - if (!exc->object) { + if (exc->object == NULL) { /* Not properly initialized. */ return PyUnicode_FromString(""); } From 80287f624058cd3ba13d10208e5a2d9de0883ac0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:59:21 +0200 Subject: [PATCH 05/11] Fix Windows compilation --- Objects/exceptions.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index fa5dd4be3af0a0..64a6d7f5fc4c8a 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -3014,8 +3014,8 @@ UnicodeEncodeError_str(PyObject *self) } PyObject *res; - ssize_t len = PyUnicode_GET_LENGTH(exc->object); - ssize_t start = exc->start, end = exc->end; + Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object); + Py_ssize_t start = exc->start, end = exc->end; if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) { Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start); @@ -3128,8 +3128,8 @@ UnicodeDecodeError_str(PyObject *self) } PyObject *res; - ssize_t len = PyBytes_GET_SIZE(exc->object); - ssize_t start = exc->start, end = exc->end; + Py_ssize_t len = PyBytes_GET_SIZE(exc->object); + Py_ssize_t start = exc->start, end = exc->end; if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) { int badbyte = (int)(PyBytes_AS_STRING(exc->object)[start] & 0xff); From 7bbc0f59b05d28ba855e1bf2bd16a66b7bbdb0d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 4 Oct 2024 13:54:16 +0200 Subject: [PATCH 06/11] improve test coverage --- Lib/test/test_exceptions.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index cf81acd29a8e9e..c441e93ba77ad7 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -1338,20 +1338,20 @@ def test_unicode_errors_no_object(self): self.assertEqual(str(klass.__new__(klass)), "") def test_unicode_error_str_gh_123378(self): - for meth, start, end in product( + for fn, start, end, obj in product( (str, repr), range(-5, 5), range(-5, 5), + ('', 'a', '123', '1234', '12345', 'abc123'), ): - for obj in ('', 'a'): - with self.subTest(meth, obj=obj, start=start, end=end): - res = meth(UnicodeEncodeError('utf-8', obj, start, end, '')) - self.assertIsInstance(res, str) - - for obj in (b'', b'a'): - with self.subTest(meth, obj=obj, start=start, end=end): - res = meth(UnicodeDecodeError('utf-8', obj, start, end, '')) - self.assertIsInstance(res, str) + with self.subTest(fn, obj=obj, start=start, end=end): + exc = UnicodeEncodeError('utf-8', obj, start, end, '') + self.assertIsInstance(fn(exc), str) + + encoded = obj.encode() + with self.subTest(fn, obj=encoded, start=start, end=end): + exc = UnicodeDecodeError('utf-8', encoded, start, end, '') + self.assertIsInstance(fn(exc), str) @no_tracing def test_badisinstance(self): From 2313cd1def7843e9d5f42690805c7ca1373d8b8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 4 Oct 2024 14:21:33 +0200 Subject: [PATCH 07/11] fix `UnicodeTranslateError` --- Objects/exceptions.c | 55 ++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 64a6d7f5fc4c8a..587256f393f271 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -3205,46 +3205,47 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args, static PyObject * UnicodeTranslateError_str(PyObject *self) { - PyUnicodeErrorObject *uself = (PyUnicodeErrorObject *)self; - PyObject *result = NULL; - PyObject *reason_str = NULL; + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - if (!uself->object) + if (exc->object == NULL) { /* Not properly initialized. */ return PyUnicode_FromString(""); + } /* Get reason as a string, which it might not be if it's been modified after we were constructed. */ - reason_str = PyObject_Str(uself->reason); - if (reason_str == NULL) - goto done; + PyObject *reason = PyObject_Str(exc->reason); + if (reason == NULL) { + return NULL; + } + + PyObject *res; + Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object); + Py_ssize_t start = exc->start, end = exc->end; - if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) { - Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start); + if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) { + Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start); const char *fmt; - if (badchar <= 0xff) + if (badchar <= 0xff) { fmt = "can't translate character '\\x%02x' in position %zd: %U"; - else if (badchar <= 0xffff) + } + else if (badchar <= 0xffff) { fmt = "can't translate character '\\u%04x' in position %zd: %U"; - else + } + else { fmt = "can't translate character '\\U%08x' in position %zd: %U"; - result = PyUnicode_FromFormat( - fmt, - (int)badchar, - uself->start, - reason_str - ); - } else { - result = PyUnicode_FromFormat( + } + res = PyUnicode_FromFormat(fmt, (int)badchar, start, reason); + } + else { + res = PyUnicode_FromFormat( "can't translate characters in position %zd-%zd: %U", - uself->start, - uself->end-1, - reason_str - ); + start, end - 1, reason + ); } -done: - Py_XDECREF(reason_str); - return result; + + Py_DECREF(reason); + return res; } static PyTypeObject _PyExc_UnicodeTranslateError = { From a27a88bff15a3245e33fce0c305aa3db8a1f437b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 4 Oct 2024 14:19:00 +0200 Subject: [PATCH 08/11] improve test coverage --- Lib/test/test_exceptions.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index c441e93ba77ad7..0cc2913d940664 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -1338,20 +1338,24 @@ def test_unicode_errors_no_object(self): self.assertEqual(str(klass.__new__(klass)), "") def test_unicode_error_str_gh_123378(self): - for fn, start, end, obj in product( + for formatter, start, end, obj in product( (str, repr), range(-5, 5), range(-5, 5), ('', 'a', '123', '1234', '12345', 'abc123'), ): - with self.subTest(fn, obj=obj, start=start, end=end): + with self.subTest(formatter, obj=obj, start=start, end=end): exc = UnicodeEncodeError('utf-8', obj, start, end, '') - self.assertIsInstance(fn(exc), str) + self.assertIsInstance(formatter(exc), str) + + with self.subTest(formatter, obj=obj, start=start, end=end): + exc = UnicodeTranslateError(obj, start, end, '') + self.assertIsInstance(formatter(exc), str) encoded = obj.encode() - with self.subTest(fn, obj=encoded, start=start, end=end): + with self.subTest(formatter, obj=encoded, start=start, end=end): exc = UnicodeDecodeError('utf-8', encoded, start, end, '') - self.assertIsInstance(fn(exc), str) + self.assertIsInstance(formatter(exc), str) @no_tracing def test_badisinstance(self): From 360c1a583ecaf8d1a978c0f4d8450f7c39e5e583 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 5 Oct 2024 12:22:37 +0200 Subject: [PATCH 09/11] revert cosmetic changes --- Objects/exceptions.c | 144 ++++++++++++++++++++++--------------------- 1 file changed, 74 insertions(+), 70 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 587256f393f271..ed3ea1f1272c64 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2995,52 +2995,54 @@ static PyObject * UnicodeEncodeError_str(PyObject *self) { PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *result = NULL; + PyObject *reason_str = NULL; + PyObject *encoding_str = NULL; - if (exc->object == NULL) { + if (exc->object == NULL) /* Not properly initialized. */ return PyUnicode_FromString(""); - } /* Get reason and encoding as strings, which they might not be if they've been modified after we were constructed. */ - PyObject *reason = PyObject_Str(exc->reason); - if (reason == NULL) { - return NULL; - } - PyObject *encoding = PyObject_Str(exc->encoding); - if (encoding == NULL) { - Py_DECREF(reason); - return NULL; - } + reason_str = PyObject_Str(exc->reason); + if (reason_str == NULL) + goto done; + encoding_str = PyObject_Str(exc->encoding); + if (encoding_str == NULL) + goto done; - PyObject *res; Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object); Py_ssize_t start = exc->start, end = exc->end; if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) { Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start); const char *fmt; - if (badchar <= 0xff) { + if (badchar <= 0xff) fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U"; - } - else if (badchar <= 0xffff) { + else if (badchar <= 0xffff) fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U"; - } - else { + else fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U"; - } - res = PyUnicode_FromFormat(fmt, encoding, (int)badchar, start, reason); + result = PyUnicode_FromFormat( + fmt, + encoding_str, + (int)badchar, + start, + reason_str); } else { - res = PyUnicode_FromFormat( + result = PyUnicode_FromFormat( "'%U' codec can't encode characters in position %zd-%zd: %U", - encoding, start, end - 1, reason - ); + encoding_str, + start, + end - 1, + reason_str); } - - Py_DECREF(reason); - Py_DECREF(encoding); - return res; +done: + Py_XDECREF(reason_str); + Py_XDECREF(encoding_str); + return result; } static PyTypeObject _PyExc_UnicodeEncodeError = { @@ -3109,45 +3111,47 @@ static PyObject * UnicodeDecodeError_str(PyObject *self) { PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *result = NULL; + PyObject *reason_str = NULL; + PyObject *encoding_str = NULL; - if (exc->object == NULL) { + if (exc->object == NULL) /* Not properly initialized. */ return PyUnicode_FromString(""); - } /* Get reason and encoding as strings, which they might not be if they've been modified after we were constructed. */ - PyObject *reason = PyObject_Str(exc->reason); - if (reason == NULL) { - return NULL; - } - PyObject *encoding = PyObject_Str(exc->encoding); - if (encoding == NULL) { - Py_DECREF(reason); - return NULL; - } + reason_str = PyObject_Str(exc->reason); + if (reason_str == NULL) + goto done; + encoding_str = PyObject_Str(exc->encoding); + if (encoding_str == NULL) + goto done; - PyObject *res; Py_ssize_t len = PyBytes_GET_SIZE(exc->object); Py_ssize_t start = exc->start, end = exc->end; if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) { int badbyte = (int)(PyBytes_AS_STRING(exc->object)[start] & 0xff); - res = PyUnicode_FromFormat( + result = PyUnicode_FromFormat( "'%U' codec can't decode byte 0x%02x in position %zd: %U", - encoding, badbyte, start, reason - ); + encoding_str, + badbyte, + start, + reason_str); } else { - res = PyUnicode_FromFormat( + result = PyUnicode_FromFormat( "'%U' codec can't decode bytes in position %zd-%zd: %U", - encoding, start, end - 1, reason - ); + encoding_str, + start, + end - 1, + reason_str); } - - Py_DECREF(reason); - Py_DECREF(encoding); - return res; +done: + Py_XDECREF(reason_str); + Py_XDECREF(encoding_str); + return result; } static PyTypeObject _PyExc_UnicodeDecodeError = { @@ -3206,46 +3210,46 @@ static PyObject * UnicodeTranslateError_str(PyObject *self) { PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *result = NULL; + PyObject *reason_str = NULL; - if (exc->object == NULL) { + if (exc->object == NULL) /* Not properly initialized. */ return PyUnicode_FromString(""); - } /* Get reason as a string, which it might not be if it's been modified after we were constructed. */ - PyObject *reason = PyObject_Str(exc->reason); - if (reason == NULL) { - return NULL; - } + reason_str = PyObject_Str(exc->reason); + if (reason_str == NULL) + goto done; - PyObject *res; Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object); Py_ssize_t start = exc->start, end = exc->end; if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) { Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start); const char *fmt; - if (badchar <= 0xff) { + if (badchar <= 0xff) fmt = "can't translate character '\\x%02x' in position %zd: %U"; - } - else if (badchar <= 0xffff) { + else if (badchar <= 0xffff) fmt = "can't translate character '\\u%04x' in position %zd: %U"; - } - else { + else fmt = "can't translate character '\\U%08x' in position %zd: %U"; - } - res = PyUnicode_FromFormat(fmt, (int)badchar, start, reason); - } - else { - res = PyUnicode_FromFormat( + result = PyUnicode_FromFormat( + fmt, + (int)badchar, + start, + reason_str); + } else { + result = PyUnicode_FromFormat( "can't translate characters in position %zd-%zd: %U", - start, end - 1, reason - ); + start, + end - 1, + reason_str); } - - Py_DECREF(reason); - return res; +done: + Py_XDECREF(reason_str); + return result; } static PyTypeObject _PyExc_UnicodeTranslateError = { From 85199f81d34a6a45788ec8d89ee8ea039265dbce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 7 Oct 2024 16:21:15 +0200 Subject: [PATCH 10/11] add back PEP-7 braces --- Objects/exceptions.c | 45 +++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index ed3ea1f1272c64..c685481b13a93a 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2999,18 +2999,21 @@ UnicodeEncodeError_str(PyObject *self) PyObject *reason_str = NULL; PyObject *encoding_str = NULL; - if (exc->object == NULL) + if (exc->object == NULL) { /* Not properly initialized. */ return PyUnicode_FromString(""); + } /* Get reason and encoding as strings, which they might not be if they've been modified after we were constructed. */ reason_str = PyObject_Str(exc->reason); - if (reason_str == NULL) + if (reason_str == NULL) { goto done; + } encoding_str = PyObject_Str(exc->encoding); - if (encoding_str == NULL) + if (encoding_str == NULL) { goto done; + } Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object); Py_ssize_t start = exc->start, end = exc->end; @@ -3018,12 +3021,15 @@ UnicodeEncodeError_str(PyObject *self) if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) { Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start); const char *fmt; - if (badchar <= 0xff) + if (badchar <= 0xff) { fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U"; - else if (badchar <= 0xffff) + } + else if (badchar <= 0xffff) { fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U"; - else + } + else { fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U"; + } result = PyUnicode_FromFormat( fmt, encoding_str, @@ -3115,18 +3121,21 @@ UnicodeDecodeError_str(PyObject *self) PyObject *reason_str = NULL; PyObject *encoding_str = NULL; - if (exc->object == NULL) + if (exc->object == NULL) { /* Not properly initialized. */ return PyUnicode_FromString(""); + } /* Get reason and encoding as strings, which they might not be if they've been modified after we were constructed. */ reason_str = PyObject_Str(exc->reason); - if (reason_str == NULL) + if (reason_str == NULL) { goto done; + } encoding_str = PyObject_Str(exc->encoding); - if (encoding_str == NULL) + if (encoding_str == NULL) { goto done; + } Py_ssize_t len = PyBytes_GET_SIZE(exc->object); Py_ssize_t start = exc->start, end = exc->end; @@ -3213,15 +3222,17 @@ UnicodeTranslateError_str(PyObject *self) PyObject *result = NULL; PyObject *reason_str = NULL; - if (exc->object == NULL) + if (exc->object == NULL) { /* Not properly initialized. */ return PyUnicode_FromString(""); + } /* Get reason as a string, which it might not be if it's been modified after we were constructed. */ reason_str = PyObject_Str(exc->reason); - if (reason_str == NULL) + if (reason_str == NULL) { goto done; + } Py_ssize_t len = PyUnicode_GET_LENGTH(exc->object); Py_ssize_t start = exc->start, end = exc->end; @@ -3229,18 +3240,22 @@ UnicodeTranslateError_str(PyObject *self) if ((start >= 0 && start < len) && (end >= 0 && end <= len) && end == start + 1) { Py_UCS4 badchar = PyUnicode_ReadChar(exc->object, start); const char *fmt; - if (badchar <= 0xff) + if (badchar <= 0xff) { fmt = "can't translate character '\\x%02x' in position %zd: %U"; - else if (badchar <= 0xffff) + } + else if (badchar <= 0xffff) { fmt = "can't translate character '\\u%04x' in position %zd: %U"; - else + } + else { fmt = "can't translate character '\\U%08x' in position %zd: %U"; + } result = PyUnicode_FromFormat( fmt, (int)badchar, start, reason_str); - } else { + } + else { result = PyUnicode_FromFormat( "can't translate characters in position %zd-%zd: %U", start, From 44dfaeb960454266cc2e96c01d05f2bc6325a673 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 8 Oct 2024 11:06:24 +0200 Subject: [PATCH 11/11] address Victor's review --- Lib/test/test_exceptions.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 0cc2913d940664..b3c21cd4f3d585 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -1337,25 +1337,28 @@ def test_unicode_errors_no_object(self): for klass in klasses: self.assertEqual(str(klass.__new__(klass)), "") - def test_unicode_error_str_gh_123378(self): - for formatter, start, end, obj in product( - (str, repr), + def test_unicode_error_str_does_not_crash(self): + # Test that str(UnicodeError(...)) does not crash. + # See https://github.com/python/cpython/issues/123378. + + for start, end, objlen in product( range(-5, 5), range(-5, 5), - ('', 'a', '123', '1234', '12345', 'abc123'), + range(7), ): - with self.subTest(formatter, obj=obj, start=start, end=end): + obj = 'a' * objlen + with self.subTest('encode', objlen=objlen, start=start, end=end): exc = UnicodeEncodeError('utf-8', obj, start, end, '') - self.assertIsInstance(formatter(exc), str) + self.assertIsInstance(str(exc), str) - with self.subTest(formatter, obj=obj, start=start, end=end): + with self.subTest('translate', objlen=objlen, start=start, end=end): exc = UnicodeTranslateError(obj, start, end, '') - self.assertIsInstance(formatter(exc), str) + self.assertIsInstance(str(exc), str) encoded = obj.encode() - with self.subTest(formatter, obj=encoded, start=start, end=end): + with self.subTest('decode', objlen=objlen, start=start, end=end): exc = UnicodeDecodeError('utf-8', encoded, start, end, '') - self.assertIsInstance(formatter(exc), str) + self.assertIsInstance(str(exc), str) @no_tracing def test_badisinstance(self):