From 7a1574dfcee9688813487ae6765ecc87ad57425b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 26 Aug 2024 17:40:48 +0200 Subject: [PATCH 01/29] Fix `PyUnicode{Encode,Decode}Error_GetStart`. This fixes a bug when the `start` value of a `UnicodeError` is 0 and the underlying object is an empty string or bytes. --- Objects/exceptions.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index fda62f159c1540..9bcdc88e1291ca 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2751,7 +2751,7 @@ PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start) if (*start<0) *start = 0; /*XXX check for values <0*/ if (*start>=size) - *start = size-1; + *start = size ? size-1 : 0; Py_DECREF(obj); return 0; } @@ -2769,7 +2769,7 @@ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start) if (*start<0) *start = 0; if (*start>=size) - *start = size-1; + *start = size ? 
size-1 : 0; Py_DECREF(obj); return 0; } From 6ef0c6d47a0b6b41ad5e1af4e77a2a6200297c23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 27 Aug 2024 09:07:59 +0200 Subject: [PATCH 02/29] blurb --- .../next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst diff --git a/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst b/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst new file mode 100644 index 00000000000000..909eac97dde81e --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst @@ -0,0 +1,4 @@ +Ensure that *start* is correctly set by :c:func:`PyUnicodeEncodeError_GetStart` +and :c:func:`PyUnicodeDecodeError_GetStart` when :attr:`UnicodeError.start` is +*0* and the underlying :attr:`UnicodeEncode.object` is empty. +Patch by Bénédikt Tran. 
From 60ab0bb67f1b0469a2b76f9b25fc6f5ae2e89efb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 27 Aug 2024 09:48:12 +0200 Subject: [PATCH 03/29] add tests --- Lib/test/test_capi/test_exceptions.py | 46 +++++++++++++++++++++++++++ Modules/_testcapi/exceptions.c | 23 ++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py index b22ddd8ad858d4..750712f629137b 100644 --- a/Lib/test/test_capi/test_exceptions.py +++ b/Lib/test/test_capi/test_exceptions.py @@ -415,6 +415,52 @@ def test_err_formatunraisable(self): # CRASHES formatunraisable(NULL, NULL) +class TestUnicodeError(unittest.TestCase): + + def test_unicode_encode_error_get_start(self): + test_func = _testcapi.unicode_encode_get_start + self._test_unicode_error_get_start('x', UnicodeEncodeError, test_func) + + def test_unicode_decode_error_get_start(self): + test_func = _testcapi.unicode_decode_get_start + self._test_unicode_error_get_start(b'x', UnicodeDecodeError, test_func) + + def _test_unicode_error_get_start(self, literal, exc_type, test_func): + for obj_len, py_start, c_start in [ + # normal cases + (5, 0, 0), + (5, 1, 1), + (5, 2, 2), + # negative start is clamped to 0 + (0, -1, 0), + (2, -1, 0), + # out of range start is clamped to max(0, obj_len - 1) + (0, 0, 0), + (0, 1, 0), + (0, 10, 0), + (2, 0, 0), + (5, 5, 4), + (5, 10, 4), + ]: + c_start_computed = py_start + if c_start_computed < 0: + c_start_computed = 0 + if c_start_computed >= obj_len: + if obj_len == 0: + c_start_computed = 0 + else: + c_start_computed = obj_len - 1 + + s = literal * obj_len + py_end = py_start + 1 + + with self.subTest(s, exc_type=exc_type, py_start=py_start, c_start=c_start): + self.assertEqual(c_start, c_start_computed) + exc = exc_type('utf-8', s, py_start, py_end, 'reason') + c_start_actual = test_func(exc) + self.assertEqual(c_start_actual, c_start) + + class 
Test_PyUnstable_Exc_PrepReraiseStar(ExceptionIsLikeMixin, unittest.TestCase): def setUp(self): diff --git a/Modules/_testcapi/exceptions.c b/Modules/_testcapi/exceptions.c index 316ef0e7ad7e55..a36bec3959acda 100644 --- a/Modules/_testcapi/exceptions.c +++ b/Modules/_testcapi/exceptions.c @@ -359,6 +359,27 @@ _testcapi_unstable_exc_prep_reraise_star_impl(PyObject *module, return PyUnstable_Exc_PrepReraiseStar(orig, excs); } +/* Test PyUnicodeEncodeError_GetStart */ +static PyObject * +unicode_encode_get_start(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t start; + if (PyUnicodeEncodeError_GetStart(arg, &start) < 0) { + return NULL; + } + RETURN_SIZE(start); +} + +/* Test PyUnicodeDecodeError_GetStart */ +static PyObject * +unicode_decode_get_start(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t start; + if (PyUnicodeDecodeError_GetStart(arg, &start) < 0) { + return NULL; + } + RETURN_SIZE(start); +} /* * Define the PyRecurdingInfinitelyError_Type @@ -403,6 +424,8 @@ static PyMethodDef test_methods[] = { _TESTCAPI_SET_EXCEPTION_METHODDEF _TESTCAPI_TRACEBACK_PRINT_METHODDEF _TESTCAPI_UNSTABLE_EXC_PREP_RERAISE_STAR_METHODDEF + {"unicode_encode_get_start", unicode_encode_get_start, METH_O}, + {"unicode_decode_get_start", unicode_decode_get_start, METH_O}, {NULL}, }; From 67b3d8ef92543dcb2501eedf31c48e323e3c7a7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 27 Aug 2024 09:56:36 +0200 Subject: [PATCH 04/29] fix NEWS --- .../next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst b/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst index 909eac97dde81e..922d0b34ec7546 100644 --- a/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst +++ b/Misc/NEWS.d/next/C 
API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst @@ -1,4 +1,4 @@ Ensure that *start* is correctly set by :c:func:`PyUnicodeEncodeError_GetStart` and :c:func:`PyUnicodeDecodeError_GetStart` when :attr:`UnicodeError.start` is -*0* and the underlying :attr:`UnicodeEncode.object` is empty. +*0* and the underlying :attr:`UnicodeError.object` is empty. Patch by Bénédikt Tran. From 78fff57f57a0fc4a74d24deefa5608ea46265532 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 27 Aug 2024 10:29:39 +0200 Subject: [PATCH 05/29] remove a duplicated normal case --- Lib/test/test_capi/test_exceptions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py index 750712f629137b..c3e49485677494 100644 --- a/Lib/test/test_capi/test_exceptions.py +++ b/Lib/test/test_capi/test_exceptions.py @@ -438,7 +438,6 @@ def _test_unicode_error_get_start(self, literal, exc_type, test_func): (0, 0, 0), (0, 1, 0), (0, 10, 0), - (2, 0, 0), (5, 5, 4), (5, 10, 4), ]: From a6e6f80b86b9b55c6f90d64895d1e44c9c6c6ff4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 28 Aug 2024 13:49:42 +0200 Subject: [PATCH 06/29] handle start < 0 --- Include/cpython/pyerrors.h | 2 +- Objects/exceptions.c | 67 ++++++++++++++++++++++++++++---------- 2 files changed, 50 insertions(+), 19 deletions(-) diff --git a/Include/cpython/pyerrors.h b/Include/cpython/pyerrors.h index b36b4681f5dddb..0aaf7388a4a662 100644 --- a/Include/cpython/pyerrors.h +++ b/Include/cpython/pyerrors.h @@ -44,7 +44,7 @@ typedef struct { PyException_HEAD PyObject *encoding; PyObject *object; - Py_ssize_t start; + Py_ssize_t start; // should be >= 0 Py_ssize_t end; PyObject *reason; } PyUnicodeErrorObject; diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 9bcdc88e1291ca..8cd1746b7b928f 100644 --- a/Objects/exceptions.c +++ 
b/Objects/exceptions.c @@ -2741,17 +2741,17 @@ PyUnicodeTranslateError_GetObject(PyObject *exc) int PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start) { - Py_ssize_t size; PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object, "object"); - if (!obj) + if (!obj) { return -1; + } + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); *start = ((PyUnicodeErrorObject *)exc)->start; - size = PyUnicode_GET_LENGTH(obj); - if (*start<0) - *start = 0; /*XXX check for values <0*/ - if (*start>=size) - *start = size ? size-1 : 0; + assert(*start >= 0); + if (*start >= size) { + *start = size ? size - 1 : 0; + } Py_DECREF(obj); return 0; } @@ -2760,16 +2760,16 @@ PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start) int PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start) { - Py_ssize_t size; PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object"); - if (!obj) + if (!obj) { return -1; - size = PyBytes_GET_SIZE(obj); + } + Py_ssize_t size = PyBytes_GET_SIZE(obj); *start = ((PyUnicodeErrorObject *)exc)->start; - if (*start<0) - *start = 0; - if (*start>=size) - *start = size ? size-1 : 0; + assert(*start >= 0); + if (*start >= size) { + *start = size ? 
size - 1 : 0; + } Py_DECREF(obj); return 0; } @@ -2785,6 +2785,10 @@ PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start) int PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) { + if (start < 0) { + PyErr_SetString(PyExc_ValueError, "'start' must be >= 0"); + return -1; + } ((PyUnicodeErrorObject *)exc)->start = start; return 0; } @@ -2793,6 +2797,10 @@ PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) int PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start) { + if (start < 0) { + PyErr_SetString(PyExc_ValueError, "'start' must be >= 0"); + return -1; + } ((PyUnicodeErrorObject *)exc)->start = start; return 0; } @@ -2801,6 +2809,10 @@ PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start) int PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start) { + if (start < 0) { + PyErr_SetString(PyExc_ValueError, "'start' must be >= 0"); + return -1; + } ((PyUnicodeErrorObject *)exc)->start = start; return 0; } @@ -2980,8 +2992,12 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds) if (!PyArg_ParseTuple(args, "UUnnU", &err->encoding, &err->object, &err->start, &err->end, &err->reason)) { - err->encoding = err->object = err->reason = NULL; - return -1; + goto error; + } + + if (err->start < 0) { + PyErr_SetString(PyExc_ValueError, "'start' must be >= 0"); + goto error; } Py_INCREF(err->encoding); @@ -2989,6 +3005,9 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds) Py_INCREF(err->reason); return 0; +error: + err->encoding = err->object = err->reason = NULL; + return -1; } static PyObject * @@ -3086,6 +3105,11 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) Py_INCREF(ude->object); Py_INCREF(ude->reason); + if (ude->start < 0) { + PyErr_SetString(PyExc_ValueError, "'start' must be >= 0"); + goto error; + } + if (!PyBytes_Check(ude->object)) { Py_buffer view; if (PyObject_GetBuffer(ude->object, &view, PyBUF_SIMPLE) != 0) @@ -3190,14 
+3214,21 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args, if (!PyArg_ParseTuple(args, "UnnU", &self->object, &self->start, &self->end, &self->reason)) { - self->object = self->reason = NULL; - return -1; + goto error; + } + + if (self->start < 0) { + PyErr_SetString(PyExc_ValueError, "'start' must be >= 0"); + goto error; } Py_INCREF(self->object); Py_INCREF(self->reason); return 0; +error: + self->object = self->reason = NULL; + return -1; } From 20c47ba557b1dcac343402d2b4ab23649b79dfd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 28 Aug 2024 13:49:36 +0200 Subject: [PATCH 07/29] add C tests --- Modules/_testcapi/exceptions.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Modules/_testcapi/exceptions.c b/Modules/_testcapi/exceptions.c index a36bec3959acda..47516881777ce9 100644 --- a/Modules/_testcapi/exceptions.c +++ b/Modules/_testcapi/exceptions.c @@ -381,6 +381,18 @@ unicode_decode_get_start(PyObject *Py_UNUSED(module), PyObject *arg) RETURN_SIZE(start); } +/* Test PyUnicodeTranslateError_GetStart */ +static PyObject * +unicode_translate_get_start(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t start; + if (PyUnicodeTranslateError_GetStart(arg, &start) < 0) { + return NULL; + } + RETURN_SIZE(start); +} + + /* * Define the PyRecurdingInfinitelyError_Type */ @@ -426,6 +438,7 @@ static PyMethodDef test_methods[] = { _TESTCAPI_UNSTABLE_EXC_PREP_RERAISE_STAR_METHODDEF {"unicode_encode_get_start", unicode_encode_get_start, METH_O}, {"unicode_decode_get_start", unicode_decode_get_start, METH_O}, + {"unicode_translate_get_start", unicode_translate_get_start, METH_O}, {NULL}, }; From 51bc77e33cf535dcae3f0dc5d35c74b92784ae87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 28 Aug 2024 13:49:31 +0200 Subject: [PATCH 08/29] add test coverage --- 
Lib/test/test_capi/test_exceptions.py | 32 +++++++++++++++++++++------ 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py index c3e49485677494..1ee55835b3b635 100644 --- a/Lib/test/test_capi/test_exceptions.py +++ b/Lib/test/test_capi/test_exceptions.py @@ -415,8 +415,28 @@ def test_err_formatunraisable(self): # CRASHES formatunraisable(NULL, NULL) +class PyUnicodeTranslateError(UnicodeTranslateError): + # UnicodeTranslateError takes 4 arguments instead of 5, + # so we just make a UnicodeTranslateError class that is + # compatible with the UnicodeError.__init__. + def __init__(self, encoding, *args, **kwargs): + super().__init__(*args) + + class TestUnicodeError(unittest.TestCase): + def test_unicode_error_start_value(self): + # negative start is not allowed + for exc_type, literal in [ + (UnicodeEncodeError, 'x'), + (UnicodeDecodeError, b'x'), + (PyUnicodeTranslateError, 'x'), + ]: + for obj_len in [0, 1, 2]: + s = literal * obj_len + with self.subTest(exc_type=exc_type, obj_len=obj_len): + self.assertRaises(ValueError, exc_type, 'utf-8', s, -1, 0, '?') + def test_unicode_encode_error_get_start(self): test_func = _testcapi.unicode_encode_get_start self._test_unicode_error_get_start('x', UnicodeEncodeError, test_func) @@ -425,15 +445,16 @@ def test_unicode_decode_error_get_start(self): test_func = _testcapi.unicode_decode_get_start self._test_unicode_error_get_start(b'x', UnicodeDecodeError, test_func) + def test_unicode_translate_error_get_start(self): + test_func = _testcapi.unicode_translate_get_start + self._test_unicode_error_get_start('x', PyUnicodeTranslateError, test_func) + def _test_unicode_error_get_start(self, literal, exc_type, test_func): for obj_len, py_start, c_start in [ # normal cases (5, 0, 0), (5, 1, 1), (5, 2, 2), - # negative start is clamped to 0 - (0, -1, 0), - (2, -1, 0), # out of range start is clamped to max(0, obj_len - 1) (0, 0, 0), (0, 1, 0), @@ 
-442,8 +463,6 @@ def _test_unicode_error_get_start(self, literal, exc_type, test_func): (5, 10, 4), ]: c_start_computed = py_start - if c_start_computed < 0: - c_start_computed = 0 if c_start_computed >= obj_len: if obj_len == 0: c_start_computed = 0 @@ -455,11 +474,10 @@ def _test_unicode_error_get_start(self, literal, exc_type, test_func): with self.subTest(s, exc_type=exc_type, py_start=py_start, c_start=c_start): self.assertEqual(c_start, c_start_computed) - exc = exc_type('utf-8', s, py_start, py_end, 'reason') + exc = exc_type('utf-8', s, py_start, py_end, 'why') c_start_actual = test_func(exc) self.assertEqual(c_start_actual, c_start) - class Test_PyUnstable_Exc_PrepReraiseStar(ExceptionIsLikeMixin, unittest.TestCase): def setUp(self): From b290e58441ae4ffe4735c6b4b854bdf279d88b7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 28 Aug 2024 13:53:33 +0200 Subject: [PATCH 09/29] update docs --- Doc/c-api/exceptions.rst | 4 ++-- Doc/library/exceptions.rst | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index 499bfb47cc4be5..8756b76e0a40ae 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -857,8 +857,8 @@ The following functions are used to create and modify Unicode exceptions from C. int PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) int PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start) - Set the *start* attribute of the given exception object to *start*. Return - ``0`` on success, ``-1`` on failure. + Set the *start* attribute of the given exception object to *start*. *start* + must be non-negative. Return ``0`` on success, ``-1`` on failure. .. 
c:function:: int PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end) int PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end) diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index b5ba86f1b19223..55325ac9c1e11a 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -642,7 +642,8 @@ The following exceptions are the exceptions that are usually raised. .. attribute:: start - The first index of invalid data in :attr:`object`. + The first index of invalid data in :attr:`object`. This value + must be non-negative. .. attribute:: end From 75398a9a8494f8ae74ff481219a1272266b431c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 28 Aug 2024 13:54:48 +0200 Subject: [PATCH 10/29] fixup --- Lib/test/test_capi/test_exceptions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py index 1ee55835b3b635..2f3478042c5f46 100644 --- a/Lib/test/test_capi/test_exceptions.py +++ b/Lib/test/test_capi/test_exceptions.py @@ -420,7 +420,7 @@ class PyUnicodeTranslateError(UnicodeTranslateError): # so we just make a UnicodeTranslateError class that is # compatible with the UnicodeError.__init__. 
def __init__(self, encoding, *args, **kwargs): - super().__init__(*args) + super().__init__(*args, **kwargs) class TestUnicodeError(unittest.TestCase): From 546be87f5ac0c61c7985f22ed8e362f84843e431 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 28 Aug 2024 13:59:07 +0200 Subject: [PATCH 11/29] update blurb --- .../C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst b/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst index 922d0b34ec7546..adb4ceeb82d95d 100644 --- a/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst +++ b/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst @@ -1,4 +1,5 @@ -Ensure that *start* is correctly set by :c:func:`PyUnicodeEncodeError_GetStart` -and :c:func:`PyUnicodeDecodeError_GetStart` when :attr:`UnicodeError.start` is -*0* and the underlying :attr:`UnicodeError.object` is empty. -Patch by Bénédikt Tran. +Ensure that *start* is correctly set on :exc:`UnicodeEncodeError` objects. +A negative *start* is not allowed by :c:func:`PyUnicodeEncodeError_SetStart` +and will not be returned by :c:func:`PyUnicodeEncodeError_GetStart`. Similar +arguments apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` +and their corresponding C interface. Patch by Bénédikt Tran. 
From cded571ddbd58b6d52000ce719445f0ce3505f02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 29 Aug 2024 10:13:40 +0200 Subject: [PATCH 12/29] address Victor's review --- Include/cpython/pyerrors.h | 2 +- Lib/test/test_capi/test_exceptions.py | 42 ++++----- Objects/exceptions.c | 129 +++++++++++++------------- 3 files changed, 82 insertions(+), 91 deletions(-) diff --git a/Include/cpython/pyerrors.h b/Include/cpython/pyerrors.h index 0aaf7388a4a662..7f15ef3ad08f25 100644 --- a/Include/cpython/pyerrors.h +++ b/Include/cpython/pyerrors.h @@ -44,7 +44,7 @@ typedef struct { PyException_HEAD PyObject *encoding; PyObject *object; - Py_ssize_t start; // should be >= 0 + Py_ssize_t start; // must be >= 0 Py_ssize_t end; PyObject *reason; } PyUnicodeErrorObject; diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py index 2f3478042c5f46..4f0c74923b83d2 100644 --- a/Lib/test/test_capi/test_exceptions.py +++ b/Lib/test/test_capi/test_exceptions.py @@ -5,6 +5,7 @@ import unittest import textwrap +from itertools import product from test import support from test.support import import_helper from test.support.os_helper import TESTFN, TESTFN_UNDECODABLE @@ -425,31 +426,31 @@ def __init__(self, encoding, *args, **kwargs): class TestUnicodeError(unittest.TestCase): - def test_unicode_error_start_value(self): + def test_unicode_error_init_start_value(self): # negative start is not allowed - for exc_type, literal in [ + for (exc_type, literal), obj_len, start in product([ (UnicodeEncodeError, 'x'), (UnicodeDecodeError, b'x'), (PyUnicodeTranslateError, 'x'), - ]: - for obj_len in [0, 1, 2]: - s = literal * obj_len - with self.subTest(exc_type=exc_type, obj_len=obj_len): - self.assertRaises(ValueError, exc_type, 'utf-8', s, -1, 0, '?') + ], range(3), [-2, -1]): + obj = literal * obj_len + for end in [0, start + 1]: + with self.subTest(start=start, end=end, exc_type=exc_type, 
obj_len=obj_len): + self.assertRaises(ValueError, exc_type, 'utf-8', obj, start, end, 'reason') def test_unicode_encode_error_get_start(self): - test_func = _testcapi.unicode_encode_get_start - self._test_unicode_error_get_start('x', UnicodeEncodeError, test_func) + get_start = _testcapi.unicode_encode_get_start + self._test_unicode_error_get_start('x', UnicodeEncodeError, get_start) def test_unicode_decode_error_get_start(self): - test_func = _testcapi.unicode_decode_get_start - self._test_unicode_error_get_start(b'x', UnicodeDecodeError, test_func) + get_start = _testcapi.unicode_decode_get_start + self._test_unicode_error_get_start(b'x', UnicodeDecodeError, get_start) def test_unicode_translate_error_get_start(self): - test_func = _testcapi.unicode_translate_get_start - self._test_unicode_error_get_start('x', PyUnicodeTranslateError, test_func) + get_start = _testcapi.unicode_translate_get_start + self._test_unicode_error_get_start('x', PyUnicodeTranslateError, get_start) - def _test_unicode_error_get_start(self, literal, exc_type, test_func): + def _test_unicode_error_get_start(self, literal, exc_type, get_start): for obj_len, py_start, c_start in [ # normal cases (5, 0, 0), @@ -462,21 +463,12 @@ def _test_unicode_error_get_start(self, literal, exc_type, test_func): (5, 5, 4), (5, 10, 4), ]: - c_start_computed = py_start - if c_start_computed >= obj_len: - if obj_len == 0: - c_start_computed = 0 - else: - c_start_computed = obj_len - 1 - s = literal * obj_len py_end = py_start + 1 with self.subTest(s, exc_type=exc_type, py_start=py_start, c_start=c_start): - self.assertEqual(c_start, c_start_computed) - exc = exc_type('utf-8', s, py_start, py_end, 'why') - c_start_actual = test_func(exc) - self.assertEqual(c_start_actual, c_start) + exc = exc_type('utf-8', s, py_start, py_end, 'reason') + self.assertEqual(get_start(exc), c_start) class Test_PyUnstable_Exc_PrepReraiseStar(ExceptionIsLikeMixin, unittest.TestCase): diff --git a/Objects/exceptions.c 
b/Objects/exceptions.c index 8cd1746b7b928f..142cd36a2898ee 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2978,36 +2978,35 @@ static PyMemberDef UnicodeError_members[] = { static int UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds) { - PyUnicodeErrorObject *err; - - if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) { return -1; + } - err = (PyUnicodeErrorObject *)self; + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + Py_CLEAR(exc->encoding); + Py_CLEAR(exc->object); + Py_CLEAR(exc->reason); - Py_CLEAR(err->encoding); - Py_CLEAR(err->object); - Py_CLEAR(err->reason); + PyObject *encoding = NULL, *object = NULL, *reason = NULL; // borrowed + Py_ssize_t start = -1, end = -1; if (!PyArg_ParseTuple(args, "UUnnU", - &err->encoding, &err->object, - &err->start, &err->end, &err->reason)) { - goto error; + &encoding, &object, &start, &end, &reason)) + { + return -1; } - if (err->start < 0) { + if (start < 0) { PyErr_SetString(PyExc_ValueError, "'start' must be >= 0"); - goto error; + return -1; } - Py_INCREF(err->encoding); - Py_INCREF(err->object); - Py_INCREF(err->reason); - + exc->encoding = Py_NewRef(encoding); + exc->object = Py_NewRef(object); + exc->start = start; + exc->end = end; + exc->reason = Py_NewRef(reason); return 0; -error: - err->encoding = err->object = err->reason = NULL; - return -1; } static PyObject * @@ -3083,49 +3082,48 @@ PyObject *PyExc_UnicodeEncodeError = (PyObject *)&_PyExc_UnicodeEncodeError; static int UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) { - PyUnicodeErrorObject *ude; - - if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) { return -1; + } + + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - ude = (PyUnicodeErrorObject *)self; + 
Py_CLEAR(exc->encoding); + Py_CLEAR(exc->object); + Py_CLEAR(exc->reason); - Py_CLEAR(ude->encoding); - Py_CLEAR(ude->object); - Py_CLEAR(ude->reason); + PyObject *encoding = NULL, *object = NULL, *reason = NULL; // borrowed + Py_ssize_t start = -1, end = -1; if (!PyArg_ParseTuple(args, "UOnnU", - &ude->encoding, &ude->object, - &ude->start, &ude->end, &ude->reason)) { - ude->encoding = ude->object = ude->reason = NULL; - return -1; + &encoding, &object, &start, &end, &reason)) + { + return -1; } - Py_INCREF(ude->encoding); - Py_INCREF(ude->object); - Py_INCREF(ude->reason); - - if (ude->start < 0) { + if (start < 0) { PyErr_SetString(PyExc_ValueError, "'start' must be >= 0"); - goto error; + return -1; } - if (!PyBytes_Check(ude->object)) { + if (!PyBytes_Check(object)) { Py_buffer view; - if (PyObject_GetBuffer(ude->object, &view, PyBUF_SIMPLE) != 0) - goto error; - Py_XSETREF(ude->object, PyBytes_FromStringAndSize(view.buf, view.len)); + if (PyObject_GetBuffer(object, &view, PyBUF_SIMPLE) != 0) { + return -1; + } + Py_XSETREF(object, PyBytes_FromStringAndSize(view.buf, view.len)); PyBuffer_Release(&view); - if (!ude->object) - goto error; + if (object == NULL) { + return -1; + } } - return 0; -error: - Py_CLEAR(ude->encoding); - Py_CLEAR(ude->object); - Py_CLEAR(ude->reason); - return -1; + exc->encoding = Py_NewRef(encoding); + exc->object = Py_NewRef(object); + exc->start = start; + exc->end = end; + exc->reason = Py_NewRef(reason); + return 0; } static PyObject * @@ -3202,33 +3200,34 @@ PyUnicodeDecodeError_Create( */ static int -UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args, - PyObject *kwds) +UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds) { - if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) { return -1; + } - Py_CLEAR(self->object); - Py_CLEAR(self->reason); + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject 
*)self; - if (!PyArg_ParseTuple(args, "UnnU", - &self->object, - &self->start, &self->end, &self->reason)) { - goto error; + Py_CLEAR(exc->object); + Py_CLEAR(exc->reason); + + PyObject *object = NULL, *reason = NULL; // borrowed + Py_ssize_t start = -1, end = -1; + + if (!PyArg_ParseTuple(args, "UnnU", &object, &start, &end, &reason)) { + return -1; } - if (self->start < 0) { + if (start < 0) { PyErr_SetString(PyExc_ValueError, "'start' must be >= 0"); - goto error; + return -1; } - Py_INCREF(self->object); - Py_INCREF(self->reason); - + exc->object = Py_NewRef(object); + exc->start = start; + exc->end = end; + exc->reason = Py_NewRef(reason); return 0; -error: - self->object = self->reason = NULL; - return -1; } From 1900d9ac592f36533daf8c1957c4d2f0f3269620 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 29 Aug 2024 10:15:05 +0200 Subject: [PATCH 13/29] refactor name --- Lib/test/test_capi/test_exceptions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py index 4f0c74923b83d2..74e25897d63c64 100644 --- a/Lib/test/test_capi/test_exceptions.py +++ b/Lib/test/test_capi/test_exceptions.py @@ -463,11 +463,11 @@ def _test_unicode_error_get_start(self, literal, exc_type, get_start): (5, 5, 4), (5, 10, 4), ]: - s = literal * obj_len + obj = literal * obj_len py_end = py_start + 1 - with self.subTest(s, exc_type=exc_type, py_start=py_start, c_start=c_start): - exc = exc_type('utf-8', s, py_start, py_end, 'reason') + with self.subTest(obj, exc_type=exc_type, py_start=py_start, c_start=c_start): + exc = exc_type('utf-8', obj, py_start, py_end, 'reason') self.assertEqual(get_start(exc), c_start) class Test_PyUnstable_Exc_PrepReraiseStar(ExceptionIsLikeMixin, unittest.TestCase): From 8acc563f36af6c06801210c7611ca77eced01202 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= 
<10796600+picnixz@users.noreply.github.com> Date: Thu, 29 Aug 2024 10:35:11 +0200 Subject: [PATCH 14/29] fix refcounts --- Lib/test/test_exceptions.py | 71 +++++++++++++++++++------------------ Objects/exceptions.c | 6 ++-- 2 files changed, 40 insertions(+), 37 deletions(-) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index e4f2e3a97b8bb8..184f12323efcbf 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -534,41 +534,42 @@ def testAttributes(self): pass for exc, args, kwargs, expected in exceptionList: - try: - e = exc(*args, **kwargs) - except: - print(f"\nexc={exc!r}, args={args!r}", file=sys.stderr) - # raise - else: - # Verify module name - if not type(e).__name__.endswith('NaiveException'): - self.assertEqual(type(e).__module__, 'builtins') - # Verify no ref leaks in Exc_str() - s = str(e) - for checkArgName in expected: - value = getattr(e, checkArgName) - self.assertEqual(repr(value), - repr(expected[checkArgName]), - '%r.%s == %r, expected %r' % ( - e, checkArgName, - value, expected[checkArgName])) - - # test for pickling support - for p in [pickle]: - for protocol in range(p.HIGHEST_PROTOCOL + 1): - s = p.dumps(e, protocol) - new = p.loads(s) - for checkArgName in expected: - got = repr(getattr(new, checkArgName)) - if exc == AttributeError and checkArgName == 'obj': - # See GH-103352, we're not pickling - # obj at this point. So verify it's None. 
- want = repr(None) - else: - want = repr(expected[checkArgName]) - self.assertEqual(got, want, - 'pickled "%r", attribute "%s' % - (e, checkArgName)) + with self.subTest(exc=exc, args=args, kwargs=kwargs): + try: + e = exc(*args, **kwargs) + except: + print(f"\nexc={exc!r}, args={args!r}", file=sys.stderr) + # raise + else: + # Verify module name + if not type(e).__name__.endswith('NaiveException'): + self.assertEqual(type(e).__module__, 'builtins') + # Verify no ref leaks in Exc_str() + s = str(e) + for checkArgName in expected: + value = getattr(e, checkArgName) + self.assertEqual(repr(value), + repr(expected[checkArgName]), + '%r.%s == %r, expected %r' % ( + e, checkArgName, + value, expected[checkArgName])) + + # test for pickling support + for p in [pickle]: + for protocol in range(p.HIGHEST_PROTOCOL + 1): + s = p.dumps(e, protocol) + new = p.loads(s) + for checkArgName in expected: + got = repr(getattr(new, checkArgName)) + if exc == AttributeError and checkArgName == 'obj': + # See GH-103352, we're not pickling + # obj at this point. So verify it's None. 
+ want = repr(None) + else: + want = repr(expected[checkArgName]) + self.assertEqual(got, want, + 'pickled "%r", attribute "%s' % + (e, checkArgName)) def test_setstate(self): e = Exception(42) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 142cd36a2898ee..3841c7ce7b5a87 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -3111,11 +3111,13 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) if (PyObject_GetBuffer(object, &view, PyBUF_SIMPLE) != 0) { return -1; } - Py_XSETREF(object, PyBytes_FromStringAndSize(view.buf, view.len)); + PyObject *content = PyBytes_FromStringAndSize(view.buf, view.len); PyBuffer_Release(&view); - if (object == NULL) { + if (content == NULL) { return -1; } + Py_INCREF(object); // make 'object' a strong reference + Py_SETREF(object, content); } exc->encoding = Py_NewRef(encoding); From 0538c8375468d1404eb6039f9d270cc069889a1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 29 Aug 2024 10:44:51 +0200 Subject: [PATCH 15/29] remove debugging code --- Lib/test/test_exceptions.py | 71 ++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py index 184f12323efcbf..e4f2e3a97b8bb8 100644 --- a/Lib/test/test_exceptions.py +++ b/Lib/test/test_exceptions.py @@ -534,42 +534,41 @@ def testAttributes(self): pass for exc, args, kwargs, expected in exceptionList: - with self.subTest(exc=exc, args=args, kwargs=kwargs): - try: - e = exc(*args, **kwargs) - except: - print(f"\nexc={exc!r}, args={args!r}", file=sys.stderr) - # raise - else: - # Verify module name - if not type(e).__name__.endswith('NaiveException'): - self.assertEqual(type(e).__module__, 'builtins') - # Verify no ref leaks in Exc_str() - s = str(e) - for checkArgName in expected: - value = getattr(e, checkArgName) - self.assertEqual(repr(value), - 
repr(expected[checkArgName]), - '%r.%s == %r, expected %r' % ( - e, checkArgName, - value, expected[checkArgName])) - - # test for pickling support - for p in [pickle]: - for protocol in range(p.HIGHEST_PROTOCOL + 1): - s = p.dumps(e, protocol) - new = p.loads(s) - for checkArgName in expected: - got = repr(getattr(new, checkArgName)) - if exc == AttributeError and checkArgName == 'obj': - # See GH-103352, we're not pickling - # obj at this point. So verify it's None. - want = repr(None) - else: - want = repr(expected[checkArgName]) - self.assertEqual(got, want, - 'pickled "%r", attribute "%s' % - (e, checkArgName)) + try: + e = exc(*args, **kwargs) + except: + print(f"\nexc={exc!r}, args={args!r}", file=sys.stderr) + # raise + else: + # Verify module name + if not type(e).__name__.endswith('NaiveException'): + self.assertEqual(type(e).__module__, 'builtins') + # Verify no ref leaks in Exc_str() + s = str(e) + for checkArgName in expected: + value = getattr(e, checkArgName) + self.assertEqual(repr(value), + repr(expected[checkArgName]), + '%r.%s == %r, expected %r' % ( + e, checkArgName, + value, expected[checkArgName])) + + # test for pickling support + for p in [pickle]: + for protocol in range(p.HIGHEST_PROTOCOL + 1): + s = p.dumps(e, protocol) + new = p.loads(s) + for checkArgName in expected: + got = repr(getattr(new, checkArgName)) + if exc == AttributeError and checkArgName == 'obj': + # See GH-103352, we're not pickling + # obj at this point. So verify it's None. 
+ want = repr(None) + else: + want = repr(expected[checkArgName]) + self.assertEqual(got, want, + 'pickled "%r", attribute "%s' % + (e, checkArgName)) def test_setstate(self): e = Exception(42) From d5ea357f6c21a7a3ef84d979ee39eca1114331d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 29 Aug 2024 11:20:53 +0200 Subject: [PATCH 16/29] address Victor's review (round 2) --- Lib/test/test_capi/test_exceptions.py | 6 ++-- Objects/exceptions.c | 47 ++++++++++----------------- 2 files changed, 21 insertions(+), 32 deletions(-) diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py index 74e25897d63c64..bf917092ecdc13 100644 --- a/Lib/test/test_capi/test_exceptions.py +++ b/Lib/test/test_capi/test_exceptions.py @@ -416,7 +416,7 @@ def test_err_formatunraisable(self): # CRASHES formatunraisable(NULL, NULL) -class PyUnicodeTranslateError(UnicodeTranslateError): +class TestUnicodeTranslateError(UnicodeTranslateError): # UnicodeTranslateError takes 4 arguments instead of 5, # so we just make a UnicodeTranslateError class that is # compatible with the UnicodeError.__init__. 
@@ -431,7 +431,7 @@ def test_unicode_error_init_start_value(self): for (exc_type, literal), obj_len, start in product([ (UnicodeEncodeError, 'x'), (UnicodeDecodeError, b'x'), - (PyUnicodeTranslateError, 'x'), + (TestUnicodeTranslateError, 'x'), ], range(3), [-2, -1]): obj = literal * obj_len for end in [0, start + 1]: @@ -448,7 +448,7 @@ def test_unicode_decode_error_get_start(self): def test_unicode_translate_error_get_start(self): get_start = _testcapi.unicode_translate_get_start - self._test_unicode_error_get_start('x', PyUnicodeTranslateError, get_start) + self._test_unicode_error_get_start('x', TestUnicodeTranslateError, get_start) def _test_unicode_error_get_start(self, literal, exc_type, get_start): for obj_len, py_start, c_start in [ diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 3841c7ce7b5a87..335fc428f5be3f 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2982,11 +2982,6 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds) return -1; } - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - Py_CLEAR(exc->encoding); - Py_CLEAR(exc->object); - Py_CLEAR(exc->reason); - PyObject *encoding = NULL, *object = NULL, *reason = NULL; // borrowed Py_ssize_t start = -1, end = -1; @@ -3001,11 +2996,12 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds) return -1; } - exc->encoding = Py_NewRef(encoding); - exc->object = Py_NewRef(object); + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + Py_XSETREF(exc->encoding, Py_NewRef(encoding)); + Py_XSETREF(exc->object, Py_NewRef(object)); exc->start = start; exc->end = end; - exc->reason = Py_NewRef(reason); + Py_XSETREF(exc->reason, Py_NewRef(reason)); return 0; } @@ -3086,12 +3082,6 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) return -1; } - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - - Py_CLEAR(exc->encoding); - Py_CLEAR(exc->object); - Py_CLEAR(exc->reason); - PyObject *encoding = 
NULL, *object = NULL, *reason = NULL; // borrowed Py_ssize_t start = -1, end = -1; @@ -3106,25 +3096,28 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) return -1; } - if (!PyBytes_Check(object)) { + if (PyBytes_Check(object)) { + Py_INCREF(object); // make 'object' a strong reference + } + else { Py_buffer view; if (PyObject_GetBuffer(object, &view, PyBUF_SIMPLE) != 0) { return -1; } - PyObject *content = PyBytes_FromStringAndSize(view.buf, view.len); + // 'object' is borrowed, so we can re-use the variable + object = PyBytes_FromStringAndSize(view.buf, view.len); PyBuffer_Release(&view); - if (content == NULL) { + if (object == NULL) { return -1; } - Py_INCREF(object); // make 'object' a strong reference - Py_SETREF(object, content); } - exc->encoding = Py_NewRef(encoding); - exc->object = Py_NewRef(object); + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + Py_XSETREF(exc->encoding, Py_NewRef(encoding)); + Py_XSETREF(exc->object, object /* object is already a strong reference */); exc->start = start; exc->end = end; - exc->reason = Py_NewRef(reason); + Py_XSETREF(exc->reason, Py_NewRef(reason)); return 0; } @@ -3208,11 +3201,6 @@ UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds) return -1; } - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - - Py_CLEAR(exc->object); - Py_CLEAR(exc->reason); - PyObject *object = NULL, *reason = NULL; // borrowed Py_ssize_t start = -1, end = -1; @@ -3225,10 +3213,11 @@ UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds) return -1; } - exc->object = Py_NewRef(object); + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + Py_XSETREF(exc->object, Py_NewRef(object)); exc->start = start; exc->end = end; - exc->reason = Py_NewRef(reason); + Py_XSETREF(exc->reason, Py_NewRef(reason)); return 0; } From 7c10769eb1a16d54f41bb63ca626bae7aebf4bc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= 
<10796600+picnixz@users.noreply.github.com> Date: Fri, 30 Aug 2024 13:30:20 +0200 Subject: [PATCH 17/29] handle negative 'start' and 'end' values --- Include/cpython/pyerrors.h | 2 +- Objects/exceptions.c | 273 ++++++++++++++++++++----------------- 2 files changed, 150 insertions(+), 125 deletions(-) diff --git a/Include/cpython/pyerrors.h b/Include/cpython/pyerrors.h index 7f15ef3ad08f25..b36b4681f5dddb 100644 --- a/Include/cpython/pyerrors.h +++ b/Include/cpython/pyerrors.h @@ -44,7 +44,7 @@ typedef struct { PyException_HEAD PyObject *encoding; PyObject *object; - Py_ssize_t start; // must be >= 0 + Py_ssize_t start; Py_ssize_t end; PyObject *reason; } PyUnicodeErrorObject; diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 335fc428f5be3f..8fb4c6b61ffd78 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2708,6 +2708,68 @@ set_unicodefromstring(PyObject **attr, const char *value) return 0; } +/* + * Ajust the (inclusive) 'start' value of a UnicodeError object. + * + * The START can be negative or not, but when adjusting the value, + * we clip it in [0, MAX(0, OBJLEN - 1)] but do not intepret it as + * a relative offset. + */ +#define UNICODE_ERROR_ADJUST_START(START, OBJLEN) \ + do { \ + assert(OBJLEN >= 0); \ + if (START < 0) { \ + START = 0; \ + } \ + if (START >= OBJLEN) { \ + START = OBJLEN == 0 ? 0 : OBJLEN - 1; \ + } \ + } while (0) + +/* + * Ajust the (eclusive) 'end' value of a UnicodeError object. + * + * The END can be negative or not, but when adjusting the value, + * we clip it in [MIN(1, OBJLEN), MAX(MIN(1, OBJLEN), OBJLEN)] but + * do not intepret it as a relative offset. 
+ */ +#define UNICODE_ERROR_ADJUST_END(END, OBJLEN) \ + do { \ + assert(OBJLEN >= 0); \ + if (END < 1) { \ + END = 1; \ + } \ + if (END > OBJLEN) { \ + END = OBJLEN; \ + } \ + } while (0) + +static inline int +unicode_error_is_single_bad_char(PyUnicodeErrorObject *exc) +{ + // We use the 'start' and the 'end' values here and NOT those given + // by the corresponding getters since they clip the output. This is + // done to keep a behaviour since Python 3.1 (see gh-51558). + Py_ssize_t start = exc->start, end = exc->end; + return ( + start >= 0 && start < PyUnicode_GET_LENGTH(exc->object) && + end >= 0 && end == start + 1 + ); +} + +static inline int +unicode_error_is_single_bad_byte(PyUnicodeErrorObject *exc) +{ + // We use the 'start' and the 'end' values here and NOT those given + // by the corresponding getters since they clip the output. This is + // done to keep a behaviour since Python 3.1 (see gh-51558). + Py_ssize_t start = exc->start, end = exc->end; + return ( + start >= 0 && start < PyBytes_GET_SIZE(exc->object) && + end >= 0 && end == start + 1 + ); +} + PyObject * PyUnicodeEncodeError_GetEncoding(PyObject *exc) { @@ -2739,38 +2801,33 @@ PyUnicodeTranslateError_GetObject(PyObject *exc) } int -PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start) +PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) { - PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object, - "object"); - if (!obj) { + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *obj = get_unicode(exc->object, "object"); + if (obj == NULL) { return -1; } Py_ssize_t size = PyUnicode_GET_LENGTH(obj); - *start = ((PyUnicodeErrorObject *)exc)->start; - assert(*start >= 0); - if (*start >= size) { - *start = size ? 
size - 1 : 0; - } Py_DECREF(obj); + *start = exc->start; + UNICODE_ERROR_ADJUST_START(*start, size); return 0; } int -PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start) +PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) { - PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object"); - if (!obj) { + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *obj = get_string(exc->object, "object"); + if (obj == NULL) { return -1; } Py_ssize_t size = PyBytes_GET_SIZE(obj); - *start = ((PyUnicodeErrorObject *)exc)->start; - assert(*start >= 0); - if (*start >= size) { - *start = size ? size - 1 : 0; - } Py_DECREF(obj); + *start = exc->start; + UNICODE_ERROR_ADJUST_START(*start, size); return 0; } @@ -2782,75 +2839,63 @@ PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start) } +static inline int +unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) +{ + ((PyUnicodeErrorObject *)self)->start = start; + return 0; +} + + int PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) { - if (start < 0) { - PyErr_SetString(PyExc_ValueError, "'start' must be >= 0"); - return -1; - } - ((PyUnicodeErrorObject *)exc)->start = start; - return 0; + return unicode_error_set_start_impl(exc, start); } int PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start) { - if (start < 0) { - PyErr_SetString(PyExc_ValueError, "'start' must be >= 0"); - return -1; - } - ((PyUnicodeErrorObject *)exc)->start = start; - return 0; + return unicode_error_set_start_impl(exc, start); } int PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start) { - if (start < 0) { - PyErr_SetString(PyExc_ValueError, "'start' must be >= 0"); - return -1; - } - ((PyUnicodeErrorObject *)exc)->start = start; - return 0; + return unicode_error_set_start_impl(exc, start); } int -PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end) +PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - Py_ssize_t size; - 
PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object, - "object"); - if (!obj) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *obj = get_unicode(exc->object, "object"); + if (obj == NULL) { return -1; - *end = ((PyUnicodeErrorObject *)exc)->end; - size = PyUnicode_GET_LENGTH(obj); - if (*end<1) - *end = 1; - if (*end>size) - *end = size; + } + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); Py_DECREF(obj); + *end = exc->end; + UNICODE_ERROR_ADJUST_END(*end, size); return 0; } int -PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end) +PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - Py_ssize_t size; - PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object"); - if (!obj) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *obj = get_string(exc->object, "object"); + if (obj == NULL) { return -1; - size = PyBytes_GET_SIZE(obj); - *end = ((PyUnicodeErrorObject *)exc)->end; - if (*end<1) - *end = 1; - if (*end>size) - *end = size; + } + Py_ssize_t size = PyBytes_GET_SIZE(obj); Py_DECREF(obj); + *end = exc->end; + UNICODE_ERROR_ADJUST_END(*end, size); return 0; } @@ -2862,27 +2907,32 @@ PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *end) } -int -PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) +static inline int +unicode_error_set_end_impl(PyObject *exc, Py_ssize_t end) { ((PyUnicodeErrorObject *)exc)->end = end; return 0; } +int +PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) +{ + return unicode_error_set_end_impl(exc, end); +} + + int PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end) { - ((PyUnicodeErrorObject *)exc)->end = end; - return 0; + return unicode_error_set_end_impl(exc, end); } int PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end) { - ((PyUnicodeErrorObject *)exc)->end = end; - return 0; + return unicode_error_set_end_impl(exc, end); } PyObject * @@ -2991,11 +3041,6 @@ UnicodeEncodeError_init(PyObject *self, 
PyObject *args, PyObject *kwds) return -1; } - if (start < 0) { - PyErr_SetString(PyExc_ValueError, "'start' must be >= 0"); - return -1; - } - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; Py_XSETREF(exc->encoding, Py_NewRef(encoding)); Py_XSETREF(exc->object, Py_NewRef(object)); @@ -3013,42 +3058,40 @@ UnicodeEncodeError_str(PyObject *self) PyObject *reason_str = NULL; PyObject *encoding_str = NULL; - if (!uself->object) + if (!uself->object) { /* Not properly initialized. */ return PyUnicode_FromString(""); + } /* Get reason and encoding as strings, which they might not be if they've been modified after we were constructed. */ reason_str = PyObject_Str(uself->reason); - if (reason_str == NULL) + if (reason_str == NULL) { goto done; + } encoding_str = PyObject_Str(uself->encoding); - if (encoding_str == NULL) + if (encoding_str == NULL) { goto done; - - if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) { + } + if (unicode_error_is_single_bad_char(uself)) { Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start); const char *fmt; - if (badchar <= 0xff) + if (badchar <= 0xff) { fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U"; - else if (badchar <= 0xffff) + } + else if (badchar <= 0xffff) { fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U"; - else + } + else { fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U"; + } result = PyUnicode_FromFormat( - fmt, - encoding_str, - (int)badchar, - uself->start, - reason_str); + fmt, encoding_str, (int)badchar, uself->start, reason_str); } else { result = PyUnicode_FromFormat( "'%U' codec can't encode characters in position %zd-%zd: %U", - encoding_str, - uself->start, - uself->end-1, - reason_str); + encoding_str, uself->start, uself->end - 1, reason_str); } done: Py_XDECREF(reason_str); @@ -3091,11 +3134,6 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) return -1; } - if 
(start < 0) { - PyErr_SetString(PyExc_ValueError, "'start' must be >= 0"); - return -1; - } - if (PyBytes_Check(object)) { Py_INCREF(object); // make 'object' a strong reference } @@ -3114,7 +3152,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; Py_XSETREF(exc->encoding, Py_NewRef(encoding)); - Py_XSETREF(exc->object, object /* object is already a strong reference */); + Py_XSETREF(exc->object, object /* already a strong reference */); exc->start = start; exc->end = end; Py_XSETREF(exc->reason, Py_NewRef(reason)); @@ -3129,36 +3167,32 @@ UnicodeDecodeError_str(PyObject *self) PyObject *reason_str = NULL; PyObject *encoding_str = NULL; - if (!uself->object) + if (!uself->object) { /* Not properly initialized. */ return PyUnicode_FromString(""); + } /* Get reason and encoding as strings, which they might not be if they've been modified after we were constructed. */ reason_str = PyObject_Str(uself->reason); - if (reason_str == NULL) + if (reason_str == NULL) { goto done; + } encoding_str = PyObject_Str(uself->encoding); - if (encoding_str == NULL) + if (encoding_str == NULL) { goto done; + } - if (uself->start < PyBytes_GET_SIZE(uself->object) && uself->end == uself->start+1) { - int byte = (int)(PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[uself->start]&0xff); + if (unicode_error_is_single_bad_byte(uself)) { + int byte = (int)(PyBytes_AS_STRING(uself->object)[uself->start] & 0xff); result = PyUnicode_FromFormat( "'%U' codec can't decode byte 0x%02x in position %zd: %U", - encoding_str, - byte, - uself->start, - reason_str); + encoding_str, byte, uself->start, reason_str); } else { result = PyUnicode_FromFormat( "'%U' codec can't decode bytes in position %zd-%zd: %U", - encoding_str, - uself->start, - uself->end-1, - reason_str - ); + encoding_str, uself->start, uself->end - 1, reason_str); } done: Py_XDECREF(reason_str); @@ -3208,11 +3242,6 @@ 
UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds) return -1; } - if (start < 0) { - PyErr_SetString(PyExc_ValueError, "'start' must be >= 0"); - return -1; - } - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; Py_XSETREF(exc->object, Py_NewRef(object)); exc->start = start; @@ -3239,28 +3268,24 @@ UnicodeTranslateError_str(PyObject *self) if (reason_str == NULL) goto done; - if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) { + if (unicode_error_is_single_bad_char(uself)) { Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start); const char *fmt; - if (badchar <= 0xff) + if (badchar <= 0xff) { fmt = "can't translate character '\\x%02x' in position %zd: %U"; - else if (badchar <= 0xffff) + } + else if (badchar <= 0xffff) { fmt = "can't translate character '\\u%04x' in position %zd: %U"; - else + } + else { fmt = "can't translate character '\\U%08x' in position %zd: %U"; - result = PyUnicode_FromFormat( - fmt, - (int)badchar, - uself->start, - reason_str - ); - } else { + } + result = PyUnicode_FromFormat(fmt, (int)badchar, uself->start, reason_str); + } + else { result = PyUnicode_FromFormat( "can't translate characters in position %zd-%zd: %U", - uself->start, - uself->end-1, - reason_str - ); + uself->start, uself->end - 1, reason_str); } done: Py_XDECREF(reason_str); From 7ce2ef09e30658d71281efaefedfbdd409ae0c40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 30 Aug 2024 13:30:39 +0200 Subject: [PATCH 18/29] add C API tests --- Modules/_testcapi/exceptions.c | 131 +++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/Modules/_testcapi/exceptions.c b/Modules/_testcapi/exceptions.c index 47516881777ce9..e92d9670e7c792 100644 --- a/Modules/_testcapi/exceptions.c +++ b/Modules/_testcapi/exceptions.c @@ -392,6 +392,128 @@ unicode_translate_get_start(PyObject *Py_UNUSED(module), PyObject 
*arg) RETURN_SIZE(start); } +/* Test PyUnicodeEncodeError_SetStart */ +static PyObject * +unicode_encode_set_start(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t start; + if (PyArg_ParseTuple(args, "On", &exc, &start) < 0) { + return NULL; + } + if (PyUnicodeEncodeError_SetStart(exc, start) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeDecodeError_SetStart */ +static PyObject * +unicode_decode_set_start(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t start; + if (PyArg_ParseTuple(args, "On", &exc, &start) < 0) { + return NULL; + } + if (PyUnicodeDecodeError_SetStart(exc, start) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeTranslateError_SetStart */ +static PyObject * +unicode_translate_set_start(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t start; + if (PyArg_ParseTuple(args, "On", &exc, &start) < 0) { + return NULL; + } + if (PyUnicodeTranslateError_SetStart(exc, start) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeEncodeError_GetEnd */ +static PyObject * +unicode_encode_get_end(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t end; + if (PyUnicodeEncodeError_GetEnd(arg, &end) < 0) { + return NULL; + } + RETURN_SIZE(end); +} + +/* Test PyUnicodeDecodeError_GetEnd */ +static PyObject * +unicode_decode_get_end(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t end; + if (PyUnicodeDecodeError_GetEnd(arg, &end) < 0) { + return NULL; + } + RETURN_SIZE(end); +} + +/* Test PyUnicodeTranslateError_GetEnd */ +static PyObject * +unicode_translate_get_end(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t end; + if (PyUnicodeTranslateError_GetEnd(arg, &end) < 0) { + return NULL; + } + RETURN_SIZE(end); +} + +/* Test PyUnicodeEncodeError_SetEnd */ +static PyObject * +unicode_encode_set_end(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t end; + if 
(PyArg_ParseTuple(args, "On", &exc, &end) < 0) { + return NULL; + } + if (PyUnicodeEncodeError_SetEnd(exc, end) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeDecodeError_SetEnd */ +static PyObject * +unicode_decode_set_end(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t end; + if (PyArg_ParseTuple(args, "On", &exc, &end) < 0) { + return NULL; + } + if (PyUnicodeDecodeError_SetEnd(exc, end) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeTranslateError_SetEnd */ +static PyObject * +unicode_translate_set_end(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t end; + if (PyArg_ParseTuple(args, "On", &exc, &end) < 0) { + return NULL; + } + if (PyUnicodeTranslateError_SetEnd(exc, end) < 0) { + return NULL; + } + Py_RETURN_NONE; +} /* * Define the PyRecurdingInfinitelyError_Type @@ -439,6 +561,15 @@ static PyMethodDef test_methods[] = { {"unicode_encode_get_start", unicode_encode_get_start, METH_O}, {"unicode_decode_get_start", unicode_decode_get_start, METH_O}, {"unicode_translate_get_start", unicode_translate_get_start, METH_O}, + {"unicode_encode_set_start", unicode_encode_set_start, METH_VARARGS}, + {"unicode_decode_set_start", unicode_decode_set_start, METH_VARARGS}, + {"unicode_translate_set_start", unicode_translate_set_start, METH_VARARGS}, + {"unicode_encode_get_end", unicode_encode_get_end, METH_O}, + {"unicode_decode_get_end", unicode_decode_get_end, METH_O}, + {"unicode_translate_get_end", unicode_translate_get_end, METH_O}, + {"unicode_encode_set_end", unicode_encode_set_end, METH_VARARGS}, + {"unicode_decode_set_end", unicode_decode_set_end, METH_VARARGS}, + {"unicode_translate_set_end", unicode_translate_set_end, METH_VARARGS}, {NULL}, }; From b55ca5afcf4f33ea19e3134ed70123f28afecbad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 30 Aug 2024 13:30:44 +0200 Subject: [PATCH 19/29] add 
Python tests --- Lib/test/test_capi/test_exceptions.py | 129 ++++++++++++++++++++++---- 1 file changed, 112 insertions(+), 17 deletions(-) diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py index bf917092ecdc13..666e2f2ab09548 100644 --- a/Lib/test/test_capi/test_exceptions.py +++ b/Lib/test/test_capi/test_exceptions.py @@ -5,7 +5,6 @@ import unittest import textwrap -from itertools import product from test import support from test.support import import_helper from test.support.os_helper import TESTFN, TESTFN_UNDECODABLE @@ -426,17 +425,9 @@ def __init__(self, encoding, *args, **kwargs): class TestUnicodeError(unittest.TestCase): - def test_unicode_error_init_start_value(self): - # negative start is not allowed - for (exc_type, literal), obj_len, start in product([ - (UnicodeEncodeError, 'x'), - (UnicodeDecodeError, b'x'), - (TestUnicodeTranslateError, 'x'), - ], range(3), [-2, -1]): - obj = literal * obj_len - for end in [0, start + 1]: - with self.subTest(start=start, end=end, exc_type=exc_type, obj_len=obj_len): - self.assertRaises(ValueError, exc_type, 'utf-8', obj, start, end, 'reason') + def _check_no_crash(self, exc): + # ensure that the __str__() method does not crash + _ = str(exc) def test_unicode_encode_error_get_start(self): get_start = _testcapi.unicode_encode_get_start @@ -451,7 +442,7 @@ def test_unicode_translate_error_get_start(self): self._test_unicode_error_get_start('x', TestUnicodeTranslateError, get_start) def _test_unicode_error_get_start(self, literal, exc_type, get_start): - for obj_len, py_start, c_start in [ + for obj_len, start, c_start in [ # normal cases (5, 0, 0), (5, 1, 1), @@ -462,13 +453,117 @@ def _test_unicode_error_get_start(self, literal, exc_type, get_start): (0, 10, 0), (5, 5, 4), (5, 10, 4), + # negative values are allowed but clipped in the getter + (0, -1, 0), + (1, -1, 0), + (2, -1, 0), + (2, -2, 0), ]: obj = literal * obj_len - py_end = py_start + 1 - - with self.subTest(obj, 
exc_type=exc_type, py_start=py_start, c_start=c_start): - exc = exc_type('utf-8', obj, py_start, py_end, 'reason') + with self.subTest(obj, exc_type=exc_type, start=start): + exc = exc_type('utf-8', obj, start, obj_len, 'reason') self.assertEqual(get_start(exc), c_start) + self._check_no_crash(exc) + + def test_unicode_encode_error_set_start(self): + set_start = _testcapi.unicode_encode_set_start + self._test_unicode_error_set_start('x', UnicodeEncodeError, set_start) + + def test_unicode_decode_error_set_start(self): + set_start = _testcapi.unicode_decode_set_start + self._test_unicode_error_set_start(b'x', UnicodeDecodeError, set_start) + + def test_unicode_translate_error_set_start(self): + set_start = _testcapi.unicode_translate_set_start + self._test_unicode_error_set_start('x', TestUnicodeTranslateError, set_start) + + def _test_unicode_error_set_start(self, literal, exc_type, set_start): + obj_len = 5 + obj = literal * obj_len + for new_start in range(-2 * obj_len, 2 * obj_len): + with self.subTest('C-API', obj=obj, exc_type=exc_type, new_start=new_start): + exc = exc_type('utf-8', obj, 0, obj_len, 'reason') + # arbitrary value is allowed in the C API setter + set_start(exc, new_start) + self.assertEqual(exc.start, new_start) + self._check_no_crash(exc) + + with self.subTest('Py-API', obj=obj, exc_type=exc_type, new_start=new_start): + exc = exc_type('utf-8', obj, 0, obj_len, 'reason') + # arbitrary value is allowed in the attribute setter + exc.start = new_start + self.assertEqual(exc.start, new_start) + self._check_no_crash(exc) + + def test_unicode_encode_error_get_end(self): + get_end = _testcapi.unicode_encode_get_end + self._test_unicode_error_get_end('x', UnicodeEncodeError, get_end) + + def test_unicode_decode_error_get_end(self): + get_end = _testcapi.unicode_decode_get_end + self._test_unicode_error_get_end(b'x', UnicodeDecodeError, get_end) + + def test_unicode_translate_error_get_end(self): + get_end = _testcapi.unicode_translate_get_end + 
self._test_unicode_error_get_end('x', TestUnicodeTranslateError, get_end) + + def _test_unicode_error_get_end(self, literal, exc_type, get_end): + for obj_len, end, c_end in [ + # normal cases + (5, 0, 1), + (5, 1, 1), + (5, 2, 2), + # out-of-range clipped in [MIN(1, OBJLEN), MAX(MIN(1, OBJLEN), OBJLEN)] + (0, 0, 0), + (0, 1, 0), + (0, 10, 0), + (1, 1, 1), + (1, 2, 1), + (5, 5, 5), + (5, 5, 5), + (5, 10, 5), + # negative values are allowed but clipped in the getter + (0, -1, 0), + (1, -1, 1), + (2, -1, 1), + (2, -2, 1), + ]: + obj = literal * obj_len + with self.subTest(obj, exc_type=exc_type, end=end): + exc = exc_type('utf-8', obj, 0, end, 'reason') + self.assertEqual(get_end(exc), c_end) + self._check_no_crash(exc) + + def test_unicode_encode_error_set_end(self): + set_end = _testcapi.unicode_encode_set_end + self._test_unicode_error_set_end('x', UnicodeEncodeError, set_end) + + def test_unicode_decode_error_set_end(self): + set_end = _testcapi.unicode_decode_set_end + self._test_unicode_error_set_end(b'x', UnicodeDecodeError, set_end) + + def test_unicode_translate_error_set_end(self): + set_end = _testcapi.unicode_translate_set_end + self._test_unicode_error_set_end('x', TestUnicodeTranslateError, set_end) + + def _test_unicode_error_set_end(self, literal, exc_type, set_end): + obj_len = 5 + obj = literal * obj_len + for new_end in range(-2 * obj_len, 2 * obj_len): + with self.subTest('C-API', obj=obj, exc_type=exc_type, new_end=new_end): + exc = exc_type('utf-8', obj, 0, obj_len, 'reason') + # arbitrary value is allowed in the C API setter + set_end(exc, new_end) + self.assertEqual(exc.end, new_end) + self._check_no_crash(exc) + + with self.subTest('Py-API', obj=obj, exc_type=exc_type, new_end=new_end): + exc = exc_type('utf-8', obj, 0, obj_len, 'reason') + # arbitrary value is allowed in the attribute setter + exc.end = new_end + self.assertEqual(exc.end, new_end) + self._check_no_crash(exc) + class Test_PyUnstable_Exc_PrepReraiseStar(ExceptionIsLikeMixin, 
unittest.TestCase): From 4e34e5fd13fb458f3835221e0e871a65e87663e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 30 Aug 2024 13:52:35 +0200 Subject: [PATCH 20/29] update docs --- Doc/c-api/exceptions.rst | 20 ++++++++++++++++++-- Doc/library/exceptions.rst | 9 +++++++-- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index 8756b76e0a40ae..37b5c4ab8bd88d 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -853,12 +853,23 @@ The following functions are used to create and modify Unicode exceptions from C. *\*start*. *start* must not be ``NULL``. Return ``0`` on success, ``-1`` on failure. + If the :attr:`UnicodeError.object` is an empty sequence, the resulting + *start* is ``0``. Otherwise, it is clipped to ``[0, len(object) - 1]``. + + .. seealso:: :attr:`UnicodeError.start` + .. c:function:: int PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start) int PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) int PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start) - Set the *start* attribute of the given exception object to *start*. *start* - must be non-negative. Return ``0`` on success, ``-1`` on failure. + Set the *start* attribute of the given exception object to *start*. + Return ``0`` on success, ``-1`` on failure. + + .. note:: + + While passing a negative *start* does not raise an exception, + the corresponding getters will not consider it as a relative + offset. .. c:function:: int PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end) int PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end) @@ -868,6 +879,9 @@ The following functions are used to create and modify Unicode exceptions from C. *\*end*. *end* must not be ``NULL``. Return ``0`` on success, ``-1`` on failure. 
+ If the :attr:`UnicodeError.object` is an empty sequence, the resulting + *end* is ``0``. Otherwise, it is clipped to ``[1, len(object)]``. + .. c:function:: int PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end) int PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) int PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end) @@ -875,6 +889,8 @@ The following functions are used to create and modify Unicode exceptions from C. Set the *end* attribute of the given exception object to *end*. Return ``0`` on success, ``-1`` on failure. + .. seealso:: :attr:`UnicodeError.end` + .. c:function:: PyObject* PyUnicodeDecodeError_GetReason(PyObject *exc) PyObject* PyUnicodeEncodeError_GetReason(PyObject *exc) PyObject* PyUnicodeTranslateError_GetReason(PyObject *exc) diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index 55325ac9c1e11a..f72b11e34c5c3d 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -642,13 +642,18 @@ The following exceptions are the exceptions that are usually raised. .. attribute:: start - The first index of invalid data in :attr:`object`. This value - must be non-negative. + The first index of invalid data in :attr:`object`. + + This value should not be negative as it is interpreted as an + absolute offset but this constraint is not enforced at runtime. .. attribute:: end The index after the last invalid data in :attr:`object`. + This value should not be negative as it is interpreted as an + absolute offset but this constraint is not enforced at runtime. + .. 
exception:: UnicodeEncodeError From 033a1ac112949a97f2e635af33e679f78dd81f70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 30 Aug 2024 13:53:13 +0200 Subject: [PATCH 21/29] fix typo --- Objects/exceptions.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 8fb4c6b61ffd78..0cb48e23cbae02 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2709,7 +2709,7 @@ set_unicodefromstring(PyObject **attr, const char *value) } /* - * Ajust the (inclusive) 'start' value of a UnicodeError object. + * Adjust the (inclusive) 'start' value of a UnicodeError object. * * The START can be negative or not, but when adjusting the value, * we clip it in [0, MAX(0, OBJLEN - 1)] but do not intepret it as @@ -2727,7 +2727,7 @@ set_unicodefromstring(PyObject **attr, const char *value) } while (0) /* - * Ajust the (eclusive) 'end' value of a UnicodeError object. + * Adjust the (eclusive) 'end' value of a UnicodeError object. * * The END can be negative or not, but when adjusting the value, * we clip it in [MIN(1, OBJLEN), MAX(MIN(1, OBJLEN), OBJLEN)] but From c802e64eec49be8bfad618d8b16c7ec40c66150d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 13 Sep 2024 13:19:19 +0200 Subject: [PATCH 22/29] convert macros into `static inline` functions --- Objects/exceptions.c | 56 ++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 0cb48e23cbae02..18ad3ec60d9b4d 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2715,16 +2715,18 @@ set_unicodefromstring(PyObject **attr, const char *value) * we clip it in [0, MAX(0, OBJLEN - 1)] but do not intepret it as * a relative offset. 
*/ -#define UNICODE_ERROR_ADJUST_START(START, OBJLEN) \ - do { \ - assert(OBJLEN >= 0); \ - if (START < 0) { \ - START = 0; \ - } \ - if (START >= OBJLEN) { \ - START = OBJLEN == 0 ? 0 : OBJLEN - 1; \ - } \ - } while (0) +static inline Py_ssize_t +unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen) +{ + assert(objlen >= 0); + if (start < 0) { + start = 0; + } + if (start >= objlen) { + start = objlen == 0 ? 0 : objlen - 1; + } + return start; +} /* * Adjust the (eclusive) 'end' value of a UnicodeError object. @@ -2733,16 +2735,18 @@ set_unicodefromstring(PyObject **attr, const char *value) * we clip it in [MIN(1, OBJLEN), MAX(MIN(1, OBJLEN), OBJLEN)] but * do not intepret it as a relative offset. */ -#define UNICODE_ERROR_ADJUST_END(END, OBJLEN) \ - do { \ - assert(OBJLEN >= 0); \ - if (END < 1) { \ - END = 1; \ - } \ - if (END > OBJLEN) { \ - END = OBJLEN; \ - } \ - } while (0) +static inline Py_ssize_t +unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) +{ + assert(objlen >= 0); + if (end < 1) { + end = 1; + } + if (end > objlen) { + end = objlen; + } + return end; +} static inline int unicode_error_is_single_bad_char(PyUnicodeErrorObject *exc) @@ -2810,8 +2814,7 @@ PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) } Py_ssize_t size = PyUnicode_GET_LENGTH(obj); Py_DECREF(obj); - *start = exc->start; - UNICODE_ERROR_ADJUST_START(*start, size); + *start = unicode_error_adjust_start(exc->start, size); return 0; } @@ -2826,8 +2829,7 @@ PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) } Py_ssize_t size = PyBytes_GET_SIZE(obj); Py_DECREF(obj); - *start = exc->start; - UNICODE_ERROR_ADJUST_START(*start, size); + *start = unicode_error_adjust_start(exc->start, size); return 0; } @@ -2878,8 +2880,7 @@ PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) } Py_ssize_t size = PyUnicode_GET_LENGTH(obj); Py_DECREF(obj); - *end = exc->end; - UNICODE_ERROR_ADJUST_END(*end, size); + *end = 
unicode_error_adjust_end(exc->end, size); return 0; } @@ -2894,8 +2895,7 @@ PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) } Py_ssize_t size = PyBytes_GET_SIZE(obj); Py_DECREF(obj); - *end = exc->end; - UNICODE_ERROR_ADJUST_END(*end, size); + *end = unicode_error_adjust_end(exc->end, size); return 0; } From fcde448436a38ca9ce55fcd5e395ac869262c2fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 27 Oct 2024 09:26:23 +0100 Subject: [PATCH 23/29] post-merge cleanup --- Objects/exceptions.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index bc02539ea25c8d..8a2097f863e2d7 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2748,32 +2748,6 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) return end; } -static inline int -unicode_error_is_single_bad_char(PyUnicodeErrorObject *exc) -{ - // We use the 'start' and the 'end' values here and NOT those given - // by the corresponding getters since they clip the output. This is - // done to keep a behaviour since Python 3.1 (see gh-51558). - Py_ssize_t start = exc->start, end = exc->end; - return ( - start >= 0 && start < PyUnicode_GET_LENGTH(exc->object) && - end >= 0 && end == start + 1 - ); -} - -static inline int -unicode_error_is_single_bad_byte(PyUnicodeErrorObject *exc) -{ - // We use the 'start' and the 'end' values here and NOT those given - // by the corresponding getters since they clip the output. This is - // done to keep a behaviour since Python 3.1 (see gh-51558). 
- Py_ssize_t start = exc->start, end = exc->end; - return ( - start >= 0 && start < PyBytes_GET_SIZE(exc->object) && - end >= 0 && end == start + 1 - ); -} - PyObject * PyUnicodeEncodeError_GetEncoding(PyObject *exc) { From baa5cb20a9eb35f3f1e9124f9aca385164148cd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 16:05:21 +0100 Subject: [PATCH 24/29] fix typo --- Objects/exceptions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 8a2097f863e2d7..321b3c5cd29ef9 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2729,7 +2729,7 @@ unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen) } /* - * Adjust the (eclusive) 'end' value of a UnicodeError object. + * Adjust the (exclusive) 'end' value of a UnicodeError object. * * The END can be negative or not, but when adjusting the value, * we clip it in [MIN(1, OBJLEN), MAX(MIN(1, OBJLEN), OBJLEN)] but From 4c4808ebcb9ab154e2fa1e793f5b831fd369de2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 16:09:15 +0100 Subject: [PATCH 25/29] update NEWS and docs --- .../C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst | 6 +++--- .../C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst | 6 ++++++ Objects/exceptions.c | 8 ++++---- 3 files changed, 13 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst diff --git a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst index adb4ceeb82d95d..2fbf520b25f3d5 100644 --- a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst +++ b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst @@ -1,5 +1,5 @@ -Ensure that *start* is correctly set on 
:exc:`UnicodeEncodeError` objects. -A negative *start* is not allowed by :c:func:`PyUnicodeEncodeError_SetStart` -and will not be returned by :c:func:`PyUnicodeEncodeError_GetStart`. Similar +Ensure that the value of :attr:`UnicodeEncodeError.start` retrieved by +:c:func:`PyUnicodeEncodeError_GetStart` lie in ``[0, max(0, objlen - 1)]`` +where *objlen* is the length of :attr:`UnicodeEncodeError.object`. Similar arguments apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` and their corresponding C interface. Patch by Bénédikt Tran. diff --git a/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst b/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst new file mode 100644 index 00000000000000..a0d12c228ce6cf --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst @@ -0,0 +1,6 @@ +Ensure that the value of :attr:`UnicodeEncodeError.end` retrieved by +:c:func:`PyUnicodeEncodeError_GetEnd` lies in ``[min(1, objlen), max(min(1, +objlen), objlen)]`` where *objlen* is the length of +:attr:`UnicodeEncodeError.object`. Similar arguments apply to +:exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` and their +corresponding C interface. Patch by Bénédikt Tran. diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 321b3c5cd29ef9..124b591ee3a13f 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2711,8 +2711,8 @@ set_unicodefromstring(PyObject **attr, const char *value) /* * Adjust the (inclusive) 'start' value of a UnicodeError object. * - * The START can be negative or not, but when adjusting the value, - * we clip it in [0, MAX(0, OBJLEN - 1)] but do not intepret it as + * The 'start' can be negative or not, but when adjusting the value, + * we clip it in [0, max(0, objlen - 1)] but do not intepret it as * a relative offset. 
*/ static inline Py_ssize_t @@ -2731,8 +2731,8 @@ unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen) /* * Adjust the (exclusive) 'end' value of a UnicodeError object. * - * The END can be negative or not, but when adjusting the value, - * we clip it in [MIN(1, OBJLEN), MAX(MIN(1, OBJLEN), OBJLEN)] but + * The 'end' can be negative or not, but when adjusting the value, + * we clip it in [min(1, objlen), max(min(1, objlen), objlen)] but * do not intepret it as a relative offset. */ static inline Py_ssize_t From efbdff1664bb5b44f821d819e3dd152f4cb2f08c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 16:15:16 +0100 Subject: [PATCH 26/29] add some assertion checks --- Objects/exceptions.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 124b591ee3a13f..de259125ec929e 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2884,6 +2884,7 @@ PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *end) static inline int unicode_error_set_end_impl(PyObject *exc, Py_ssize_t end) { + assert(PyObject_TypeCheck(exc, (PyTypeObject*)&PyExc_UnicodeError)); ((PyUnicodeErrorObject *)exc)->end = end; return 0; } From 180f3c205fd0d6bf30e2f45aa4beb27596434f76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 16:15:45 +0100 Subject: [PATCH 27/29] add some assertion checks --- Objects/exceptions.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index de259125ec929e..c52c4de662a3d0 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2818,6 +2818,7 @@ PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start) static inline int unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) { + assert(PyObject_TypeCheck(self, (PyTypeObject*)&PyExc_UnicodeError)); 
((PyUnicodeErrorObject *)self)->start = start; return 0; } From 5759a705970bbda64a7ba1705918536b7de53948 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 3 Dec 2024 13:42:13 +0100 Subject: [PATCH 28/29] remove failing assertions for now --- Objects/exceptions.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index c52c4de662a3d0..124b591ee3a13f 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2818,7 +2818,6 @@ PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start) static inline int unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) { - assert(PyObject_TypeCheck(self, (PyTypeObject*)&PyExc_UnicodeError)); ((PyUnicodeErrorObject *)self)->start = start; return 0; } @@ -2885,7 +2884,6 @@ PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *end) static inline int unicode_error_set_end_impl(PyObject *exc, Py_ssize_t end) { - assert(PyObject_TypeCheck(exc, (PyTypeObject*)&PyExc_UnicodeError)); ((PyUnicodeErrorObject *)exc)->end = end; return 0; } From 8c1217173659d0b1d47de55871840438da5dd9bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 3 Dec 2024 14:13:07 +0100 Subject: [PATCH 29/29] fix docs --- .../2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst | 7 ++++--- .../2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst | 10 +++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst index 2fbf520b25f3d5..2cfb8b8a1e245a 100644 --- a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst +++ b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst @@ -1,5 +1,6 @@ -Ensure that the value of :attr:`UnicodeEncodeError.start` retrieved by 
-:c:func:`PyUnicodeEncodeError_GetStart` lie in ``[0, max(0, objlen - 1)]`` -where *objlen* is the length of :attr:`UnicodeEncodeError.object`. Similar +Ensure that the value of :attr:`UnicodeEncodeError.start <UnicodeError.start>` +retrieved by :c:func:`PyUnicodeEncodeError_GetStart` lie in +``[0, max(0, objlen - 1)]`` where *objlen* is the length of +:attr:`UnicodeEncodeError.object <UnicodeError.object>`. Similar arguments apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` and their corresponding C interface. Patch by Bénédikt Tran. diff --git a/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst b/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst index a0d12c228ce6cf..107751579c4d91 100644 --- a/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst +++ b/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst @@ -1,6 +1,6 @@ -Ensure that the value of :attr:`UnicodeEncodeError.end` retrieved by -:c:func:`PyUnicodeEncodeError_GetEnd` lies in ``[min(1, objlen), max(min(1, -objlen), objlen)]`` where *objlen* is the length of -:attr:`UnicodeEncodeError.object`. Similar arguments apply to -:exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` and their +Ensure that the value of :attr:`UnicodeEncodeError.end <UnicodeError.end>` +retrieved by :c:func:`PyUnicodeEncodeError_GetEnd` lies in ``[min(1, objlen), +max(min(1, objlen), objlen)]`` where *objlen* is the length of +:attr:`UnicodeEncodeError.object <UnicodeError.object>`. Similar arguments +apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` and their corresponding C interface. Patch by Bénédikt Tran.