From 7a1574dfcee9688813487ae6765ecc87ad57425b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 26 Aug 2024 17:40:48 +0200
Subject: [PATCH 01/29] Fix `PyUnicode{Encode,Decode}Error_GetStart`.

This fixes a bug when the `start` value of a `UnicodeError` is 0
and the underlying object is an empty string or bytes: `start` was
clamped to `size - 1`, i.e. -1, instead of 0.
---
 Objects/exceptions.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index fda62f159c1540..9bcdc88e1291ca 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2751,7 +2751,7 @@ PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start)
     if (*start<0)
         *start = 0; /*XXX check for values <0*/
     if (*start>=size)
-        *start = size-1;
+        *start = size ? size-1 : 0;
     Py_DECREF(obj);
     return 0;
 }
@@ -2769,7 +2769,7 @@ PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
     if (*start<0)
         *start = 0;
     if (*start>=size)
-        *start = size-1;
+        *start = size ? size-1 : 0;
     Py_DECREF(obj);
     return 0;
 }

From 6ef0c6d47a0b6b41ad5e1af4e77a2a6200297c23 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 27 Aug 2024 09:07:59 +0200
Subject: [PATCH 02/29] blurb

---
 .../next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst

diff --git a/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst b/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst
new file mode 100644
index 00000000000000..909eac97dde81e
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst	
@@ -0,0 +1,4 @@
+Ensure that *start* is correctly set by :c:func:`PyUnicodeEncodeError_GetStart`
+and :c:func:`PyUnicodeDecodeError_GetStart` when :attr:`UnicodeError.start` is
+*0* and the underlying :attr:`UnicodeEncode.object` is empty.
+Patch by Bénédikt Tran.

From 60ab0bb67f1b0469a2b76f9b25fc6f5ae2e89efb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 27 Aug 2024 09:48:12 +0200
Subject: [PATCH 03/29] add tests

---
 Lib/test/test_capi/test_exceptions.py | 46 +++++++++++++++++++++++++++
 Modules/_testcapi/exceptions.c        | 23 ++++++++++++++
 2 files changed, 69 insertions(+)

diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py
index b22ddd8ad858d4..750712f629137b 100644
--- a/Lib/test/test_capi/test_exceptions.py
+++ b/Lib/test/test_capi/test_exceptions.py
@@ -415,6 +415,52 @@ def test_err_formatunraisable(self):
         # CRASHES formatunraisable(NULL, NULL)
 
 
+class TestUnicodeError(unittest.TestCase):
+
+    def test_unicode_encode_error_get_start(self):
+        test_func = _testcapi.unicode_encode_get_start
+        self._test_unicode_error_get_start('x', UnicodeEncodeError, test_func)
+
+    def test_unicode_decode_error_get_start(self):
+        test_func = _testcapi.unicode_decode_get_start
+        self._test_unicode_error_get_start(b'x', UnicodeDecodeError, test_func)
+
+    def _test_unicode_error_get_start(self, literal, exc_type, test_func):
+        for obj_len, py_start, c_start in [
+            # normal cases
+            (5, 0, 0),
+            (5, 1, 1),
+            (5, 2, 2),
+            # negative start is clamped to 0
+            (0, -1, 0),
+            (2, -1, 0),
+            # out of range start is clamped to max(0, obj_len - 1)
+            (0, 0, 0),
+            (0, 1, 0),
+            (0, 10, 0),
+            (2, 0, 0),
+            (5, 5, 4),
+            (5, 10, 4),
+        ]:
+            c_start_computed = py_start
+            if c_start_computed < 0:
+                c_start_computed = 0
+            if c_start_computed >= obj_len:
+                if obj_len == 0:
+                    c_start_computed = 0
+                else:
+                    c_start_computed = obj_len - 1
+
+            s = literal * obj_len
+            py_end = py_start + 1
+
+            with self.subTest(s, exc_type=exc_type, py_start=py_start, c_start=c_start):
+                self.assertEqual(c_start, c_start_computed)
+                exc = exc_type('utf-8', s, py_start, py_end, 'reason')
+                c_start_actual = test_func(exc)
+                self.assertEqual(c_start_actual, c_start)
+
+
 class Test_PyUnstable_Exc_PrepReraiseStar(ExceptionIsLikeMixin, unittest.TestCase):
 
     def setUp(self):
diff --git a/Modules/_testcapi/exceptions.c b/Modules/_testcapi/exceptions.c
index 316ef0e7ad7e55..a36bec3959acda 100644
--- a/Modules/_testcapi/exceptions.c
+++ b/Modules/_testcapi/exceptions.c
@@ -359,6 +359,27 @@ _testcapi_unstable_exc_prep_reraise_star_impl(PyObject *module,
     return PyUnstable_Exc_PrepReraiseStar(orig, excs);
 }
 
+/* Test PyUnicodeEncodeError_GetStart */
+static PyObject *
+unicode_encode_get_start(PyObject *Py_UNUSED(module), PyObject *arg)
+{
+    Py_ssize_t start;
+    if (PyUnicodeEncodeError_GetStart(arg, &start) < 0) {
+        return NULL;
+    }
+    RETURN_SIZE(start);
+}
+
+/* Test PyUnicodeDecodeError_GetStart */
+static PyObject *
+unicode_decode_get_start(PyObject *Py_UNUSED(module), PyObject *arg)
+{
+    Py_ssize_t start;
+    if (PyUnicodeDecodeError_GetStart(arg, &start) < 0) {
+        return NULL;
+    }
+    RETURN_SIZE(start);
+}
 
 /*
  * Define the PyRecurdingInfinitelyError_Type
@@ -403,6 +424,8 @@ static PyMethodDef test_methods[] = {
     _TESTCAPI_SET_EXCEPTION_METHODDEF
     _TESTCAPI_TRACEBACK_PRINT_METHODDEF
     _TESTCAPI_UNSTABLE_EXC_PREP_RERAISE_STAR_METHODDEF
+    {"unicode_encode_get_start", unicode_encode_get_start,       METH_O},
+    {"unicode_decode_get_start", unicode_decode_get_start,       METH_O},
     {NULL},
 };
 

From 67b3d8ef92543dcb2501eedf31c48e323e3c7a7c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 27 Aug 2024 09:56:36 +0200
Subject: [PATCH 04/29] fix NEWS

---
 .../next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst b/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst
index 909eac97dde81e..922d0b34ec7546 100644
--- a/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst	
+++ b/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst	
@@ -1,4 +1,4 @@
 Ensure that *start* is correctly set by :c:func:`PyUnicodeEncodeError_GetStart`
 and :c:func:`PyUnicodeDecodeError_GetStart` when :attr:`UnicodeError.start` is
-*0* and the underlying :attr:`UnicodeEncode.object` is empty.
+*0* and the underlying :attr:`UnicodeError.object` is empty.
 Patch by Bénédikt Tran.

From 78fff57f57a0fc4a74d24deefa5608ea46265532 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 27 Aug 2024 10:29:39 +0200
Subject: [PATCH 05/29] remove a duplicated normal case

---
 Lib/test/test_capi/test_exceptions.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py
index 750712f629137b..c3e49485677494 100644
--- a/Lib/test/test_capi/test_exceptions.py
+++ b/Lib/test/test_capi/test_exceptions.py
@@ -438,7 +438,6 @@ def _test_unicode_error_get_start(self, literal, exc_type, test_func):
             (0, 0, 0),
             (0, 1, 0),
             (0, 10, 0),
-            (2, 0, 0),
             (5, 5, 4),
             (5, 10, 4),
         ]:

From a6e6f80b86b9b55c6f90d64895d1e44c9c6c6ff4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:49:42 +0200
Subject: [PATCH 06/29] handle start < 0

---
 Include/cpython/pyerrors.h |  2 +-
 Objects/exceptions.c       | 67 ++++++++++++++++++++++++++++----------
 2 files changed, 50 insertions(+), 19 deletions(-)

diff --git a/Include/cpython/pyerrors.h b/Include/cpython/pyerrors.h
index b36b4681f5dddb..0aaf7388a4a662 100644
--- a/Include/cpython/pyerrors.h
+++ b/Include/cpython/pyerrors.h
@@ -44,7 +44,7 @@ typedef struct {
     PyException_HEAD
     PyObject *encoding;
     PyObject *object;
-    Py_ssize_t start;
+    Py_ssize_t start;  // should be >= 0
     Py_ssize_t end;
     PyObject *reason;
 } PyUnicodeErrorObject;
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 9bcdc88e1291ca..8cd1746b7b928f 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2741,17 +2741,17 @@ PyUnicodeTranslateError_GetObject(PyObject *exc)
 int
 PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start)
 {
-    Py_ssize_t size;
     PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object,
                                 "object");
-    if (!obj)
+    if (!obj) {
         return -1;
+    }
+    Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
     *start = ((PyUnicodeErrorObject *)exc)->start;
-    size = PyUnicode_GET_LENGTH(obj);
-    if (*start<0)
-        *start = 0; /*XXX check for values <0*/
-    if (*start>=size)
-        *start = size ? size-1 : 0;
+    assert(*start >= 0);
+    if (*start >= size) {
+        *start = size ? size - 1 : 0;
+    }
     Py_DECREF(obj);
     return 0;
 }
@@ -2760,16 +2760,16 @@ PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start)
 int
 PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
 {
-    Py_ssize_t size;
     PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object");
-    if (!obj)
+    if (!obj) {
         return -1;
-    size = PyBytes_GET_SIZE(obj);
+    }
+    Py_ssize_t size = PyBytes_GET_SIZE(obj);
     *start = ((PyUnicodeErrorObject *)exc)->start;
-    if (*start<0)
-        *start = 0;
-    if (*start>=size)
-        *start = size ? size-1 : 0;
+    assert(*start >= 0);
+    if (*start >= size) {
+        *start = size ? size - 1 : 0;
+    }
     Py_DECREF(obj);
     return 0;
 }
@@ -2785,6 +2785,10 @@ PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start)
 int
 PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start)
 {
+    if (start < 0) {
+        PyErr_SetString(PyExc_ValueError, "'start' must be >= 0");
+        return -1;
+    }
     ((PyUnicodeErrorObject *)exc)->start = start;
     return 0;
 }
@@ -2793,6 +2797,10 @@ PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start)
 int
 PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start)
 {
+    if (start < 0) {
+        PyErr_SetString(PyExc_ValueError, "'start' must be >= 0");
+        return -1;
+    }
     ((PyUnicodeErrorObject *)exc)->start = start;
     return 0;
 }
@@ -2801,6 +2809,10 @@ PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start)
 int
 PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start)
 {
+    if (start < 0) {
+        PyErr_SetString(PyExc_ValueError, "'start' must be >= 0");
+        return -1;
+    }
     ((PyUnicodeErrorObject *)exc)->start = start;
     return 0;
 }
@@ -2980,8 +2992,12 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
     if (!PyArg_ParseTuple(args, "UUnnU",
                           &err->encoding, &err->object,
                           &err->start, &err->end, &err->reason)) {
-        err->encoding = err->object = err->reason = NULL;
-        return -1;
+        goto error;
+    }
+
+    if (err->start < 0) {
+        PyErr_SetString(PyExc_ValueError, "'start' must be >= 0");
+        goto error;
     }
 
     Py_INCREF(err->encoding);
@@ -2989,6 +3005,9 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
     Py_INCREF(err->reason);
 
     return 0;
+error:
+    err->encoding = err->object = err->reason = NULL;
+    return -1;
 }
 
 static PyObject *
@@ -3086,6 +3105,11 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
     Py_INCREF(ude->object);
     Py_INCREF(ude->reason);
 
+    if (ude->start < 0) {
+        PyErr_SetString(PyExc_ValueError, "'start' must be >= 0");
+        goto error;
+    }
+
     if (!PyBytes_Check(ude->object)) {
         Py_buffer view;
         if (PyObject_GetBuffer(ude->object, &view, PyBUF_SIMPLE) != 0)
@@ -3190,14 +3214,21 @@ UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
     if (!PyArg_ParseTuple(args, "UnnU",
                           &self->object,
                           &self->start, &self->end, &self->reason)) {
-        self->object = self->reason = NULL;
-        return -1;
+        goto error;
+    }
+
+    if (self->start < 0) {
+        PyErr_SetString(PyExc_ValueError, "'start' must be >= 0");
+        goto error;
     }
 
     Py_INCREF(self->object);
     Py_INCREF(self->reason);
 
     return 0;
+error:
+    self->object = self->reason = NULL;
+    return -1;
 }
 
 

From 20c47ba557b1dcac343402d2b4ab23649b79dfd9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:49:36 +0200
Subject: [PATCH 07/29] add C tests

---
 Modules/_testcapi/exceptions.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/Modules/_testcapi/exceptions.c b/Modules/_testcapi/exceptions.c
index a36bec3959acda..47516881777ce9 100644
--- a/Modules/_testcapi/exceptions.c
+++ b/Modules/_testcapi/exceptions.c
@@ -381,6 +381,18 @@ unicode_decode_get_start(PyObject *Py_UNUSED(module), PyObject *arg)
     RETURN_SIZE(start);
 }
 
+/* Test PyUnicodeTranslateError_GetStart */
+static PyObject *
+unicode_translate_get_start(PyObject *Py_UNUSED(module), PyObject *arg)
+{
+    Py_ssize_t start;
+    if (PyUnicodeTranslateError_GetStart(arg, &start) < 0) {
+        return NULL;
+    }
+    RETURN_SIZE(start);
+}
+
+
 /*
  * Define the PyRecurdingInfinitelyError_Type
  */
@@ -426,6 +438,7 @@ static PyMethodDef test_methods[] = {
     _TESTCAPI_UNSTABLE_EXC_PREP_RERAISE_STAR_METHODDEF
     {"unicode_encode_get_start", unicode_encode_get_start,       METH_O},
     {"unicode_decode_get_start", unicode_decode_get_start,       METH_O},
+    {"unicode_translate_get_start", unicode_translate_get_start, METH_O},
     {NULL},
 };
 

From 51bc77e33cf535dcae3f0dc5d35c74b92784ae87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:49:31 +0200
Subject: [PATCH 08/29] add test coverage

---
 Lib/test/test_capi/test_exceptions.py | 32 +++++++++++++++++++++------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py
index c3e49485677494..1ee55835b3b635 100644
--- a/Lib/test/test_capi/test_exceptions.py
+++ b/Lib/test/test_capi/test_exceptions.py
@@ -415,8 +415,28 @@ def test_err_formatunraisable(self):
         # CRASHES formatunraisable(NULL, NULL)
 
 
+class PyUnicodeTranslateError(UnicodeTranslateError):
+    # UnicodeTranslateError takes 4 arguments instead of 5,
+    # so we just make a UnicodeTranslateError class that is
+    # compatible with the UnicodeError.__init__.
+    def __init__(self, encoding, *args, **kwargs):
+        super().__init__(*args)
+
+
 class TestUnicodeError(unittest.TestCase):
 
+    def test_unicode_error_start_value(self):
+        # negative start is not allowed
+        for exc_type, literal in [
+            (UnicodeEncodeError, 'x'),
+            (UnicodeDecodeError, b'x'),
+            (PyUnicodeTranslateError, 'x'),
+        ]:
+            for obj_len in [0, 1, 2]:
+                s = literal * obj_len
+                with self.subTest(exc_type=exc_type, obj_len=obj_len):
+                    self.assertRaises(ValueError, exc_type, 'utf-8', s, -1, 0, '?')
+
     def test_unicode_encode_error_get_start(self):
         test_func = _testcapi.unicode_encode_get_start
         self._test_unicode_error_get_start('x', UnicodeEncodeError, test_func)
@@ -425,15 +445,16 @@ def test_unicode_decode_error_get_start(self):
         test_func = _testcapi.unicode_decode_get_start
         self._test_unicode_error_get_start(b'x', UnicodeDecodeError, test_func)
 
+    def test_unicode_translate_error_get_start(self):
+        test_func = _testcapi.unicode_translate_get_start
+        self._test_unicode_error_get_start('x', PyUnicodeTranslateError, test_func)
+
     def _test_unicode_error_get_start(self, literal, exc_type, test_func):
         for obj_len, py_start, c_start in [
             # normal cases
             (5, 0, 0),
             (5, 1, 1),
             (5, 2, 2),
-            # negative start is clamped to 0
-            (0, -1, 0),
-            (2, -1, 0),
             # out of range start is clamped to max(0, obj_len - 1)
             (0, 0, 0),
             (0, 1, 0),
@@ -442,8 +463,6 @@ def _test_unicode_error_get_start(self, literal, exc_type, test_func):
             (5, 10, 4),
         ]:
             c_start_computed = py_start
-            if c_start_computed < 0:
-                c_start_computed = 0
             if c_start_computed >= obj_len:
                 if obj_len == 0:
                     c_start_computed = 0
@@ -455,11 +474,10 @@ def _test_unicode_error_get_start(self, literal, exc_type, test_func):
 
             with self.subTest(s, exc_type=exc_type, py_start=py_start, c_start=c_start):
                 self.assertEqual(c_start, c_start_computed)
-                exc = exc_type('utf-8', s, py_start, py_end, 'reason')
+                exc = exc_type('utf-8', s, py_start, py_end, 'why')
                 c_start_actual = test_func(exc)
                 self.assertEqual(c_start_actual, c_start)
 
-
 class Test_PyUnstable_Exc_PrepReraiseStar(ExceptionIsLikeMixin, unittest.TestCase):
 
     def setUp(self):

From b290e58441ae4ffe4735c6b4b854bdf279d88b7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:53:33 +0200
Subject: [PATCH 09/29] update docs

---
 Doc/c-api/exceptions.rst   | 4 ++--
 Doc/library/exceptions.rst | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst
index 499bfb47cc4be5..8756b76e0a40ae 100644
--- a/Doc/c-api/exceptions.rst
+++ b/Doc/c-api/exceptions.rst
@@ -857,8 +857,8 @@ The following functions are used to create and modify Unicode exceptions from C.
                 int PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start)
                 int PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start)
 
-   Set the *start* attribute of the given exception object to *start*.  Return
-   ``0`` on success, ``-1`` on failure.
+   Set the *start* attribute of the given exception object to *start*.  *start*
+   must be non-negative. Return ``0`` on success, ``-1`` on failure.
 
 .. c:function:: int PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
                 int PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst
index b5ba86f1b19223..55325ac9c1e11a 100644
--- a/Doc/library/exceptions.rst
+++ b/Doc/library/exceptions.rst
@@ -642,7 +642,8 @@ The following exceptions are the exceptions that are usually raised.
 
    .. attribute:: start
 
-       The first index of invalid data in :attr:`object`.
+       The first index of invalid data in :attr:`object`. This value
+       must be non-negative.
 
    .. attribute:: end
 

From 75398a9a8494f8ae74ff481219a1272266b431c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:54:48 +0200
Subject: [PATCH 10/29] fixup

---
 Lib/test/test_capi/test_exceptions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py
index 1ee55835b3b635..2f3478042c5f46 100644
--- a/Lib/test/test_capi/test_exceptions.py
+++ b/Lib/test/test_capi/test_exceptions.py
@@ -420,7 +420,7 @@ class PyUnicodeTranslateError(UnicodeTranslateError):
     # so we just make a UnicodeTranslateError class that is
     # compatible with the UnicodeError.__init__.
     def __init__(self, encoding, *args, **kwargs):
-        super().__init__(*args)
+        super().__init__(*args, **kwargs)
 
 
 class TestUnicodeError(unittest.TestCase):

From 546be87f5ac0c61c7985f22ed8e362f84843e431 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:59:07 +0200
Subject: [PATCH 11/29] update blurb

---
 .../C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst b/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst
index 922d0b34ec7546..adb4ceeb82d95d 100644
--- a/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst	
+++ b/Misc/NEWS.d/next/C API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst	
@@ -1,4 +1,5 @@
-Ensure that *start* is correctly set by :c:func:`PyUnicodeEncodeError_GetStart`
-and :c:func:`PyUnicodeDecodeError_GetStart` when :attr:`UnicodeError.start` is
-*0* and the underlying :attr:`UnicodeError.object` is empty.
-Patch by Bénédikt Tran.
+Ensure that *start* is correctly set on :exc:`UnicodeEncodeError` objects.
+A negative *start* is not allowed by :c:func:`PyUnicodeEncodeError_SetStart`
+and will not be returned by :c:func:`PyUnicodeEncodeError_GetStart`. Similar
+arguments apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError`
+and their corresponding C interface. Patch by Bénédikt Tran.

From cded571ddbd58b6d52000ce719445f0ce3505f02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 29 Aug 2024 10:13:40 +0200
Subject: [PATCH 12/29] address Victor's review

---
 Include/cpython/pyerrors.h            |   2 +-
 Lib/test/test_capi/test_exceptions.py |  42 ++++-----
 Objects/exceptions.c                  | 129 +++++++++++++-------------
 3 files changed, 82 insertions(+), 91 deletions(-)

diff --git a/Include/cpython/pyerrors.h b/Include/cpython/pyerrors.h
index 0aaf7388a4a662..7f15ef3ad08f25 100644
--- a/Include/cpython/pyerrors.h
+++ b/Include/cpython/pyerrors.h
@@ -44,7 +44,7 @@ typedef struct {
     PyException_HEAD
     PyObject *encoding;
     PyObject *object;
-    Py_ssize_t start;  // should be >= 0
+    Py_ssize_t start;  // must be >= 0
     Py_ssize_t end;
     PyObject *reason;
 } PyUnicodeErrorObject;
diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py
index 2f3478042c5f46..4f0c74923b83d2 100644
--- a/Lib/test/test_capi/test_exceptions.py
+++ b/Lib/test/test_capi/test_exceptions.py
@@ -5,6 +5,7 @@
 import unittest
 import textwrap
 
+from itertools import product
 from test import support
 from test.support import import_helper
 from test.support.os_helper import TESTFN, TESTFN_UNDECODABLE
@@ -425,31 +426,31 @@ def __init__(self, encoding, *args, **kwargs):
 
 class TestUnicodeError(unittest.TestCase):
 
-    def test_unicode_error_start_value(self):
+    def test_unicode_error_init_start_value(self):
         # negative start is not allowed
-        for exc_type, literal in [
+        for (exc_type, literal), obj_len, start in product([
             (UnicodeEncodeError, 'x'),
             (UnicodeDecodeError, b'x'),
             (PyUnicodeTranslateError, 'x'),
-        ]:
-            for obj_len in [0, 1, 2]:
-                s = literal * obj_len
-                with self.subTest(exc_type=exc_type, obj_len=obj_len):
-                    self.assertRaises(ValueError, exc_type, 'utf-8', s, -1, 0, '?')
+        ], range(3), [-2, -1]):
+            obj = literal * obj_len
+            for end in [0, start + 1]:
+                with self.subTest(start=start, end=end, exc_type=exc_type, obj_len=obj_len):
+                    self.assertRaises(ValueError, exc_type, 'utf-8', obj, start, end, 'reason')
 
     def test_unicode_encode_error_get_start(self):
-        test_func = _testcapi.unicode_encode_get_start
-        self._test_unicode_error_get_start('x', UnicodeEncodeError, test_func)
+        get_start = _testcapi.unicode_encode_get_start
+        self._test_unicode_error_get_start('x', UnicodeEncodeError, get_start)
 
     def test_unicode_decode_error_get_start(self):
-        test_func = _testcapi.unicode_decode_get_start
-        self._test_unicode_error_get_start(b'x', UnicodeDecodeError, test_func)
+        get_start = _testcapi.unicode_decode_get_start
+        self._test_unicode_error_get_start(b'x', UnicodeDecodeError, get_start)
 
     def test_unicode_translate_error_get_start(self):
-        test_func = _testcapi.unicode_translate_get_start
-        self._test_unicode_error_get_start('x', PyUnicodeTranslateError, test_func)
+        get_start = _testcapi.unicode_translate_get_start
+        self._test_unicode_error_get_start('x', PyUnicodeTranslateError, get_start)
 
-    def _test_unicode_error_get_start(self, literal, exc_type, test_func):
+    def _test_unicode_error_get_start(self, literal, exc_type, get_start):
         for obj_len, py_start, c_start in [
             # normal cases
             (5, 0, 0),
@@ -462,21 +463,12 @@ def _test_unicode_error_get_start(self, literal, exc_type, test_func):
             (5, 5, 4),
             (5, 10, 4),
         ]:
-            c_start_computed = py_start
-            if c_start_computed >= obj_len:
-                if obj_len == 0:
-                    c_start_computed = 0
-                else:
-                    c_start_computed = obj_len - 1
-
             s = literal * obj_len
             py_end = py_start + 1
 
             with self.subTest(s, exc_type=exc_type, py_start=py_start, c_start=c_start):
-                self.assertEqual(c_start, c_start_computed)
-                exc = exc_type('utf-8', s, py_start, py_end, 'why')
-                c_start_actual = test_func(exc)
-                self.assertEqual(c_start_actual, c_start)
+                exc = exc_type('utf-8', s, py_start, py_end, 'reason')
+                self.assertEqual(get_start(exc), c_start)
 
 class Test_PyUnstable_Exc_PrepReraiseStar(ExceptionIsLikeMixin, unittest.TestCase):
 
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 8cd1746b7b928f..142cd36a2898ee 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2978,36 +2978,35 @@ static PyMemberDef UnicodeError_members[] = {
 static int
 UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    PyUnicodeErrorObject *err;
-
-    if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
+    if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) {
         return -1;
+    }
 
-    err = (PyUnicodeErrorObject *)self;
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+    Py_CLEAR(exc->encoding);
+    Py_CLEAR(exc->object);
+    Py_CLEAR(exc->reason);
 
-    Py_CLEAR(err->encoding);
-    Py_CLEAR(err->object);
-    Py_CLEAR(err->reason);
+    PyObject *encoding = NULL, *object = NULL, *reason = NULL;  // borrowed
+    Py_ssize_t start = -1, end = -1;
 
     if (!PyArg_ParseTuple(args, "UUnnU",
-                          &err->encoding, &err->object,
-                          &err->start, &err->end, &err->reason)) {
-        goto error;
+                          &encoding, &object, &start, &end, &reason))
+    {
+        return -1;
     }
 
-    if (err->start < 0) {
+    if (start < 0) {
         PyErr_SetString(PyExc_ValueError, "'start' must be >= 0");
-        goto error;
+        return -1;
     }
 
-    Py_INCREF(err->encoding);
-    Py_INCREF(err->object);
-    Py_INCREF(err->reason);
-
+    exc->encoding = Py_NewRef(encoding);
+    exc->object = Py_NewRef(object);
+    exc->start = start;
+    exc->end = end;
+    exc->reason = Py_NewRef(reason);
     return 0;
-error:
-    err->encoding = err->object = err->reason = NULL;
-    return -1;
 }
 
 static PyObject *
@@ -3083,49 +3082,48 @@ PyObject *PyExc_UnicodeEncodeError = (PyObject *)&_PyExc_UnicodeEncodeError;
 static int
 UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    PyUnicodeErrorObject *ude;
-
-    if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
+    if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) {
         return -1;
+    }
+
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
 
-    ude = (PyUnicodeErrorObject *)self;
+    Py_CLEAR(exc->encoding);
+    Py_CLEAR(exc->object);
+    Py_CLEAR(exc->reason);
 
-    Py_CLEAR(ude->encoding);
-    Py_CLEAR(ude->object);
-    Py_CLEAR(ude->reason);
+    PyObject *encoding = NULL, *object = NULL, *reason = NULL;  // borrowed
+    Py_ssize_t start = -1, end = -1;
 
     if (!PyArg_ParseTuple(args, "UOnnU",
-                          &ude->encoding, &ude->object,
-                          &ude->start, &ude->end, &ude->reason)) {
-             ude->encoding = ude->object = ude->reason = NULL;
-             return -1;
+                          &encoding, &object, &start, &end, &reason))
+    {
+        return -1;
     }
 
-    Py_INCREF(ude->encoding);
-    Py_INCREF(ude->object);
-    Py_INCREF(ude->reason);
-
-    if (ude->start < 0) {
+    if (start < 0) {
         PyErr_SetString(PyExc_ValueError, "'start' must be >= 0");
-        goto error;
+        return -1;
     }
 
-    if (!PyBytes_Check(ude->object)) {
+    if (!PyBytes_Check(object)) {
         Py_buffer view;
-        if (PyObject_GetBuffer(ude->object, &view, PyBUF_SIMPLE) != 0)
-            goto error;
-        Py_XSETREF(ude->object, PyBytes_FromStringAndSize(view.buf, view.len));
+        if (PyObject_GetBuffer(object, &view, PyBUF_SIMPLE) != 0) {
+            return -1;
+        }
+        Py_XSETREF(object, PyBytes_FromStringAndSize(view.buf, view.len));
         PyBuffer_Release(&view);
-        if (!ude->object)
-            goto error;
+        if (object == NULL) {
+            return -1;
+        }
     }
-    return 0;
 
-error:
-    Py_CLEAR(ude->encoding);
-    Py_CLEAR(ude->object);
-    Py_CLEAR(ude->reason);
-    return -1;
+    exc->encoding = Py_NewRef(encoding);
+    exc->object = Py_NewRef(object);
+    exc->start = start;
+    exc->end = end;
+    exc->reason = Py_NewRef(reason);
+    return 0;
 }
 
 static PyObject *
@@ -3202,33 +3200,34 @@ PyUnicodeDecodeError_Create(
  */
 
 static int
-UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args,
-                           PyObject *kwds)
+UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds)
 {
-    if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1)
+    if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) {
         return -1;
+    }
 
-    Py_CLEAR(self->object);
-    Py_CLEAR(self->reason);
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
 
-    if (!PyArg_ParseTuple(args, "UnnU",
-                          &self->object,
-                          &self->start, &self->end, &self->reason)) {
-        goto error;
+    Py_CLEAR(exc->object);
+    Py_CLEAR(exc->reason);
+
+    PyObject *object = NULL, *reason = NULL;  // borrowed
+    Py_ssize_t start = -1, end = -1;
+
+    if (!PyArg_ParseTuple(args, "UnnU", &object, &start, &end, &reason)) {
+        return -1;
     }
 
-    if (self->start < 0) {
+    if (start < 0) {
         PyErr_SetString(PyExc_ValueError, "'start' must be >= 0");
-        goto error;
+        return -1;
     }
 
-    Py_INCREF(self->object);
-    Py_INCREF(self->reason);
-
+    exc->object = Py_NewRef(object);
+    exc->start = start;
+    exc->end = end;
+    exc->reason = Py_NewRef(reason);
     return 0;
-error:
-    self->object = self->reason = NULL;
-    return -1;
 }
 
 

From 1900d9ac592f36533daf8c1957c4d2f0f3269620 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 29 Aug 2024 10:15:05 +0200
Subject: [PATCH 13/29] refactor name

---
 Lib/test/test_capi/test_exceptions.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py
index 4f0c74923b83d2..74e25897d63c64 100644
--- a/Lib/test/test_capi/test_exceptions.py
+++ b/Lib/test/test_capi/test_exceptions.py
@@ -463,11 +463,11 @@ def _test_unicode_error_get_start(self, literal, exc_type, get_start):
             (5, 5, 4),
             (5, 10, 4),
         ]:
-            s = literal * obj_len
+            obj = literal * obj_len
             py_end = py_start + 1
 
-            with self.subTest(s, exc_type=exc_type, py_start=py_start, c_start=c_start):
-                exc = exc_type('utf-8', s, py_start, py_end, 'reason')
+            with self.subTest(obj, exc_type=exc_type, py_start=py_start, c_start=c_start):
+                exc = exc_type('utf-8', obj, py_start, py_end, 'reason')
                 self.assertEqual(get_start(exc), c_start)
 
 class Test_PyUnstable_Exc_PrepReraiseStar(ExceptionIsLikeMixin, unittest.TestCase):

From 8acc563f36af6c06801210c7611ca77eced01202 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 29 Aug 2024 10:35:11 +0200
Subject: [PATCH 14/29] fix refcounts

---
 Lib/test/test_exceptions.py | 71 +++++++++++++++++++------------------
 Objects/exceptions.c        |  6 ++--
 2 files changed, 40 insertions(+), 37 deletions(-)

diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index e4f2e3a97b8bb8..184f12323efcbf 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -534,41 +534,42 @@ def testAttributes(self):
             pass
 
         for exc, args, kwargs, expected in exceptionList:
-            try:
-                e = exc(*args, **kwargs)
-            except:
-                print(f"\nexc={exc!r}, args={args!r}", file=sys.stderr)
-                # raise
-            else:
-                # Verify module name
-                if not type(e).__name__.endswith('NaiveException'):
-                    self.assertEqual(type(e).__module__, 'builtins')
-                # Verify no ref leaks in Exc_str()
-                s = str(e)
-                for checkArgName in expected:
-                    value = getattr(e, checkArgName)
-                    self.assertEqual(repr(value),
-                                     repr(expected[checkArgName]),
-                                     '%r.%s == %r, expected %r' % (
-                                     e, checkArgName,
-                                     value, expected[checkArgName]))
-
-                # test for pickling support
-                for p in [pickle]:
-                    for protocol in range(p.HIGHEST_PROTOCOL + 1):
-                        s = p.dumps(e, protocol)
-                        new = p.loads(s)
-                        for checkArgName in expected:
-                            got = repr(getattr(new, checkArgName))
-                            if exc == AttributeError and checkArgName == 'obj':
-                                # See GH-103352, we're not pickling
-                                # obj at this point. So verify it's None.
-                                want = repr(None)
-                            else:
-                                want = repr(expected[checkArgName])
-                            self.assertEqual(got, want,
-                                             'pickled "%r", attribute "%s' %
-                                             (e, checkArgName))
+            with self.subTest(exc=exc, args=args, kwargs=kwargs):
+                try:
+                    e = exc(*args, **kwargs)
+                except:
+                    print(f"\nexc={exc!r}, args={args!r}", file=sys.stderr)
+                    # raise
+                else:
+                    # Verify module name
+                    if not type(e).__name__.endswith('NaiveException'):
+                        self.assertEqual(type(e).__module__, 'builtins')
+                    # Verify no ref leaks in Exc_str()
+                    s = str(e)
+                    for checkArgName in expected:
+                        value = getattr(e, checkArgName)
+                        self.assertEqual(repr(value),
+                                         repr(expected[checkArgName]),
+                                         '%r.%s == %r, expected %r' % (
+                                         e, checkArgName,
+                                         value, expected[checkArgName]))
+
+                    # test for pickling support
+                    for p in [pickle]:
+                        for protocol in range(p.HIGHEST_PROTOCOL + 1):
+                            s = p.dumps(e, protocol)
+                            new = p.loads(s)
+                            for checkArgName in expected:
+                                got = repr(getattr(new, checkArgName))
+                                if exc == AttributeError and checkArgName == 'obj':
+                                    # See GH-103352, we're not pickling
+                                    # obj at this point. So verify it's None.
+                                    want = repr(None)
+                                else:
+                                    want = repr(expected[checkArgName])
+                                self.assertEqual(got, want,
+                                                 'pickled "%r", attribute "%s' %
+                                                 (e, checkArgName))
 
     def test_setstate(self):
         e = Exception(42)
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 142cd36a2898ee..3841c7ce7b5a87 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -3111,11 +3111,13 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
         if (PyObject_GetBuffer(object, &view, PyBUF_SIMPLE) != 0) {
             return -1;
         }
-        Py_XSETREF(object, PyBytes_FromStringAndSize(view.buf, view.len));
+        PyObject *content = PyBytes_FromStringAndSize(view.buf, view.len);
         PyBuffer_Release(&view);
-        if (object == NULL) {
+        if (content == NULL) {
             return -1;
         }
+        Py_INCREF(object);          // make 'object' a strong reference
+        Py_SETREF(object, content);
     }
 
     exc->encoding = Py_NewRef(encoding);

From 0538c8375468d1404eb6039f9d270cc069889a1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 29 Aug 2024 10:44:51 +0200
Subject: [PATCH 15/29] remove debugging code

---
 Lib/test/test_exceptions.py | 71 ++++++++++++++++++-------------------
 1 file changed, 35 insertions(+), 36 deletions(-)

diff --git a/Lib/test/test_exceptions.py b/Lib/test/test_exceptions.py
index 184f12323efcbf..e4f2e3a97b8bb8 100644
--- a/Lib/test/test_exceptions.py
+++ b/Lib/test/test_exceptions.py
@@ -534,42 +534,41 @@ def testAttributes(self):
             pass
 
         for exc, args, kwargs, expected in exceptionList:
-            with self.subTest(exc=exc, args=args, kwargs=kwargs):
-                try:
-                    e = exc(*args, **kwargs)
-                except:
-                    print(f"\nexc={exc!r}, args={args!r}", file=sys.stderr)
-                    # raise
-                else:
-                    # Verify module name
-                    if not type(e).__name__.endswith('NaiveException'):
-                        self.assertEqual(type(e).__module__, 'builtins')
-                    # Verify no ref leaks in Exc_str()
-                    s = str(e)
-                    for checkArgName in expected:
-                        value = getattr(e, checkArgName)
-                        self.assertEqual(repr(value),
-                                         repr(expected[checkArgName]),
-                                         '%r.%s == %r, expected %r' % (
-                                         e, checkArgName,
-                                         value, expected[checkArgName]))
-
-                    # test for pickling support
-                    for p in [pickle]:
-                        for protocol in range(p.HIGHEST_PROTOCOL + 1):
-                            s = p.dumps(e, protocol)
-                            new = p.loads(s)
-                            for checkArgName in expected:
-                                got = repr(getattr(new, checkArgName))
-                                if exc == AttributeError and checkArgName == 'obj':
-                                    # See GH-103352, we're not pickling
-                                    # obj at this point. So verify it's None.
-                                    want = repr(None)
-                                else:
-                                    want = repr(expected[checkArgName])
-                                self.assertEqual(got, want,
-                                                 'pickled "%r", attribute "%s' %
-                                                 (e, checkArgName))
+            try:
+                e = exc(*args, **kwargs)
+            except:
+                print(f"\nexc={exc!r}, args={args!r}", file=sys.stderr)
+                # raise
+            else:
+                # Verify module name
+                if not type(e).__name__.endswith('NaiveException'):
+                    self.assertEqual(type(e).__module__, 'builtins')
+                # Verify no ref leaks in Exc_str()
+                s = str(e)
+                for checkArgName in expected:
+                    value = getattr(e, checkArgName)
+                    self.assertEqual(repr(value),
+                                     repr(expected[checkArgName]),
+                                     '%r.%s == %r, expected %r' % (
+                                     e, checkArgName,
+                                     value, expected[checkArgName]))
+
+                # test for pickling support
+                for p in [pickle]:
+                    for protocol in range(p.HIGHEST_PROTOCOL + 1):
+                        s = p.dumps(e, protocol)
+                        new = p.loads(s)
+                        for checkArgName in expected:
+                            got = repr(getattr(new, checkArgName))
+                            if exc == AttributeError and checkArgName == 'obj':
+                                # See GH-103352, we're not pickling
+                                # obj at this point. So verify it's None.
+                                want = repr(None)
+                            else:
+                                want = repr(expected[checkArgName])
+                            self.assertEqual(got, want,
+                                             'pickled "%r", attribute "%s' %
+                                             (e, checkArgName))
 
     def test_setstate(self):
         e = Exception(42)

From d5ea357f6c21a7a3ef84d979ee39eca1114331d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 29 Aug 2024 11:20:53 +0200
Subject: [PATCH 16/29] address Victor's review (round 2)

---
 Lib/test/test_capi/test_exceptions.py |  6 ++--
 Objects/exceptions.c                  | 47 ++++++++++-----------------
 2 files changed, 21 insertions(+), 32 deletions(-)

diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py
index 74e25897d63c64..bf917092ecdc13 100644
--- a/Lib/test/test_capi/test_exceptions.py
+++ b/Lib/test/test_capi/test_exceptions.py
@@ -416,7 +416,7 @@ def test_err_formatunraisable(self):
         # CRASHES formatunraisable(NULL, NULL)
 
 
-class PyUnicodeTranslateError(UnicodeTranslateError):
+class TestUnicodeTranslateError(UnicodeTranslateError):
     # UnicodeTranslateError takes 4 arguments instead of 5,
     # so we just make a UnicodeTranslateError class that is
     # compatible with the UnicodeError.__init__.
@@ -431,7 +431,7 @@ def test_unicode_error_init_start_value(self):
         for (exc_type, literal), obj_len, start in product([
             (UnicodeEncodeError, 'x'),
             (UnicodeDecodeError, b'x'),
-            (PyUnicodeTranslateError, 'x'),
+            (TestUnicodeTranslateError, 'x'),
         ], range(3), [-2, -1]):
             obj = literal * obj_len
             for end in [0, start + 1]:
@@ -448,7 +448,7 @@ def test_unicode_decode_error_get_start(self):
 
     def test_unicode_translate_error_get_start(self):
         get_start = _testcapi.unicode_translate_get_start
-        self._test_unicode_error_get_start('x', PyUnicodeTranslateError, get_start)
+        self._test_unicode_error_get_start('x', TestUnicodeTranslateError, get_start)
 
     def _test_unicode_error_get_start(self, literal, exc_type, get_start):
         for obj_len, py_start, c_start in [
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 3841c7ce7b5a87..335fc428f5be3f 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2982,11 +2982,6 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
         return -1;
     }
 
-    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
-    Py_CLEAR(exc->encoding);
-    Py_CLEAR(exc->object);
-    Py_CLEAR(exc->reason);
-
     PyObject *encoding = NULL, *object = NULL, *reason = NULL;  // borrowed
     Py_ssize_t start = -1, end = -1;
 
@@ -3001,11 +2996,12 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
         return -1;
     }
 
-    exc->encoding = Py_NewRef(encoding);
-    exc->object = Py_NewRef(object);
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+    Py_XSETREF(exc->encoding, Py_NewRef(encoding));
+    Py_XSETREF(exc->object, Py_NewRef(object));
     exc->start = start;
     exc->end = end;
-    exc->reason = Py_NewRef(reason);
+    Py_XSETREF(exc->reason, Py_NewRef(reason));
     return 0;
 }
 
@@ -3086,12 +3082,6 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
         return -1;
     }
 
-    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
-
-    Py_CLEAR(exc->encoding);
-    Py_CLEAR(exc->object);
-    Py_CLEAR(exc->reason);
-
     PyObject *encoding = NULL, *object = NULL, *reason = NULL;  // borrowed
     Py_ssize_t start = -1, end = -1;
 
@@ -3106,25 +3096,28 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
         return -1;
     }
 
-    if (!PyBytes_Check(object)) {
+    if (PyBytes_Check(object)) {
+        Py_INCREF(object);  // make 'object' a strong reference
+    }
+    else {
         Py_buffer view;
         if (PyObject_GetBuffer(object, &view, PyBUF_SIMPLE) != 0) {
             return -1;
         }
-        PyObject *content = PyBytes_FromStringAndSize(view.buf, view.len);
+        // 'object' is borrowed, so we can re-use the variable
+        object = PyBytes_FromStringAndSize(view.buf, view.len);
         PyBuffer_Release(&view);
-        if (content == NULL) {
+        if (object == NULL) {
             return -1;
         }
-        Py_INCREF(object);          // make 'object' a strong reference
-        Py_SETREF(object, content);
     }
 
-    exc->encoding = Py_NewRef(encoding);
-    exc->object = Py_NewRef(object);
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+    Py_XSETREF(exc->encoding, Py_NewRef(encoding));
+    Py_XSETREF(exc->object, object /* object is already a strong reference */);
     exc->start = start;
     exc->end = end;
-    exc->reason = Py_NewRef(reason);
+    Py_XSETREF(exc->reason, Py_NewRef(reason));
     return 0;
 }
 
@@ -3208,11 +3201,6 @@ UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds)
         return -1;
     }
 
-    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
-
-    Py_CLEAR(exc->object);
-    Py_CLEAR(exc->reason);
-
     PyObject *object = NULL, *reason = NULL;  // borrowed
     Py_ssize_t start = -1, end = -1;
 
@@ -3225,10 +3213,11 @@ UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds)
         return -1;
     }
 
-    exc->object = Py_NewRef(object);
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+    Py_XSETREF(exc->object, Py_NewRef(object));
     exc->start = start;
     exc->end = end;
-    exc->reason = Py_NewRef(reason);
+    Py_XSETREF(exc->reason, Py_NewRef(reason));
     return 0;
 }
 

From 7c10769eb1a16d54f41bb63ca626bae7aebf4bc3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 30 Aug 2024 13:30:20 +0200
Subject: [PATCH 17/29] handle negative 'start' and 'end' values

---
 Include/cpython/pyerrors.h |   2 +-
 Objects/exceptions.c       | 273 ++++++++++++++++++++-----------------
 2 files changed, 150 insertions(+), 125 deletions(-)

diff --git a/Include/cpython/pyerrors.h b/Include/cpython/pyerrors.h
index 7f15ef3ad08f25..b36b4681f5dddb 100644
--- a/Include/cpython/pyerrors.h
+++ b/Include/cpython/pyerrors.h
@@ -44,7 +44,7 @@ typedef struct {
     PyException_HEAD
     PyObject *encoding;
     PyObject *object;
-    Py_ssize_t start;  // must be >= 0
+    Py_ssize_t start;
     Py_ssize_t end;
     PyObject *reason;
 } PyUnicodeErrorObject;
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 335fc428f5be3f..8fb4c6b61ffd78 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2708,6 +2708,68 @@ set_unicodefromstring(PyObject **attr, const char *value)
     return 0;
 }
 
+/*
+ * Adjust the (inclusive) 'start' value of a UnicodeError object.
+ *
+ * The START can be negative or not, but when adjusting the value,
+ * we clip it in [0, MAX(0, OBJLEN - 1)] but do not interpret it as
+ * a relative offset.
+ */
+#define UNICODE_ERROR_ADJUST_START(START, OBJLEN)   \
+    do {                                            \
+        assert(OBJLEN >= 0);                        \
+        if (START < 0) {                            \
+            START = 0;                              \
+        }                                           \
+        if (START >= OBJLEN) {                      \
+            START = OBJLEN == 0 ? 0 : OBJLEN - 1;   \
+        }                                           \
+    } while (0)
+
+/*
+ * Adjust the (exclusive) 'end' value of a UnicodeError object.
+ *
+ * The END can be negative or not, but when adjusting the value,
+ * we clip it in [MIN(1, OBJLEN), MAX(MIN(1, OBJLEN), OBJLEN)] but
+ * do not interpret it as a relative offset.
+ */
+#define UNICODE_ERROR_ADJUST_END(END, OBJLEN)   \
+    do {                                        \
+        assert(OBJLEN >= 0);                    \
+        if (END < 1) {                          \
+            END = 1;                            \
+        }                                       \
+        if (END > OBJLEN) {                     \
+            END = OBJLEN;                       \
+        }                                       \
+    } while (0)
+
+static inline int
+unicode_error_is_single_bad_char(PyUnicodeErrorObject *exc)
+{
+    // We use the 'start' and the 'end' values here and NOT those given
+    // by the corresponding getters since they clip the output. This is
+    // done to keep a behaviour since Python 3.1 (see gh-51558).
+    Py_ssize_t start = exc->start, end = exc->end;
+    return (
+        start >= 0 && start < PyUnicode_GET_LENGTH(exc->object) &&
+        end >= 0 && end == start + 1
+    );
+}
+
+static inline int
+unicode_error_is_single_bad_byte(PyUnicodeErrorObject *exc)
+{
+    // We use the 'start' and the 'end' values here and NOT those given
+    // by the corresponding getters since they clip the output. This is
+    // done to keep a behaviour since Python 3.1 (see gh-51558).
+    Py_ssize_t start = exc->start, end = exc->end;
+    return (
+        start >= 0 && start < PyBytes_GET_SIZE(exc->object) &&
+        end >= 0 && end == start + 1
+    );
+}
+
 PyObject *
 PyUnicodeEncodeError_GetEncoding(PyObject *exc)
 {
@@ -2739,38 +2801,33 @@ PyUnicodeTranslateError_GetObject(PyObject *exc)
 }
 
 int
-PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start)
+PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start)
 {
-    PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object,
-                                "object");
-    if (!obj) {
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+    PyObject *obj = get_unicode(exc->object, "object");
+    if (obj == NULL) {
         return -1;
     }
     Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
-    *start = ((PyUnicodeErrorObject *)exc)->start;
-    assert(*start >= 0);
-    if (*start >= size) {
-        *start = size ? size - 1 : 0;
-    }
     Py_DECREF(obj);
+    *start = exc->start;
+    UNICODE_ERROR_ADJUST_START(*start, size);
     return 0;
 }
 
 
 int
-PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start)
+PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start)
 {
-    PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object");
-    if (!obj) {
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+    PyObject *obj = get_string(exc->object, "object");
+    if (obj == NULL) {
         return -1;
     }
     Py_ssize_t size = PyBytes_GET_SIZE(obj);
-    *start = ((PyUnicodeErrorObject *)exc)->start;
-    assert(*start >= 0);
-    if (*start >= size) {
-        *start = size ? size - 1 : 0;
-    }
     Py_DECREF(obj);
+    *start = exc->start;
+    UNICODE_ERROR_ADJUST_START(*start, size);
     return 0;
 }
 
@@ -2782,75 +2839,63 @@ PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start)
 }
 
 
+static inline int
+unicode_error_set_start_impl(PyObject *self, Py_ssize_t start)
+{
+    ((PyUnicodeErrorObject *)self)->start = start;
+    return 0;
+}
+
+
 int
 PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start)
 {
-    if (start < 0) {
-        PyErr_SetString(PyExc_ValueError, "'start' must be >= 0");
-        return -1;
-    }
-    ((PyUnicodeErrorObject *)exc)->start = start;
-    return 0;
+    return unicode_error_set_start_impl(exc, start);
 }
 
 
 int
 PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start)
 {
-    if (start < 0) {
-        PyErr_SetString(PyExc_ValueError, "'start' must be >= 0");
-        return -1;
-    }
-    ((PyUnicodeErrorObject *)exc)->start = start;
-    return 0;
+    return unicode_error_set_start_impl(exc, start);
 }
 
 
 int
 PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start)
 {
-    if (start < 0) {
-        PyErr_SetString(PyExc_ValueError, "'start' must be >= 0");
-        return -1;
-    }
-    ((PyUnicodeErrorObject *)exc)->start = start;
-    return 0;
+    return unicode_error_set_start_impl(exc, start);
 }
 
 
 int
-PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
+PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end)
 {
-    Py_ssize_t size;
-    PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object,
-                                "object");
-    if (!obj)
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+    PyObject *obj = get_unicode(exc->object, "object");
+    if (obj == NULL) {
         return -1;
-    *end = ((PyUnicodeErrorObject *)exc)->end;
-    size = PyUnicode_GET_LENGTH(obj);
-    if (*end<1)
-        *end = 1;
-    if (*end>size)
-        *end = size;
+    }
+    Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
     Py_DECREF(obj);
+    *end = exc->end;
+    UNICODE_ERROR_ADJUST_END(*end, size);
     return 0;
 }
 
 
 int
-PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
+PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end)
 {
-    Py_ssize_t size;
-    PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object");
-    if (!obj)
+    PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
+    PyObject *obj = get_string(exc->object, "object");
+    if (obj == NULL) {
         return -1;
-    size = PyBytes_GET_SIZE(obj);
-    *end = ((PyUnicodeErrorObject *)exc)->end;
-    if (*end<1)
-        *end = 1;
-    if (*end>size)
-        *end = size;
+    }
+    Py_ssize_t size = PyBytes_GET_SIZE(obj);
     Py_DECREF(obj);
+    *end = exc->end;
+    UNICODE_ERROR_ADJUST_END(*end, size);
     return 0;
 }
 
@@ -2862,27 +2907,32 @@ PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *end)
 }
 
 
-int
-PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end)
+static inline int
+unicode_error_set_end_impl(PyObject *exc, Py_ssize_t end)
 {
     ((PyUnicodeErrorObject *)exc)->end = end;
     return 0;
 }
 
 
+int
+PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end)
+{
+    return unicode_error_set_end_impl(exc, end);
+}
+
+
 int
 PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end)
 {
-    ((PyUnicodeErrorObject *)exc)->end = end;
-    return 0;
+    return unicode_error_set_end_impl(exc, end);
 }
 
 
 int
 PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end)
 {
-    ((PyUnicodeErrorObject *)exc)->end = end;
-    return 0;
+    return unicode_error_set_end_impl(exc, end);
 }
 
 PyObject *
@@ -2991,11 +3041,6 @@ UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
         return -1;
     }
 
-    if (start < 0) {
-        PyErr_SetString(PyExc_ValueError, "'start' must be >= 0");
-        return -1;
-    }
-
     PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
     Py_XSETREF(exc->encoding, Py_NewRef(encoding));
     Py_XSETREF(exc->object, Py_NewRef(object));
@@ -3013,42 +3058,40 @@ UnicodeEncodeError_str(PyObject *self)
     PyObject *reason_str = NULL;
     PyObject *encoding_str = NULL;
 
-    if (!uself->object)
+    if (!uself->object) {
         /* Not properly initialized. */
         return PyUnicode_FromString("");
+    }
 
     /* Get reason and encoding as strings, which they might not be if
        they've been modified after we were constructed. */
     reason_str = PyObject_Str(uself->reason);
-    if (reason_str == NULL)
+    if (reason_str == NULL) {
         goto done;
+    }
     encoding_str = PyObject_Str(uself->encoding);
-    if (encoding_str == NULL)
+    if (encoding_str == NULL) {
         goto done;
-
-    if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) {
+    }
+    if (unicode_error_is_single_bad_char(uself)) {
         Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start);
         const char *fmt;
-        if (badchar <= 0xff)
+        if (badchar <= 0xff) {
             fmt = "'%U' codec can't encode character '\\x%02x' in position %zd: %U";
-        else if (badchar <= 0xffff)
+        }
+        else if (badchar <= 0xffff) {
             fmt = "'%U' codec can't encode character '\\u%04x' in position %zd: %U";
-        else
+        }
+        else {
             fmt = "'%U' codec can't encode character '\\U%08x' in position %zd: %U";
+        }
         result = PyUnicode_FromFormat(
-            fmt,
-            encoding_str,
-            (int)badchar,
-            uself->start,
-            reason_str);
+            fmt, encoding_str, (int)badchar, uself->start, reason_str);
     }
     else {
         result = PyUnicode_FromFormat(
             "'%U' codec can't encode characters in position %zd-%zd: %U",
-            encoding_str,
-            uself->start,
-            uself->end-1,
-            reason_str);
+            encoding_str, uself->start, uself->end - 1, reason_str);
     }
 done:
     Py_XDECREF(reason_str);
@@ -3091,11 +3134,6 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
         return -1;
     }
 
-    if (start < 0) {
-        PyErr_SetString(PyExc_ValueError, "'start' must be >= 0");
-        return -1;
-    }
-
     if (PyBytes_Check(object)) {
         Py_INCREF(object);  // make 'object' a strong reference
     }
@@ -3114,7 +3152,7 @@ UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds)
 
     PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
     Py_XSETREF(exc->encoding, Py_NewRef(encoding));
-    Py_XSETREF(exc->object, object /* object is already a strong reference */);
+    Py_XSETREF(exc->object, object /* already a strong reference */);
     exc->start = start;
     exc->end = end;
     Py_XSETREF(exc->reason, Py_NewRef(reason));
@@ -3129,36 +3167,32 @@ UnicodeDecodeError_str(PyObject *self)
     PyObject *reason_str = NULL;
     PyObject *encoding_str = NULL;
 
-    if (!uself->object)
+    if (!uself->object) {
         /* Not properly initialized. */
         return PyUnicode_FromString("");
+    }
 
     /* Get reason and encoding as strings, which they might not be if
        they've been modified after we were constructed. */
     reason_str = PyObject_Str(uself->reason);
-    if (reason_str == NULL)
+    if (reason_str == NULL) {
         goto done;
+    }
     encoding_str = PyObject_Str(uself->encoding);
-    if (encoding_str == NULL)
+    if (encoding_str == NULL) {
         goto done;
+    }
 
-    if (uself->start < PyBytes_GET_SIZE(uself->object) && uself->end == uself->start+1) {
-        int byte = (int)(PyBytes_AS_STRING(((PyUnicodeErrorObject *)self)->object)[uself->start]&0xff);
+    if (unicode_error_is_single_bad_byte(uself)) {
+        int byte = (int)(PyBytes_AS_STRING(uself->object)[uself->start] & 0xff);
         result = PyUnicode_FromFormat(
             "'%U' codec can't decode byte 0x%02x in position %zd: %U",
-            encoding_str,
-            byte,
-            uself->start,
-            reason_str);
+            encoding_str, byte, uself->start, reason_str);
     }
     else {
         result = PyUnicode_FromFormat(
             "'%U' codec can't decode bytes in position %zd-%zd: %U",
-            encoding_str,
-            uself->start,
-            uself->end-1,
-            reason_str
-            );
+            encoding_str, uself->start, uself->end - 1, reason_str);
     }
 done:
     Py_XDECREF(reason_str);
@@ -3208,11 +3242,6 @@ UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds)
         return -1;
     }
 
-    if (start < 0) {
-        PyErr_SetString(PyExc_ValueError, "'start' must be >= 0");
-        return -1;
-    }
-
     PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self;
     Py_XSETREF(exc->object, Py_NewRef(object));
     exc->start = start;
@@ -3239,28 +3268,24 @@ UnicodeTranslateError_str(PyObject *self)
     if (reason_str == NULL)
         goto done;
 
-    if (uself->start < PyUnicode_GET_LENGTH(uself->object) && uself->end == uself->start+1) {
+    if (unicode_error_is_single_bad_char(uself)) {
         Py_UCS4 badchar = PyUnicode_ReadChar(uself->object, uself->start);
         const char *fmt;
-        if (badchar <= 0xff)
+        if (badchar <= 0xff) {
             fmt = "can't translate character '\\x%02x' in position %zd: %U";
-        else if (badchar <= 0xffff)
+        }
+        else if (badchar <= 0xffff) {
             fmt = "can't translate character '\\u%04x' in position %zd: %U";
-        else
+        }
+        else {
             fmt = "can't translate character '\\U%08x' in position %zd: %U";
-        result = PyUnicode_FromFormat(
-            fmt,
-            (int)badchar,
-            uself->start,
-            reason_str
-        );
-    } else {
+        }
+        result = PyUnicode_FromFormat(fmt, (int)badchar, uself->start, reason_str);
+    }
+    else {
         result = PyUnicode_FromFormat(
             "can't translate characters in position %zd-%zd: %U",
-            uself->start,
-            uself->end-1,
-            reason_str
-            );
+            uself->start, uself->end - 1, reason_str);
     }
 done:
     Py_XDECREF(reason_str);

From 7ce2ef09e30658d71281efaefedfbdd409ae0c40 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 30 Aug 2024 13:30:39 +0200
Subject: [PATCH 18/29] add C API tests

---
 Modules/_testcapi/exceptions.c | 131 +++++++++++++++++++++++++++++++++
 1 file changed, 131 insertions(+)

diff --git a/Modules/_testcapi/exceptions.c b/Modules/_testcapi/exceptions.c
index 47516881777ce9..e92d9670e7c792 100644
--- a/Modules/_testcapi/exceptions.c
+++ b/Modules/_testcapi/exceptions.c
@@ -392,6 +392,128 @@ unicode_translate_get_start(PyObject *Py_UNUSED(module), PyObject *arg)
     RETURN_SIZE(start);
 }
 
+/* Test PyUnicodeEncodeError_SetStart(); 'args' is the tuple (exc, start) */
+static PyObject *
+unicode_encode_set_start(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    PyObject *exc;
+    Py_ssize_t start;
+    if (!PyArg_ParseTuple(args, "On", &exc, &start)) {
+        return NULL;  // parsing failed (returned 0), exception already set
+    }
+    if (PyUnicodeEncodeError_SetStart(exc, start) < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+/* Test PyUnicodeDecodeError_SetStart(); 'args' is the tuple (exc, start) */
+static PyObject *
+unicode_decode_set_start(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    PyObject *exc;
+    Py_ssize_t start;
+    if (!PyArg_ParseTuple(args, "On", &exc, &start)) {
+        return NULL;  // parsing failed (returned 0), exception already set
+    }
+    if (PyUnicodeDecodeError_SetStart(exc, start) < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+/* Test PyUnicodeTranslateError_SetStart(); 'args' is the tuple (exc, start) */
+static PyObject *
+unicode_translate_set_start(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    PyObject *exc;
+    Py_ssize_t start;
+    if (!PyArg_ParseTuple(args, "On", &exc, &start)) {
+        return NULL;  // parsing failed (returned 0), exception already set
+    }
+    if (PyUnicodeTranslateError_SetStart(exc, start) < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+/* Test PyUnicodeEncodeError_GetEnd(); 'arg' is the exception to query */
+static PyObject *
+unicode_encode_get_end(PyObject *Py_UNUSED(module), PyObject *arg)
+{
+    Py_ssize_t end;
+    if (PyUnicodeEncodeError_GetEnd(arg, &end) < 0) {
+        return NULL;
+    }
+    RETURN_SIZE(end);
+}
+
+/* Test PyUnicodeDecodeError_GetEnd(); 'arg' is the exception to query */
+static PyObject *
+unicode_decode_get_end(PyObject *Py_UNUSED(module), PyObject *arg)
+{
+    Py_ssize_t end;
+    if (PyUnicodeDecodeError_GetEnd(arg, &end) < 0) {
+        return NULL;
+    }
+    RETURN_SIZE(end);
+}
+
+/* Test PyUnicodeTranslateError_GetEnd(); 'arg' is the exception to query */
+static PyObject *
+unicode_translate_get_end(PyObject *Py_UNUSED(module), PyObject *arg)
+{
+    Py_ssize_t end;
+    if (PyUnicodeTranslateError_GetEnd(arg, &end) < 0) {
+        return NULL;
+    }
+    RETURN_SIZE(end);
+}
+
+/* Test PyUnicodeEncodeError_SetEnd(); 'args' is the tuple (exc, end) */
+static PyObject *
+unicode_encode_set_end(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    PyObject *exc;
+    Py_ssize_t end;
+    if (!PyArg_ParseTuple(args, "On", &exc, &end)) {
+        return NULL;  // parsing failed (returned 0), exception already set
+    }
+    if (PyUnicodeEncodeError_SetEnd(exc, end) < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+/* Test PyUnicodeDecodeError_SetEnd(); 'args' is the tuple (exc, end) */
+static PyObject *
+unicode_decode_set_end(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    PyObject *exc;
+    Py_ssize_t end;
+    if (!PyArg_ParseTuple(args, "On", &exc, &end)) {
+        return NULL;  // parsing failed (returned 0), exception already set
+    }
+    if (PyUnicodeDecodeError_SetEnd(exc, end) < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+/* Test PyUnicodeTranslateError_SetEnd(); 'args' is the tuple (exc, end) */
+static PyObject *
+unicode_translate_set_end(PyObject *Py_UNUSED(module), PyObject *args)
+{
+    PyObject *exc;
+    Py_ssize_t end;
+    if (!PyArg_ParseTuple(args, "On", &exc, &end)) {
+        return NULL;  // parsing failed (returned 0), exception already set
+    }
+    if (PyUnicodeTranslateError_SetEnd(exc, end) < 0) {
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
 
 /*
  * Define the PyRecurdingInfinitelyError_Type
@@ -439,6 +561,15 @@ static PyMethodDef test_methods[] = {
     {"unicode_encode_get_start", unicode_encode_get_start,       METH_O},
     {"unicode_decode_get_start", unicode_decode_get_start,       METH_O},
     {"unicode_translate_get_start", unicode_translate_get_start, METH_O},
+    {"unicode_encode_set_start", unicode_encode_set_start,       METH_VARARGS},
+    {"unicode_decode_set_start", unicode_decode_set_start,       METH_VARARGS},
+    {"unicode_translate_set_start", unicode_translate_set_start, METH_VARARGS},
+    {"unicode_encode_get_end", unicode_encode_get_end,           METH_O},
+    {"unicode_decode_get_end", unicode_decode_get_end,           METH_O},
+    {"unicode_translate_get_end", unicode_translate_get_end,     METH_O},
+    {"unicode_encode_set_end", unicode_encode_set_end,           METH_VARARGS},
+    {"unicode_decode_set_end", unicode_decode_set_end,           METH_VARARGS},
+    {"unicode_translate_set_end", unicode_translate_set_end,     METH_VARARGS},
     {NULL},
 };
 

From b55ca5afcf4f33ea19e3134ed70123f28afecbad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 30 Aug 2024 13:30:44 +0200
Subject: [PATCH 19/29] add Python tests

---
 Lib/test/test_capi/test_exceptions.py | 129 ++++++++++++++++++++++----
 1 file changed, 112 insertions(+), 17 deletions(-)

diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py
index bf917092ecdc13..666e2f2ab09548 100644
--- a/Lib/test/test_capi/test_exceptions.py
+++ b/Lib/test/test_capi/test_exceptions.py
@@ -5,7 +5,6 @@
 import unittest
 import textwrap
 
-from itertools import product
 from test import support
 from test.support import import_helper
 from test.support.os_helper import TESTFN, TESTFN_UNDECODABLE
@@ -426,17 +425,9 @@ def __init__(self, encoding, *args, **kwargs):
 
 class TestUnicodeError(unittest.TestCase):
 
-    def test_unicode_error_init_start_value(self):
-        # negative start is not allowed
-        for (exc_type, literal), obj_len, start in product([
-            (UnicodeEncodeError, 'x'),
-            (UnicodeDecodeError, b'x'),
-            (TestUnicodeTranslateError, 'x'),
-        ], range(3), [-2, -1]):
-            obj = literal * obj_len
-            for end in [0, start + 1]:
-                with self.subTest(start=start, end=end, exc_type=exc_type, obj_len=obj_len):
-                    self.assertRaises(ValueError, exc_type, 'utf-8', obj, start, end, 'reason')
+    def _check_no_crash(self, exc):
+        # ensure that the __str__() method does not crash
+        _ = str(exc)
 
     def test_unicode_encode_error_get_start(self):
         get_start = _testcapi.unicode_encode_get_start
@@ -451,7 +442,7 @@ def test_unicode_translate_error_get_start(self):
         self._test_unicode_error_get_start('x', TestUnicodeTranslateError, get_start)
 
     def _test_unicode_error_get_start(self, literal, exc_type, get_start):
-        for obj_len, py_start, c_start in [
+        for obj_len, start, c_start in [
             # normal cases
             (5, 0, 0),
             (5, 1, 1),
@@ -462,13 +453,117 @@ def _test_unicode_error_get_start(self, literal, exc_type, get_start):
             (0, 10, 0),
             (5, 5, 4),
             (5, 10, 4),
+            # negative values are allowed but clipped in the getter
+            (0, -1, 0),
+            (1, -1, 0),
+            (2, -1, 0),
+            (2, -2, 0),
         ]:
             obj = literal * obj_len
-            py_end = py_start + 1
-
-            with self.subTest(obj, exc_type=exc_type, py_start=py_start, c_start=c_start):
-                exc = exc_type('utf-8', obj, py_start, py_end, 'reason')
+            with self.subTest(obj, exc_type=exc_type, start=start):
+                exc = exc_type('utf-8', obj, start, obj_len, 'reason')
                 self.assertEqual(get_start(exc), c_start)
+                self._check_no_crash(exc)
+
+    def test_unicode_encode_error_set_start(self):
+        set_start = _testcapi.unicode_encode_set_start
+        self._test_unicode_error_set_start('x', UnicodeEncodeError, set_start)
+
+    def test_unicode_decode_error_set_start(self):
+        set_start = _testcapi.unicode_decode_set_start
+        self._test_unicode_error_set_start(b'x', UnicodeDecodeError, set_start)
+
+    def test_unicode_translate_error_set_start(self):
+        set_start = _testcapi.unicode_translate_set_start
+        self._test_unicode_error_set_start('x', TestUnicodeTranslateError, set_start)
+
+    def _test_unicode_error_set_start(self, literal, exc_type, set_start):
+        obj_len = 5
+        obj = literal * obj_len
+        for new_start in range(-2 * obj_len, 2 * obj_len):
+            with self.subTest('C-API', obj=obj, exc_type=exc_type, new_start=new_start):
+                exc = exc_type('utf-8', obj, 0, obj_len, 'reason')
+                # arbitrary value is allowed in the C API setter
+                set_start(exc, new_start)
+                self.assertEqual(exc.start, new_start)
+                self._check_no_crash(exc)
+
+            with self.subTest('Py-API', obj=obj, exc_type=exc_type, new_start=new_start):
+                exc = exc_type('utf-8', obj, 0, obj_len, 'reason')
+                # arbitrary value is allowed in the attribute setter
+                exc.start = new_start
+                self.assertEqual(exc.start, new_start)
+                self._check_no_crash(exc)
+
+    def test_unicode_encode_error_get_end(self):
+        get_end = _testcapi.unicode_encode_get_end
+        self._test_unicode_error_get_end('x', UnicodeEncodeError, get_end)
+
+    def test_unicode_decode_error_get_end(self):
+        get_end = _testcapi.unicode_decode_get_end
+        self._test_unicode_error_get_end(b'x', UnicodeDecodeError, get_end)
+
+    def test_unicode_translate_error_get_end(self):
+        get_end = _testcapi.unicode_translate_get_end
+        self._test_unicode_error_get_end('x', TestUnicodeTranslateError, get_end)
+
+    def _test_unicode_error_get_end(self, literal, exc_type, get_end):
+        for obj_len, end, c_end in [
+            # normal cases
+            (5, 0, 1),
+            (5, 1, 1),
+            (5, 2, 2),
+            # out-of-range clipped in [MIN(1, OBJLEN), MAX(MIN(1, OBJLEN), OBJLEN)]
+            (0, 0, 0),
+            (0, 1, 0),
+            (0, 10, 0),
+            (1, 1, 1),
+            (1, 2, 1),
+            (5, 5, 5),
+            (5, 5, 5),
+            (5, 10, 5),
+            # negative values are allowed but clipped in the getter
+            (0, -1, 0),
+            (1, -1, 1),
+            (2, -1, 1),
+            (2, -2, 1),
+        ]:
+            obj = literal * obj_len
+            with self.subTest(obj, exc_type=exc_type, end=end):
+                exc = exc_type('utf-8', obj, 0, end, 'reason')
+                self.assertEqual(get_end(exc), c_end)
+                self._check_no_crash(exc)
+
+    def test_unicode_encode_error_set_end(self):
+        set_end = _testcapi.unicode_encode_set_end
+        self._test_unicode_error_set_end('x', UnicodeEncodeError, set_end)
+
+    def test_unicode_decode_error_set_end(self):
+        set_end = _testcapi.unicode_decode_set_end
+        self._test_unicode_error_set_end(b'x', UnicodeDecodeError, set_end)
+
+    def test_unicode_translate_error_set_end(self):
+        set_end = _testcapi.unicode_translate_set_end
+        self._test_unicode_error_set_end('x', TestUnicodeTranslateError, set_end)
+
+    def _test_unicode_error_set_end(self, literal, exc_type, set_end):
+        obj_len = 5
+        obj = literal * obj_len
+        for new_end in range(-2 * obj_len, 2 * obj_len):
+            with self.subTest('C-API', obj=obj, exc_type=exc_type, new_end=new_end):
+                exc = exc_type('utf-8', obj, 0, obj_len, 'reason')
+                # arbitrary value is allowed in the C API setter
+                set_end(exc, new_end)
+                self.assertEqual(exc.end, new_end)
+                self._check_no_crash(exc)
+
+            with self.subTest('Py-API', obj=obj, exc_type=exc_type, new_end=new_end):
+                exc = exc_type('utf-8', obj, 0, obj_len, 'reason')
+                # arbitrary value is allowed in the attribute setter
+                exc.end = new_end
+                self.assertEqual(exc.end, new_end)
+                self._check_no_crash(exc)
+
 
 class Test_PyUnstable_Exc_PrepReraiseStar(ExceptionIsLikeMixin, unittest.TestCase):
 

From 4e34e5fd13fb458f3835221e0e871a65e87663e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 30 Aug 2024 13:52:35 +0200
Subject: [PATCH 20/29] update docs

---
 Doc/c-api/exceptions.rst   | 20 ++++++++++++++++++--
 Doc/library/exceptions.rst |  9 +++++++--
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst
index 8756b76e0a40ae..37b5c4ab8bd88d 100644
--- a/Doc/c-api/exceptions.rst
+++ b/Doc/c-api/exceptions.rst
@@ -853,12 +853,23 @@ The following functions are used to create and modify Unicode exceptions from C.
    *\*start*.  *start* must not be ``NULL``.  Return ``0`` on success, ``-1`` on
    failure.
 
+   If the :attr:`UnicodeError.object` is an empty sequence, the resulting
+   *start* is ``0``. Otherwise, it is clipped to ``[0, len(object) - 1]``.
+
+   .. seealso:: :attr:`UnicodeError.start`
+
 .. c:function:: int PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start)
                 int PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start)
                 int PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start)
 
-   Set the *start* attribute of the given exception object to *start*.  *start*
-   must be non-negative. Return ``0`` on success, ``-1`` on failure.
+   Set the *start* attribute of the given exception object to *start*.
+   Return ``0`` on success, ``-1`` on failure.
+
+   .. note::
+
+      While passing a negative *start* does not raise an exception,
+      the corresponding getters will not consider it as a relative
+      offset.
 
 .. c:function:: int PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
                 int PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end)
@@ -868,6 +879,9 @@ The following functions are used to create and modify Unicode exceptions from C.
    *\*end*.  *end* must not be ``NULL``.  Return ``0`` on success, ``-1`` on
    failure.
 
+   If the :attr:`UnicodeError.object` is an empty sequence, the resulting
+   *end* is ``0``. Otherwise, it is clipped to ``[1, len(object)]``.
+
 .. c:function:: int PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end)
                 int PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end)
                 int PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end)
@@ -875,6 +889,8 @@ The following functions are used to create and modify Unicode exceptions from C.
    Set the *end* attribute of the given exception object to *end*.  Return ``0``
    on success, ``-1`` on failure.
 
+   .. seealso:: :attr:`UnicodeError.end`
+
 .. c:function:: PyObject* PyUnicodeDecodeError_GetReason(PyObject *exc)
                 PyObject* PyUnicodeEncodeError_GetReason(PyObject *exc)
                 PyObject* PyUnicodeTranslateError_GetReason(PyObject *exc)
diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst
index 55325ac9c1e11a..f72b11e34c5c3d 100644
--- a/Doc/library/exceptions.rst
+++ b/Doc/library/exceptions.rst
@@ -642,13 +642,18 @@ The following exceptions are the exceptions that are usually raised.
 
    .. attribute:: start
 
-       The first index of invalid data in :attr:`object`. This value
-       must be non-negative.
+       The first index of invalid data in :attr:`object`.
+
+       This value should not be negative as it is interpreted as an
+       absolute offset but this constraint is not enforced at runtime.
 
    .. attribute:: end
 
        The index after the last invalid data in :attr:`object`.
 
+       This value should not be negative as it is interpreted as an
+       absolute offset but this constraint is not enforced at runtime.
+
 
 .. exception:: UnicodeEncodeError
 

From 033a1ac112949a97f2e635af33e679f78dd81f70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 30 Aug 2024 13:53:13 +0200
Subject: [PATCH 21/29] fix typo

---
 Objects/exceptions.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 8fb4c6b61ffd78..0cb48e23cbae02 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2709,7 +2709,7 @@ set_unicodefromstring(PyObject **attr, const char *value)
 }
 
 /*
- * Ajust the (inclusive) 'start' value of a UnicodeError object.
+ * Adjust the (inclusive) 'start' value of a UnicodeError object.
  *
  * The START can be negative or not, but when adjusting the value,
  * we clip it in [0, MAX(0, OBJLEN - 1)] but do not intepret it as
@@ -2727,7 +2727,7 @@ set_unicodefromstring(PyObject **attr, const char *value)
     } while (0)
 
 /*
- * Ajust the (eclusive) 'end' value of a UnicodeError object.
+ * Adjust the (eclusive) 'end' value of a UnicodeError object.
  *
  * The END can be negative or not, but when adjusting the value,
  * we clip it in [MIN(1, OBJLEN), MAX(MIN(1, OBJLEN), OBJLEN)] but

From c802e64eec49be8bfad618d8b16c7ec40c66150d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 13 Sep 2024 13:19:19 +0200
Subject: [PATCH 22/29] convert macros into `static inline` functions

---
 Objects/exceptions.c | 56 ++++++++++++++++++++++----------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 0cb48e23cbae02..18ad3ec60d9b4d 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2715,16 +2715,18 @@ set_unicodefromstring(PyObject **attr, const char *value)
  * we clip it in [0, MAX(0, OBJLEN - 1)] but do not intepret it as
  * a relative offset.
  */
-#define UNICODE_ERROR_ADJUST_START(START, OBJLEN)   \
-    do {                                            \
-        assert(OBJLEN >= 0);                        \
-        if (START < 0) {                            \
-            START = 0;                              \
-        }                                           \
-        if (START >= OBJLEN) {                      \
-            START = OBJLEN == 0 ? 0 : OBJLEN - 1;   \
-        }                                           \
-    } while (0)
+static inline Py_ssize_t
+unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen)
+{
+    assert(objlen >= 0);
+    if (start < 0) {
+        start = 0;
+    }
+    if (start >= objlen) {
+        start = objlen == 0 ? 0 : objlen - 1;
+    }
+    return start;
+}
 
 /*
  * Adjust the (eclusive) 'end' value of a UnicodeError object.
@@ -2733,16 +2735,18 @@ set_unicodefromstring(PyObject **attr, const char *value)
  * we clip it in [MIN(1, OBJLEN), MAX(MIN(1, OBJLEN), OBJLEN)] but
  * do not intepret it as a relative offset.
  */
-#define UNICODE_ERROR_ADJUST_END(END, OBJLEN)   \
-    do {                                        \
-        assert(OBJLEN >= 0);                    \
-        if (END < 1) {                          \
-            END = 1;                            \
-        }                                       \
-        if (END > OBJLEN) {                     \
-            END = OBJLEN;                       \
-        }                                       \
-    } while (0)
+static inline Py_ssize_t
+unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
+{
+    assert(objlen >= 0);
+    if (end < 1) {
+        end = 1;
+    }
+    if (end > objlen) {
+        end = objlen;
+    }
+    return end;
+}
 
 static inline int
 unicode_error_is_single_bad_char(PyUnicodeErrorObject *exc)
@@ -2810,8 +2814,7 @@ PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start)
     }
     Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
     Py_DECREF(obj);
-    *start = exc->start;
-    UNICODE_ERROR_ADJUST_START(*start, size);
+    *start = unicode_error_adjust_start(exc->start, size);
     return 0;
 }
 
@@ -2826,8 +2829,7 @@ PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start)
     }
     Py_ssize_t size = PyBytes_GET_SIZE(obj);
     Py_DECREF(obj);
-    *start = exc->start;
-    UNICODE_ERROR_ADJUST_START(*start, size);
+    *start = unicode_error_adjust_start(exc->start, size);
     return 0;
 }
 
@@ -2878,8 +2880,7 @@ PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end)
     }
     Py_ssize_t size = PyUnicode_GET_LENGTH(obj);
     Py_DECREF(obj);
-    *end = exc->end;
-    UNICODE_ERROR_ADJUST_END(*end, size);
+    *end = unicode_error_adjust_end(exc->end, size);
     return 0;
 }
 
@@ -2894,8 +2895,7 @@ PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end)
     }
     Py_ssize_t size = PyBytes_GET_SIZE(obj);
     Py_DECREF(obj);
-    *end = exc->end;
-    UNICODE_ERROR_ADJUST_END(*end, size);
+    *end = unicode_error_adjust_end(exc->end, size);
     return 0;
 }
 

From fcde448436a38ca9ce55fcd5e395ac869262c2fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 27 Oct 2024 09:26:23 +0100
Subject: [PATCH 23/29] post-merge cleanup

---
 Objects/exceptions.c | 26 --------------------------
 1 file changed, 26 deletions(-)

diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index bc02539ea25c8d..8a2097f863e2d7 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2748,32 +2748,6 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
     return end;
 }
 
-static inline int
-unicode_error_is_single_bad_char(PyUnicodeErrorObject *exc)
-{
-    // We use the 'start' and the 'end' values here and NOT those given
-    // by the corresponding getters since they clip the output. This is
-    // done to keep a behaviour since Python 3.1 (see gh-51558).
-    Py_ssize_t start = exc->start, end = exc->end;
-    return (
-        start >= 0 && start < PyUnicode_GET_LENGTH(exc->object) &&
-        end >= 0 && end == start + 1
-    );
-}
-
-static inline int
-unicode_error_is_single_bad_byte(PyUnicodeErrorObject *exc)
-{
-    // We use the 'start' and the 'end' values here and NOT those given
-    // by the corresponding getters since they clip the output. This is
-    // done to keep a behaviour since Python 3.1 (see gh-51558).
-    Py_ssize_t start = exc->start, end = exc->end;
-    return (
-        start >= 0 && start < PyBytes_GET_SIZE(exc->object) &&
-        end >= 0 && end == start + 1
-    );
-}
-
 PyObject *
 PyUnicodeEncodeError_GetEncoding(PyObject *exc)
 {

From baa5cb20a9eb35f3f1e9124f9aca385164148cd8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 2 Dec 2024 16:05:21 +0100
Subject: [PATCH 24/29] fix typo

---
 Objects/exceptions.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 8a2097f863e2d7..321b3c5cd29ef9 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2729,7 +2729,7 @@ unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen)
 }
 
 /*
- * Adjust the (eclusive) 'end' value of a UnicodeError object.
+ * Adjust the (exclusive) 'end' value of a UnicodeError object.
  *
  * The END can be negative or not, but when adjusting the value,
  * we clip it in [MIN(1, OBJLEN), MAX(MIN(1, OBJLEN), OBJLEN)] but

From 4c4808ebcb9ab154e2fa1e793f5b831fd369de2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 2 Dec 2024 16:09:15 +0100
Subject: [PATCH 25/29] update NEWS and docs

---
 .../C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst  | 6 +++---
 .../C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst  | 6 ++++++
 Objects/exceptions.c                                      | 8 ++++----
 3 files changed, 13 insertions(+), 7 deletions(-)
 create mode 100644 Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst

diff --git a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst
index adb4ceeb82d95d..2fbf520b25f3d5 100644
--- a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst
+++ b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst
@@ -1,5 +1,5 @@
-Ensure that *start* is correctly set on :exc:`UnicodeEncodeError` objects.
-A negative *start* is not allowed by :c:func:`PyUnicodeEncodeError_SetStart`
-and will not be returned by :c:func:`PyUnicodeEncodeError_GetStart`. Similar
+Ensure that the value of :attr:`UnicodeEncodeError.start` retrieved by
+:c:func:`PyUnicodeEncodeError_GetStart` lie in ``[0, max(0, objlen - 1)]``
+where *objlen* is the length of :attr:`UnicodeEncodeError.object`. Similar
 arguments apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError`
 and their corresponding C interface. Patch by Bénédikt Tran.
diff --git a/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst b/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst
new file mode 100644
index 00000000000000..a0d12c228ce6cf
--- /dev/null
+++ b/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst
@@ -0,0 +1,6 @@
+Ensure that the value of :attr:`UnicodeEncodeError.end` retrieved by
+:c:func:`PyUnicodeEncodeError_GetEnd` lies in ``[min(1, objlen), max(min(1,
+objlen), objlen)]`` where *objlen* is the length of
+:attr:`UnicodeEncodeError.object`. Similar arguments apply to
+:exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` and their
+corresponding C interface. Patch by Bénédikt Tran.
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 321b3c5cd29ef9..124b591ee3a13f 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2711,8 +2711,8 @@ set_unicodefromstring(PyObject **attr, const char *value)
 /*
  * Adjust the (inclusive) 'start' value of a UnicodeError object.
  *
- * The START can be negative or not, but when adjusting the value,
- * we clip it in [0, MAX(0, OBJLEN - 1)] but do not intepret it as
+ * The 'start' can be negative or not, but when adjusting the value,
+ * we clip it in [0, max(0, objlen - 1)] but do not interpret it as
  * a relative offset.
  */
 static inline Py_ssize_t
@@ -2731,8 +2731,8 @@ unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen)
 /*
  * Adjust the (exclusive) 'end' value of a UnicodeError object.
  *
- * The END can be negative or not, but when adjusting the value,
- * we clip it in [MIN(1, OBJLEN), MAX(MIN(1, OBJLEN), OBJLEN)] but
+ * The 'end' can be negative or not, but when adjusting the value,
+ * we clip it in [min(1, objlen), max(min(1, objlen), objlen)] but
  * do not intepret it as a relative offset.
  */
 static inline Py_ssize_t

From efbdff1664bb5b44f821d819e3dd152f4cb2f08c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 2 Dec 2024 16:15:16 +0100
Subject: [PATCH 26/29] add some assertion checks

---
 Objects/exceptions.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index 124b591ee3a13f..de259125ec929e 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2884,6 +2884,7 @@ PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *end)
 static inline int
 unicode_error_set_end_impl(PyObject *exc, Py_ssize_t end)
 {
+    assert(PyObject_TypeCheck(exc, (PyTypeObject*)&PyExc_UnicodeError));
     ((PyUnicodeErrorObject *)exc)->end = end;
     return 0;
 }

From 180f3c205fd0d6bf30e2f45aa4beb27596434f76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 2 Dec 2024 16:15:45 +0100
Subject: [PATCH 27/29] add some assertion checks

---
 Objects/exceptions.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index de259125ec929e..c52c4de662a3d0 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2818,6 +2818,7 @@ PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start)
 static inline int
 unicode_error_set_start_impl(PyObject *self, Py_ssize_t start)
 {
+    assert(PyObject_TypeCheck(self, (PyTypeObject*)&PyExc_UnicodeError));
     ((PyUnicodeErrorObject *)self)->start = start;
     return 0;
 }

From 5759a705970bbda64a7ba1705918536b7de53948 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 3 Dec 2024 13:42:13 +0100
Subject: [PATCH 28/29] remove failing assertions for now

---
 Objects/exceptions.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Objects/exceptions.c b/Objects/exceptions.c
index c52c4de662a3d0..124b591ee3a13f 100644
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -2818,7 +2818,6 @@ PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start)
 static inline int
 unicode_error_set_start_impl(PyObject *self, Py_ssize_t start)
 {
-    assert(PyObject_TypeCheck(self, (PyTypeObject*)&PyExc_UnicodeError));
     ((PyUnicodeErrorObject *)self)->start = start;
     return 0;
 }
@@ -2885,7 +2884,6 @@ PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *end)
 static inline int
 unicode_error_set_end_impl(PyObject *exc, Py_ssize_t end)
 {
-    assert(PyObject_TypeCheck(exc, (PyTypeObject*)&PyExc_UnicodeError));
     ((PyUnicodeErrorObject *)exc)->end = end;
     return 0;
 }

From 8c1217173659d0b1d47de55871840438da5dd9bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 3 Dec 2024 14:13:07 +0100
Subject: [PATCH 29/29] fix docs

---
 .../2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst     |  7 ++++---
 .../2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst     | 10 +++++-----
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst
index 2fbf520b25f3d5..2cfb8b8a1e245a 100644
--- a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst
+++ b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst
@@ -1,5 +1,6 @@
-Ensure that the value of :attr:`UnicodeEncodeError.start` retrieved by
-:c:func:`PyUnicodeEncodeError_GetStart` lie in ``[0, max(0, objlen - 1)]``
-where *objlen* is the length of :attr:`UnicodeEncodeError.object`. Similar
+Ensure that the value of :attr:`UnicodeEncodeError.start <UnicodeError.start>`
+retrieved by :c:func:`PyUnicodeEncodeError_GetStart` lies in
+``[0, max(0, objlen - 1)]`` where *objlen* is the length of
+:attr:`UnicodeEncodeError.object <UnicodeError.object>`. Similar
 arguments apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError`
 and their corresponding C interface. Patch by Bénédikt Tran.
diff --git a/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst b/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst
index a0d12c228ce6cf..107751579c4d91 100644
--- a/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst
+++ b/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst
@@ -1,6 +1,6 @@
-Ensure that the value of :attr:`UnicodeEncodeError.end` retrieved by
-:c:func:`PyUnicodeEncodeError_GetEnd` lies in ``[min(1, objlen), max(min(1,
-objlen), objlen)]`` where *objlen* is the length of
-:attr:`UnicodeEncodeError.object`. Similar arguments apply to
-:exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` and their
+Ensure that the value of :attr:`UnicodeEncodeError.end <UnicodeError.end>`
+retrieved by :c:func:`PyUnicodeEncodeError_GetEnd` lies in ``[min(1, objlen),
+max(min(1, objlen), objlen)]`` where *objlen* is the length of
+:attr:`UnicodeEncodeError.object <UnicodeError.object>`. Similar arguments
+apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` and their
 corresponding C interface. Patch by Bénédikt Tran.