From 0cc7bc7e99eacbf25019a38f5c02796617326a62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 10 Dec 2024 12:34:04 +0100 Subject: [PATCH 01/20] Refactor helpers for `PyUnicodeErrorObject`. - Unify `get_unicode` and `get_string` in a single function. - Allow to retrieve the underlying `object` attribute and its size in one round. - Use a common implementation for the following functions: - `PyUnicode{Decode,Encode}Error_GetEncoding` - `PyUnicode{Decode,Encode,Translate}Error_GetObject` - `PyUnicode{Decode,Encode,Translate}Error_{Get,Set}Reason` - `PyUnicode{Decode,Encode,Translate}Error_{Get,Set}{Start,End}` --- Objects/exceptions.c | 292 ++++++++++++++++++++++++++----------------- 1 file changed, 179 insertions(+), 113 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 287cbc25305964..6c72f916e20e10 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2667,47 +2667,105 @@ SimpleExtendsException(PyExc_Exception, ValueError, SimpleExtendsException(PyExc_ValueError, UnicodeError, "Unicode related error."); + static PyObject * -get_string(PyObject *attr, const char *name) +as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) { + assert(as_bytes == 0 || as_bytes == 1); if (!attr) { PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name); return NULL; } - - if (!PyBytes_Check(attr)) { - PyErr_Format(PyExc_TypeError, "%.200s attribute must be bytes", name); + if (!PyType_FastSubclass( + Py_TYPE(attr), + as_bytes ? Py_TPFLAGS_BYTES_SUBCLASS : Py_TPFLAGS_UNICODE_SUBCLASS + )) { + PyErr_Format(PyExc_TypeError, + "%.200s attribute must be %s, not %T", + name, as_bytes ? 
"bytes" : "unicode"); return NULL; } return Py_NewRef(attr); } -static PyObject * -get_unicode(PyObject *attr, const char *name) + +static inline PyObject * +unicode_error_get_encoding_impl(PyObject *self) { - if (!attr) { - PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name); - return NULL; - } + // TODO(picnixz): do an assert-only type-check when gh-127694 is merged + // (the caller function must do an eager type-check) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + return as_unicode_error_attribute(exc->encoding, "encoding", false); +} - if (!PyUnicode_Check(attr)) { - PyErr_Format(PyExc_TypeError, - "%.200s attribute must be unicode", name); - return NULL; + +static inline PyObject * +unicode_error_get_object_impl(PyObject *self, int as_bytes) +{ + // TODO(picnixz): do an assert-only type-check when gh-127694 is merged + // (the caller function must do an eager type-check) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + return as_unicode_error_attribute(exc->object, "object", as_bytes); +} + + +static inline PyObject * +unicode_error_get_reason_impl(PyObject *self) +{ + // TODO(picnixz): do an assert-only type-check when gh-127694 is merged + // (the caller function must do an eager type-check) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + return as_unicode_error_attribute(exc->reason, "reason", false); +} + + +static inline int +unicode_error_set_reason_impl(PyObject *self, const char *reason) +{ + PyObject *value = PyUnicode_FromString(reason); + if (!value) { + return -1; } - return Py_NewRef(attr); + // TODO(picnixz): do an assert-only type-check when gh-127694 is merged + // (the caller function must do an eager type-check) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + Py_XSETREF(*&exc->reason, value); + return 0; } -static int -set_unicodefromstring(PyObject **attr, const char *value) + +static inline int +unicode_error_get_object_and_size(PyObject *self, + PyObject **result, 
Py_ssize_t *size, + int as_bytes) { - PyObject *obj = PyUnicode_FromString(value); - if (!obj) + assert(as_bytes == 0 || as_bytes == 1); + // TODO(picnixz): do an assert-only type-check when gh-127694 is merged + // (the caller function must do an eager type-check) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *obj = as_unicode_error_attribute(exc->object, "object", as_bytes); + if (obj == NULL) { + if (result != NULL) { + *result = NULL; + } + if (size != NULL) { + *size = -1; + } return -1; - Py_XSETREF(*attr, obj); + } + if (size != NULL) { + *size = as_bytes ? PyBytes_GET_SIZE(obj) : PyUnicode_GetLength(obj); + } + if (result != NULL) { + *result = obj; + } + else { + Py_DECREF(obj); + } return 0; } + /* * Adjust the (inclusive) 'start' value of a UnicodeError object. * @@ -2728,6 +2786,7 @@ unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen) return start; } + /* * Adjust the (exclusive) 'end' value of a UnicodeError object. * @@ -2748,209 +2807,216 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) return end; } + +static inline int +unicode_error_get_start_impl(PyObject *self, Py_ssize_t *start, int as_bytes) +{ + Py_ssize_t size; + if (unicode_error_get_object_and_size(self, NULL, &size, as_bytes) < 0) { + assert(size == -1); + return -1; + } + // TODO(picnixz): do an assert-only type-check when gh-127694 is merged + // (the caller function must do an eager type-check) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + *start = unicode_error_adjust_start(exc->start, size); + assert(*start >= 0 && *start <= size); + return 0; +} + + +static inline int +unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) +{ + // TODO(picnixz): do an assert-only type-check when gh-127694 is merged + // (the caller function must do an eager type-check) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + exc->start = start; + return 0; +} + + +static inline int +unicode_error_get_end_impl(PyObject 
*self, Py_ssize_t *end, int as_bytes) +{ + Py_ssize_t size; + if (unicode_error_get_object_and_size(self, NULL, &size, as_bytes) < 0) { + assert(size == -1); + return -1; + } + // TODO(picnixz): do an assert-only type-check when gh-127694 is merged + // (the caller function must do an eager type-check) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + *end = unicode_error_adjust_end(exc->end, size); + assert(*end >= 0 && *end <= size); + return 0; +} + + +static inline int +unicode_error_set_end_impl(PyObject *self, Py_ssize_t end) +{ + // TODO(picnixz): do an assert-only type-check when gh-127694 is merged + // (the caller function must do an eager type-check) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + exc->end = end; + return 0; +} + + PyObject * -PyUnicodeEncodeError_GetEncoding(PyObject *exc) +PyUnicodeEncodeError_GetEncoding(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->encoding, "encoding"); + return unicode_error_get_encoding_impl(self); } PyObject * -PyUnicodeDecodeError_GetEncoding(PyObject *exc) +PyUnicodeDecodeError_GetEncoding(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->encoding, "encoding"); + return unicode_error_get_encoding_impl(self); } PyObject * -PyUnicodeEncodeError_GetObject(PyObject *exc) +PyUnicodeEncodeError_GetObject(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->object, "object"); + return unicode_error_get_object_impl(self, false); } PyObject * -PyUnicodeDecodeError_GetObject(PyObject *exc) +PyUnicodeDecodeError_GetObject(PyObject *self) { - return get_string(((PyUnicodeErrorObject *)exc)->object, "object"); + return unicode_error_get_object_impl(self, true); } PyObject * -PyUnicodeTranslateError_GetObject(PyObject *exc) +PyUnicodeTranslateError_GetObject(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->object, "object"); + return unicode_error_get_object_impl(self, false); } int 
PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - PyObject *obj = get_unicode(exc->object, "object"); - if (obj == NULL) { - return -1; - } - Py_ssize_t size = PyUnicode_GET_LENGTH(obj); - Py_DECREF(obj); - *start = unicode_error_adjust_start(exc->start, size); - return 0; + return unicode_error_get_start_impl(self, start, false); } int PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - PyObject *obj = get_string(exc->object, "object"); - if (obj == NULL) { - return -1; - } - Py_ssize_t size = PyBytes_GET_SIZE(obj); - Py_DECREF(obj); - *start = unicode_error_adjust_start(exc->start, size); - return 0; + return unicode_error_get_start_impl(self, start, true); } int -PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start) +PyUnicodeTranslateError_GetStart(PyObject *self, Py_ssize_t *start) { - return PyUnicodeEncodeError_GetStart(exc, start); -} - - -static inline int -unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) -{ - ((PyUnicodeErrorObject *)self)->start = start; - return 0; + return unicode_error_get_start_impl(self, start, false); } int -PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) +PyUnicodeEncodeError_SetStart(PyObject *self, Py_ssize_t start) { - return unicode_error_set_start_impl(exc, start); + return unicode_error_set_start_impl(self, start); } int -PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start) +PyUnicodeDecodeError_SetStart(PyObject *self, Py_ssize_t start) { - return unicode_error_set_start_impl(exc, start); + return unicode_error_set_start_impl(self, start); } int -PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start) +PyUnicodeTranslateError_SetStart(PyObject *self, Py_ssize_t start) { - return unicode_error_set_start_impl(exc, start); + return unicode_error_set_start_impl(self, start); } int 
PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - PyObject *obj = get_unicode(exc->object, "object"); - if (obj == NULL) { - return -1; - } - Py_ssize_t size = PyUnicode_GET_LENGTH(obj); - Py_DECREF(obj); - *end = unicode_error_adjust_end(exc->end, size); - return 0; + return unicode_error_get_end_impl(self, end, false); } int PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - PyObject *obj = get_string(exc->object, "object"); - if (obj == NULL) { - return -1; - } - Py_ssize_t size = PyBytes_GET_SIZE(obj); - Py_DECREF(obj); - *end = unicode_error_adjust_end(exc->end, size); - return 0; + return unicode_error_get_end_impl(self, end, true); } int -PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *end) +PyUnicodeTranslateError_GetEnd(PyObject *self, Py_ssize_t *end) { - return PyUnicodeEncodeError_GetEnd(exc, end); -} - - -static inline int -unicode_error_set_end_impl(PyObject *exc, Py_ssize_t end) -{ - ((PyUnicodeErrorObject *)exc)->end = end; - return 0; + return unicode_error_get_end_impl(self, end, false); } int -PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) +PyUnicodeEncodeError_SetEnd(PyObject *self, Py_ssize_t end) { - return unicode_error_set_end_impl(exc, end); + return unicode_error_set_end_impl(self, end); } int -PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end) +PyUnicodeDecodeError_SetEnd(PyObject *self, Py_ssize_t end) { - return unicode_error_set_end_impl(exc, end); + return unicode_error_set_end_impl(self, end); } int -PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end) +PyUnicodeTranslateError_SetEnd(PyObject *self, Py_ssize_t end) { - return unicode_error_set_end_impl(exc, end); + return unicode_error_set_end_impl(self, end); } + PyObject * -PyUnicodeEncodeError_GetReason(PyObject *exc) +PyUnicodeEncodeError_GetReason(PyObject *self) { - return 
get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason"); + return unicode_error_get_reason_impl(self); } PyObject * -PyUnicodeDecodeError_GetReason(PyObject *exc) +PyUnicodeDecodeError_GetReason(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason"); + return unicode_error_get_reason_impl(self); } PyObject * -PyUnicodeTranslateError_GetReason(PyObject *exc) +PyUnicodeTranslateError_GetReason(PyObject *self) { - return get_unicode(((PyUnicodeErrorObject *)exc)->reason, "reason"); + return unicode_error_get_reason_impl(self); } int -PyUnicodeEncodeError_SetReason(PyObject *exc, const char *reason) +PyUnicodeEncodeError_SetReason(PyObject *self, const char *reason) { - return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason, - reason); + return unicode_error_set_reason_impl(self, reason); } int -PyUnicodeDecodeError_SetReason(PyObject *exc, const char *reason) +PyUnicodeDecodeError_SetReason(PyObject *self, const char *reason) { - return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason, - reason); + return unicode_error_set_reason_impl(self, reason); } int -PyUnicodeTranslateError_SetReason(PyObject *exc, const char *reason) +PyUnicodeTranslateError_SetReason(PyObject *self, const char *reason) { - return set_unicodefromstring(&((PyUnicodeErrorObject *)exc)->reason, - reason); + return unicode_error_set_reason_impl(self, reason); } From 32a199d0c2d2ac7e47b8d38f31a263400a76593e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 10 Dec 2024 13:00:24 +0100 Subject: [PATCH 02/20] put comment section headers --- Objects/exceptions.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 6c72f916e20e10..18e584b268c02b 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2863,6 +2863,7 @@ unicode_error_set_end_impl(PyObject *self, Py_ssize_t end) return 0; } +// --- 
PyUnicodeEncodeObject: 'encoding' getters ------------------------------ PyObject * PyUnicodeEncodeError_GetEncoding(PyObject *self) @@ -2870,30 +2871,37 @@ PyUnicodeEncodeError_GetEncoding(PyObject *self) return unicode_error_get_encoding_impl(self); } + PyObject * PyUnicodeDecodeError_GetEncoding(PyObject *self) { return unicode_error_get_encoding_impl(self); } +// --- PyUnicodeEncodeObject: 'object' getters -------------------------------- + PyObject * PyUnicodeEncodeError_GetObject(PyObject *self) { return unicode_error_get_object_impl(self, false); } + PyObject * PyUnicodeDecodeError_GetObject(PyObject *self) { return unicode_error_get_object_impl(self, true); } + PyObject * PyUnicodeTranslateError_GetObject(PyObject *self) { return unicode_error_get_object_impl(self, false); } +// --- PyUnicodeEncodeObject: 'start' getters --------------------------------- + int PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) { @@ -2914,6 +2922,7 @@ PyUnicodeTranslateError_GetStart(PyObject *self, Py_ssize_t *start) return unicode_error_get_start_impl(self, start, false); } +// --- PyUnicodeEncodeObject: 'start' setters --------------------------------- int PyUnicodeEncodeError_SetStart(PyObject *self, Py_ssize_t start) @@ -2935,6 +2944,7 @@ PyUnicodeTranslateError_SetStart(PyObject *self, Py_ssize_t start) return unicode_error_set_start_impl(self, start); } +// --- PyUnicodeEncodeObject: 'end' getters ----------------------------------- int PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) @@ -2956,6 +2966,7 @@ PyUnicodeTranslateError_GetEnd(PyObject *self, Py_ssize_t *end) return unicode_error_get_end_impl(self, end, false); } +// --- PyUnicodeEncodeObject: 'end' setters ----------------------------------- int PyUnicodeEncodeError_SetEnd(PyObject *self, Py_ssize_t end) @@ -2977,6 +2988,7 @@ PyUnicodeTranslateError_SetEnd(PyObject *self, Py_ssize_t end) return unicode_error_set_end_impl(self, end); } +// --- PyUnicodeEncodeObject: 'reason' getters 
-------------------------------- PyObject * PyUnicodeEncodeError_GetReason(PyObject *self) @@ -2998,6 +3010,7 @@ PyUnicodeTranslateError_GetReason(PyObject *self) return unicode_error_get_reason_impl(self); } +// --- PyUnicodeEncodeObject: 'reason' setters -------------------------------- int PyUnicodeEncodeError_SetReason(PyObject *self, const char *reason) From 2583095d553ec80c95eb6216d3d5c9e0ee6a5469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 10 Dec 2024 13:07:31 +0100 Subject: [PATCH 03/20] add comments --- Objects/exceptions.c | 69 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 18e584b268c02b..f6406073717739 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2668,6 +2668,10 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError, "Unicode related error."); +/* + * Check the validity of 'attr' as a unicode or bytes object depending + * on 'as_bytes' and return a new reference on it if it is the case. + */ static PyObject * as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) { @@ -2689,6 +2693,11 @@ as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) } +/* + * Return the underlying (str) 'encoding' attribute of a Unicode Error object. + * + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. + */ static inline PyObject * unicode_error_get_encoding_impl(PyObject *self) { @@ -2699,6 +2708,12 @@ unicode_error_get_encoding_impl(PyObject *self) } +/* + * Return the underlying 'object' attribute of a Unicode Error object + * as a bytes or a string instance, depending on the 'as_bytes' flag. + * + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. 
+ */ static inline PyObject * unicode_error_get_object_impl(PyObject *self, int as_bytes) { @@ -2709,6 +2724,11 @@ unicode_error_get_object_impl(PyObject *self, int as_bytes) } +/* + * Return the underlying (str) 'reason' attribute of a Unicode Error object. + * + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. + */ static inline PyObject * unicode_error_get_reason_impl(PyObject *self) { @@ -2719,6 +2739,13 @@ unicode_error_get_reason_impl(PyObject *self) } +/* + * Set the underlying (str) 'reason' attribute of a Unicode Error object. + * + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. + * + * Return 0 on success and -1 on failure. + */ static inline int unicode_error_set_reason_impl(PyObject *self, const char *reason) { @@ -2734,7 +2761,19 @@ unicode_error_set_reason_impl(PyObject *self, const char *reason) } -static inline int +/* + * Get the underlying 'object' attribute of a Unicode Error object + * as a bytes or a string instance, depending on the 'as_bytes' flag. + * + * The result is stored in 'result' and its size in 'size', + * which can be NULL to indicate that the value would be + * discarded after the call. + * + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. + * + * Return 0 on success and -1 on failure. + */ +static int unicode_error_get_object_and_size(PyObject *self, PyObject **result, Py_ssize_t *size, int as_bytes) @@ -2808,6 +2847,13 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) } +/* + * Retrieve and adjust the 'start' attribute of a Unicode Error object. + * + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. + * + * Return 0 on success and -1 on failure. 
+ */ static inline int unicode_error_get_start_impl(PyObject *self, Py_ssize_t *start, int as_bytes) { @@ -2825,6 +2871,13 @@ unicode_error_get_start_impl(PyObject *self, Py_ssize_t *start, int as_bytes) } +/* + * Set the 'start' attribute of a Unicode Error object. + * + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. + * + * Return 0 on success and -1 on failure. + */ static inline int unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) { @@ -2836,6 +2889,13 @@ unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) } +/* + * Retrieve and adjust the 'end' attribute of a Unicode Error object. + * + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. + * + * Return 0 on success and -1 on failure. + */ static inline int unicode_error_get_end_impl(PyObject *self, Py_ssize_t *end, int as_bytes) { @@ -2853,6 +2913,13 @@ unicode_error_get_end_impl(PyObject *self, Py_ssize_t *end, int as_bytes) } +/* + * Set the 'end' attribute of a Unicode Error object. + * + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. + * + * Return 0 on success and -1 on failure. + */ static inline int unicode_error_set_end_impl(PyObject *self, Py_ssize_t end) { From f0893b74523046dc30cfeeda6262bccc747d26a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 10 Dec 2024 13:25:54 +0100 Subject: [PATCH 04/20] simpler checks --- Objects/exceptions.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index f6406073717739..3232eca13f9ff7 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2680,10 +2680,7 @@ as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name); return NULL; } - if (!PyType_FastSubclass( - Py_TYPE(attr), - as_bytes ? 
Py_TPFLAGS_BYTES_SUBCLASS : Py_TPFLAGS_UNICODE_SUBCLASS - )) { + if (!(as_bytes ? PyBytes_Check(attr) : PyUnicode_Check(attr))) { PyErr_Format(PyExc_TypeError, "%.200s attribute must be %s, not %T", name, as_bytes ? "bytes" : "unicode"); @@ -2793,7 +2790,7 @@ unicode_error_get_object_and_size(PyObject *self, return -1; } if (size != NULL) { - *size = as_bytes ? PyBytes_GET_SIZE(obj) : PyUnicode_GetLength(obj); + *size = as_bytes ? PyBytes_GET_SIZE(obj) : PyUnicode_GET_LENGTH(obj); } if (result != NULL) { *result = obj; From a4b01f0d987b3a59f858a8b32bafaae2317ac206 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 10 Dec 2024 13:26:06 +0100 Subject: [PATCH 05/20] fix tests --- Objects/exceptions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 3232eca13f9ff7..c9a5ed5d6b020d 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2682,7 +2682,7 @@ as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) } if (!(as_bytes ? PyBytes_Check(attr) : PyUnicode_Check(attr))) { PyErr_Format(PyExc_TypeError, - "%.200s attribute must be %s, not %T", + "%.200s attribute must be %s", name, as_bytes ? 
"bytes" : "unicode"); return NULL; } From 01b5f223f4569dfd61b3ee4328fe7b18bb44d7ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 10 Dec 2024 14:11:34 +0100 Subject: [PATCH 06/20] unify even more the interface using a generic getter --- Include/cpython/pyerrors.h | 6 ++ Objects/exceptions.c | 171 ++++++++++++++++--------------------- 2 files changed, 79 insertions(+), 98 deletions(-) diff --git a/Include/cpython/pyerrors.h b/Include/cpython/pyerrors.h index b36b4681f5dddb..fa21ffdf861e04 100644 --- a/Include/cpython/pyerrors.h +++ b/Include/cpython/pyerrors.h @@ -94,6 +94,12 @@ PyAPI_FUNC(void) _PyErr_ChainExceptions1(PyObject *); /* In exceptions.c */ +PyAPI_FUNC(int) _PyUnicodeError_GetParams( + PyObject *self, + PyObject **obj, Py_ssize_t *objlen, + Py_ssize_t *start, Py_ssize_t *end, int *consistent, + int as_bytes); + PyAPI_FUNC(PyObject*) PyUnstable_Exc_PrepReraiseStar( PyObject *orig, PyObject *excs); diff --git a/Objects/exceptions.c b/Objects/exceptions.c index c9a5ed5d6b020d..3d162b219773c0 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2759,48 +2759,41 @@ unicode_error_set_reason_impl(PyObject *self, const char *reason) /* - * Get the underlying 'object' attribute of a Unicode Error object - * as a bytes or a string instance, depending on the 'as_bytes' flag. + * Set the 'start' attribute of a Unicode Error object. + * + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. * - * The result is stored in 'result' and its size in 'size', - * which can be NULL to indicate that the value would be - * discarded after the call. + * Return 0 on success and -1 on failure. 
+ */ +static inline int +unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) +{ + // TODO(picnixz): do an assert-only type-check when gh-127694 is merged + // (the caller function must do an eager type-check) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + exc->start = start; + return 0; +} + + +/* + * Set the 'end' attribute of a Unicode Error object. * * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. * * Return 0 on success and -1 on failure. */ -static int -unicode_error_get_object_and_size(PyObject *self, - PyObject **result, Py_ssize_t *size, - int as_bytes) +static inline int +unicode_error_set_end_impl(PyObject *self, Py_ssize_t end) { - assert(as_bytes == 0 || as_bytes == 1); // TODO(picnixz): do an assert-only type-check when gh-127694 is merged // (the caller function must do an eager type-check) PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - PyObject *obj = as_unicode_error_attribute(exc->object, "object", as_bytes); - if (obj == NULL) { - if (result != NULL) { - *result = NULL; - } - if (size != NULL) { - *size = -1; - } - return -1; - } - if (size != NULL) { - *size = as_bytes ? PyBytes_GET_SIZE(obj) : PyUnicode_GET_LENGTH(obj); - } - if (result != NULL) { - *result = obj; - } - else { - Py_DECREF(obj); - } + exc->end = end; return 0; } +// --- PyUnicodeEncodeObject: internal getters -------------------------------- /* * Adjust the (inclusive) 'start' value of a UnicodeError object. @@ -2845,85 +2838,67 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) /* - * Retrieve and adjust the 'start' attribute of a Unicode Error object. + * Get various common parameters of a Unicode Error object. * * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. * * Return 0 on success and -1 on failure. 
- */ -static inline int -unicode_error_get_start_impl(PyObject *self, Py_ssize_t *start, int as_bytes) -{ - Py_ssize_t size; - if (unicode_error_get_object_and_size(self, NULL, &size, as_bytes) < 0) { - assert(size == -1); - return -1; - } - // TODO(picnixz): do an assert-only type-check when gh-127694 is merged - // (the caller function must do an eager type-check) - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - *start = unicode_error_adjust_start(exc->start, size); - assert(*start >= 0 && *start <= size); - return 0; -} - - -/* - * Set the 'start' attribute of a Unicode Error object. * - * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. + * Parameters * - * Return 0 on success and -1 on failure. + * obj The retrieved underlying 'object'. + * objlen The 'object' length. + * start The clipped 'start' attribute. + * end The clipped 'end' attribute. + * consistent Indicate whetehr 'start' and 'end' are consistent. + * as_bytes Indicate whether the underlying 'object' is a bytes object. + * + * The 'obj', 'objlen', 'start', 'end' and 'consistent' parameters may + * be NULL to indicate that the parameter does not need to be stored. + * + * The 'consistent' value is only set if 'start', and 'end' are retrieved. */ -static inline int -unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) +int +_PyUnicodeError_GetParams(PyObject *self, + PyObject **obj, Py_ssize_t *objlen, + Py_ssize_t *start, Py_ssize_t *end, int *consistent, + int as_bytes) { + assert(as_bytes == 0 || as_bytes == 1); // TODO(picnixz): do an assert-only type-check when gh-127694 is merged // (the caller function must do an eager type-check) PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - exc->start = start; - return 0; -} + PyObject *r = as_unicode_error_attribute(exc->object, "object", as_bytes); + if (r == NULL) { + return -1; + } + Py_ssize_t n = as_bytes ? 
PyBytes_GET_SIZE(r) : PyUnicode_GET_LENGTH(r); + if (objlen != NULL) { + *objlen = n; + } -/* - * Retrieve and adjust the 'end' attribute of a Unicode Error object. - * - * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. - * - * Return 0 on success and -1 on failure. - */ -static inline int -unicode_error_get_end_impl(PyObject *self, Py_ssize_t *end, int as_bytes) -{ - Py_ssize_t size; - if (unicode_error_get_object_and_size(self, NULL, &size, as_bytes) < 0) { - assert(size == -1); - return -1; + if (start != NULL) { + *start = unicode_error_adjust_start(exc->start, n); + assert(*start >= 0); + assert(*start <= n); + } + if (end != NULL) { + *end = unicode_error_adjust_end(exc->end, n); + assert(*end >= 0); + assert(*end <= n); } - // TODO(picnixz): do an assert-only type-check when gh-127694 is merged - // (the caller function must do an eager type-check) - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - *end = unicode_error_adjust_end(exc->end, size); - assert(*end >= 0 && *end <= size); - return 0; -} + if (start != NULL && end != NULL && consistent != NULL) { + *consistent = *start < *end ? 1 : 0; + } -/* - * Set the 'end' attribute of a Unicode Error object. - * - * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. - * - * Return 0 on success and -1 on failure. 
- */ -static inline int -unicode_error_set_end_impl(PyObject *self, Py_ssize_t end) -{ - // TODO(picnixz): do an assert-only type-check when gh-127694 is merged - // (the caller function must do an eager type-check) - PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - exc->end = end; + if (obj != NULL) { + *obj = r; + } + else { + Py_DECREF(r); + } return 0; } @@ -2969,21 +2944,21 @@ PyUnicodeTranslateError_GetObject(PyObject *self) int PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) { - return unicode_error_get_start_impl(self, start, false); + return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, NULL, false); } int PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) { - return unicode_error_get_start_impl(self, start, true); + return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, NULL, true); } int PyUnicodeTranslateError_GetStart(PyObject *self, Py_ssize_t *start) { - return unicode_error_get_start_impl(self, start, false); + return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, NULL, false); } // --- PyUnicodeEncodeObject: 'start' setters --------------------------------- @@ -3013,21 +2988,21 @@ PyUnicodeTranslateError_SetStart(PyObject *self, Py_ssize_t start) int PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - return unicode_error_get_end_impl(self, end, false); + return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, NULL, false); } int PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - return unicode_error_get_end_impl(self, end, true); + return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, NULL, true); } int PyUnicodeTranslateError_GetEnd(PyObject *self, Py_ssize_t *end) { - return unicode_error_get_end_impl(self, end, false); + return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, NULL, false); } // --- PyUnicodeEncodeObject: 'end' setters ----------------------------------- From be982a09588467b20f0f5f0ef5afedfa96dfa353 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 10 Dec 2024 14:16:58 +0100 Subject: [PATCH 07/20] remove useless `consistent` parameter --- Include/cpython/pyerrors.h | 2 +- Objects/exceptions.c | 27 +++++++++------------------ 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/Include/cpython/pyerrors.h b/Include/cpython/pyerrors.h index fa21ffdf861e04..49a6265e5eb02f 100644 --- a/Include/cpython/pyerrors.h +++ b/Include/cpython/pyerrors.h @@ -97,7 +97,7 @@ PyAPI_FUNC(void) _PyErr_ChainExceptions1(PyObject *); PyAPI_FUNC(int) _PyUnicodeError_GetParams( PyObject *self, PyObject **obj, Py_ssize_t *objlen, - Py_ssize_t *start, Py_ssize_t *end, int *consistent, + Py_ssize_t *start, Py_ssize_t *end, int as_bytes); PyAPI_FUNC(PyObject*) PyUnstable_Exc_PrepReraiseStar( diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 3d162b219773c0..5d794c2505ae09 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2850,18 +2850,15 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) * objlen The 'object' length. * start The clipped 'start' attribute. * end The clipped 'end' attribute. - * consistent Indicate whetehr 'start' and 'end' are consistent. * as_bytes Indicate whether the underlying 'object' is a bytes object. * - * The 'obj', 'objlen', 'start', 'end' and 'consistent' parameters may - * be NULL to indicate that the parameter does not need to be stored. - * - * The 'consistent' value is only set if 'start', and 'end' are retrieved. + * The 'obj', 'objlen', 'start' and 'end' parameters may be NULL + * to indicate that the parameter does not need to be stored. 
*/ int _PyUnicodeError_GetParams(PyObject *self, PyObject **obj, Py_ssize_t *objlen, - Py_ssize_t *start, Py_ssize_t *end, int *consistent, + Py_ssize_t *start, Py_ssize_t *end, int as_bytes) { assert(as_bytes == 0 || as_bytes == 1); @@ -2877,7 +2874,6 @@ _PyUnicodeError_GetParams(PyObject *self, if (objlen != NULL) { *objlen = n; } - if (start != NULL) { *start = unicode_error_adjust_start(exc->start, n); assert(*start >= 0); @@ -2888,11 +2884,6 @@ _PyUnicodeError_GetParams(PyObject *self, assert(*end >= 0); assert(*end <= n); } - - if (start != NULL && end != NULL && consistent != NULL) { - *consistent = *start < *end ? 1 : 0; - } - if (obj != NULL) { *obj = r; } @@ -2944,21 +2935,21 @@ PyUnicodeTranslateError_GetObject(PyObject *self) int PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) { - return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, NULL, false); + return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, false); } int PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) { - return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, NULL, true); + return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, true); } int PyUnicodeTranslateError_GetStart(PyObject *self, Py_ssize_t *start) { - return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, NULL, false); + return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, false); } // --- PyUnicodeEncodeObject: 'start' setters --------------------------------- @@ -2988,21 +2979,21 @@ PyUnicodeTranslateError_SetStart(PyObject *self, Py_ssize_t start) int PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, NULL, false); + return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, false); } int PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, NULL, true); + return 
_PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, true); } int PyUnicodeTranslateError_GetEnd(PyObject *self, Py_ssize_t *end) { - return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, NULL, false); + return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, false); } // --- PyUnicodeEncodeObject: 'end' setters ----------------------------------- From d4dc9a6814e3006c6e7a6d59e9a6c4e33e8398f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 11 Dec 2024 12:19:20 +0100 Subject: [PATCH 08/20] Simplify call --- Objects/exceptions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 5d794c2505ae09..6976df783d300b 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2753,7 +2753,7 @@ unicode_error_set_reason_impl(PyObject *self, const char *reason) // TODO(picnixz): do an assert-only type-check when gh-127694 is merged // (the caller function must do an eager type-check) PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; - Py_XSETREF(*&exc->reason, value); + Py_XSETREF(exc->reason, value); return 0; } From 94800fdec384c3dc3d68cbd6125173b9f3ebc503 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 13 Dec 2024 17:47:04 +0100 Subject: [PATCH 09/20] remove unused function --- Objects/exceptions.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 57ae9d51390c93..aeaf2c6e09d205 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2709,14 +2709,6 @@ check_unicode_error_type(PyObject *self, const char *expect_type) } -static inline PyUnicodeErrorObject * -as_unicode_error(PyObject *self, const char *expect_type) -{ - int rc = check_unicode_error_type(self, expect_type); - return rc < 0 ? 
NULL : _PyUnicodeError_CAST(self); -} - - /* * Return the underlying (str) 'encoding' attribute of a Unicode Error object. * From e5709fac841ae57baf60eff30598c6ec10db52c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 13 Dec 2024 17:47:44 +0100 Subject: [PATCH 10/20] remove un-necessary macros --- Objects/exceptions.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index aeaf2c6e09d205..464301761d6fad 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2690,11 +2690,10 @@ as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) } -#define _PyUnicodeError_CAST(PTR) ((PyUnicodeErrorObject *)(PTR)) #define PyUnicodeError_Check(PTR) \ PyObject_TypeCheck((PTR), (PyTypeObject *)PyExc_UnicodeError) #define PyUnicodeError_CAST(PTR) \ - (assert(PyUnicodeError_Check(PTR)), _PyUnicodeError_CAST(PTR)) + (assert(PyUnicodeError_Check(PTR)), ((PyUnicodeErrorObject *)(PTR))) static inline int From 83eb24db379f4fdfda2bd370ff4b98d84d250f92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 2 Jan 2025 15:15:10 +0100 Subject: [PATCH 11/20] Update Objects/exceptions.c Co-authored-by: Petr Viktorin --- Objects/exceptions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 464301761d6fad..214e1a12772089 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2682,7 +2682,7 @@ as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) } if (!(as_bytes ? PyBytes_Check(attr) : PyUnicode_Check(attr))) { PyErr_Format(PyExc_TypeError, - "%.200s attribute must be %s", + "%s attribute must be %s", name, as_bytes ? 
"bytes" : "unicode"); return NULL; } From 7c9fd99eff79c74a7f0775c6742d267e0da033b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 3 Jan 2025 10:10:17 +0100 Subject: [PATCH 12/20] style update --- Objects/exceptions.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 214e1a12772089..ce542e7f064393 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2676,14 +2676,15 @@ static PyObject * as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) { assert(as_bytes == 0 || as_bytes == 1); - if (!attr) { - PyErr_Format(PyExc_TypeError, "%.200s attribute not set", name); + if (attr == NULL) { + PyErr_Format(PyExc_TypeError, "%s attribute not set", name); return NULL; } if (!(as_bytes ? PyBytes_Check(attr) : PyUnicode_Check(attr))) { PyErr_Format(PyExc_TypeError, "%s attribute must be %s", - name, as_bytes ? "bytes" : "unicode"); + name, + as_bytes ? 
"bytes" : "unicode"); return NULL; } return Py_NewRef(attr); @@ -2759,7 +2760,7 @@ static inline int unicode_error_set_reason_impl(PyObject *self, const char *reason) { PyObject *value = PyUnicode_FromString(reason); - if (!value) { + if (value == NULL) { return -1; } PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); From 8219be9bbe4782a35d0134f56633f700fe0a4aae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 3 Jan 2025 10:13:00 +0100 Subject: [PATCH 13/20] use macro for repeated names to avoid typos --- Objects/exceptions.c | 51 ++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index ce542e7f064393..0701adf65da45b 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2696,6 +2696,11 @@ as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) #define PyUnicodeError_CAST(PTR) \ (assert(PyUnicodeError_Check(PTR)), ((PyUnicodeErrorObject *)(PTR))) +/* class names to use when reporting errors */ +#define Py_UNICODE_ENCODE_ERROR_NAME "UnicodeEncodeError" +#define Py_UNICODE_DECODE_ERROR_NAME "UnicodeDecodeError" +#define Py_UNICODE_TRANSLATE_ERROR_NAME "UnicodeTranslateError" + static inline int check_unicode_error_type(PyObject *self, const char *expect_type) @@ -2903,7 +2908,7 @@ _PyUnicodeError_GetParams(PyObject *self, PyObject * PyUnicodeEncodeError_GetEncoding(PyObject *self) { - int rc = check_unicode_error_type(self, "UnicodeEncodeError"); + int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME); return rc < 0 ? 
NULL : unicode_error_get_encoding_impl(self); } @@ -2911,7 +2916,7 @@ PyUnicodeEncodeError_GetEncoding(PyObject *self) PyObject * PyUnicodeDecodeError_GetEncoding(PyObject *self) { - int rc = check_unicode_error_type(self, "UnicodeDecodeError"); + int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME); return rc < 0 ? NULL : unicode_error_get_encoding_impl(self); } @@ -2920,7 +2925,7 @@ PyUnicodeDecodeError_GetEncoding(PyObject *self) PyObject * PyUnicodeEncodeError_GetObject(PyObject *self) { - int rc = check_unicode_error_type(self, "UnicodeEncodeError"); + int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME); return rc < 0 ? NULL : unicode_error_get_object_impl(self, false); } @@ -2928,7 +2933,7 @@ PyUnicodeEncodeError_GetObject(PyObject *self) PyObject * PyUnicodeDecodeError_GetObject(PyObject *self) { - int rc = check_unicode_error_type(self, "UnicodeDecodeError"); + int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME); return rc < 0 ? NULL : unicode_error_get_object_impl(self, true); } @@ -2936,7 +2941,7 @@ PyUnicodeDecodeError_GetObject(PyObject *self) PyObject * PyUnicodeTranslateError_GetObject(PyObject *self) { - int rc = check_unicode_error_type(self, "UnicodeTranslateError"); + int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME); return rc < 0 ? 
NULL : unicode_error_get_object_impl(self, false); } @@ -2945,7 +2950,7 @@ PyUnicodeTranslateError_GetObject(PyObject *self) int PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) { - if (check_unicode_error_type(self, "UnicodeEncodeError") < 0) { + if (check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME) < 0) { return -1; } return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, false); @@ -2955,7 +2960,7 @@ PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) int PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) { - if (check_unicode_error_type(self, "UnicodeDecodeError") < 0) { + if (check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME) < 0) { return -1; } return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, true); @@ -2965,7 +2970,7 @@ PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) int PyUnicodeTranslateError_GetStart(PyObject *self, Py_ssize_t *start) { - if (check_unicode_error_type(self, "UnicodeTranslateError") < 0) { + if (check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME) < 0) { return -1; } return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, false); @@ -2976,7 +2981,7 @@ PyUnicodeTranslateError_GetStart(PyObject *self, Py_ssize_t *start) int PyUnicodeEncodeError_SetStart(PyObject *self, Py_ssize_t start) { - int rc = check_unicode_error_type(self, "UnicodeEncodeError"); + int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME); return rc < 0 ? -1 : unicode_error_set_start_impl(self, start); } @@ -2984,7 +2989,7 @@ PyUnicodeEncodeError_SetStart(PyObject *self, Py_ssize_t start) int PyUnicodeDecodeError_SetStart(PyObject *self, Py_ssize_t start) { - int rc = check_unicode_error_type(self, "UnicodeDecodeError"); + int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME); return rc < 0 ? 
-1 : unicode_error_set_start_impl(self, start); } @@ -2992,7 +2997,7 @@ PyUnicodeDecodeError_SetStart(PyObject *self, Py_ssize_t start) int PyUnicodeTranslateError_SetStart(PyObject *self, Py_ssize_t start) { - int rc = check_unicode_error_type(self, "UnicodeTranslateError"); + int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME); return rc < 0 ? -1 : unicode_error_set_start_impl(self, start); } @@ -3001,7 +3006,7 @@ PyUnicodeTranslateError_SetStart(PyObject *self, Py_ssize_t start) int PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - if (check_unicode_error_type(self, "UnicodeEncodeError") < 0) { + if (check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME) < 0) { return -1; } return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, false); @@ -3011,7 +3016,7 @@ PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) int PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - if (check_unicode_error_type(self, "UnicodeDecodeError") < 0) { + if (check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME) < 0) { return -1; } return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, true); @@ -3021,7 +3026,7 @@ PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) int PyUnicodeTranslateError_GetEnd(PyObject *self, Py_ssize_t *end) { - if (check_unicode_error_type(self, "UnicodeTranslateError") < 0) { + if (check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME) < 0) { return -1; } return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, false); @@ -3032,7 +3037,7 @@ PyUnicodeTranslateError_GetEnd(PyObject *self, Py_ssize_t *end) int PyUnicodeEncodeError_SetEnd(PyObject *self, Py_ssize_t end) { - int rc = check_unicode_error_type(self, "UnicodeEncodeError"); + int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME); return rc < 0 ? 
-1 : unicode_error_set_end_impl(self, end); } @@ -3040,7 +3045,7 @@ PyUnicodeEncodeError_SetEnd(PyObject *self, Py_ssize_t end) int PyUnicodeDecodeError_SetEnd(PyObject *self, Py_ssize_t end) { - int rc = check_unicode_error_type(self, "UnicodeDecodeError"); + int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME); return rc < 0 ? -1 : unicode_error_set_end_impl(self, end); } @@ -3048,7 +3053,7 @@ PyUnicodeDecodeError_SetEnd(PyObject *self, Py_ssize_t end) int PyUnicodeTranslateError_SetEnd(PyObject *self, Py_ssize_t end) { - int rc = check_unicode_error_type(self, "UnicodeTranslateError"); + int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME); return rc < 0 ? -1 : unicode_error_set_end_impl(self, end); } @@ -3057,7 +3062,7 @@ PyUnicodeTranslateError_SetEnd(PyObject *self, Py_ssize_t end) PyObject * PyUnicodeEncodeError_GetReason(PyObject *self) { - int rc = check_unicode_error_type(self, "UnicodeEncodeError"); + int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME); return rc < 0 ? NULL : unicode_error_get_reason_impl(self); } @@ -3065,7 +3070,7 @@ PyUnicodeEncodeError_GetReason(PyObject *self) PyObject * PyUnicodeDecodeError_GetReason(PyObject *self) { - int rc = check_unicode_error_type(self, "UnicodeDecodeError"); + int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME); return rc < 0 ? NULL : unicode_error_get_reason_impl(self); } @@ -3073,7 +3078,7 @@ PyUnicodeDecodeError_GetReason(PyObject *self) PyObject * PyUnicodeTranslateError_GetReason(PyObject *self) { - int rc = check_unicode_error_type(self, "UnicodeTranslateError"); + int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME); return rc < 0 ? 
NULL : unicode_error_get_reason_impl(self); } @@ -3082,7 +3087,7 @@ PyUnicodeTranslateError_GetReason(PyObject *self) int PyUnicodeEncodeError_SetReason(PyObject *self, const char *reason) { - int rc = check_unicode_error_type(self, "UnicodeEncodeError"); + int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME); return rc < 0 ? -1 : unicode_error_set_reason_impl(self, reason); } @@ -3090,7 +3095,7 @@ PyUnicodeEncodeError_SetReason(PyObject *self, const char *reason) int PyUnicodeDecodeError_SetReason(PyObject *self, const char *reason) { - int rc = check_unicode_error_type(self, "UnicodeDecodeError"); + int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME); return rc < 0 ? -1 : unicode_error_set_reason_impl(self, reason); } @@ -3098,7 +3103,7 @@ PyUnicodeDecodeError_SetReason(PyObject *self, const char *reason) int PyUnicodeTranslateError_SetReason(PyObject *self, const char *reason) { - int rc = check_unicode_error_type(self, "UnicodeTranslateError"); + int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME); return rc < 0 ? -1 : unicode_error_set_reason_impl(self, reason); } From 4a5e4e36006a36303a71502f6e1f5c5344cf6b9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 3 Jan 2025 10:24:28 +0100 Subject: [PATCH 14/20] specialize _PyUnicodeError_GetParams for start and end attributes This is typically useful for future refactorization and to be able to write lines below 80 characters. This also helps avoiding having to remember where to place the NULL arguments. 
--- Objects/exceptions.c | 62 +++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 0701adf65da45b..04bd8275bb2de3 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2947,33 +2947,40 @@ PyUnicodeTranslateError_GetObject(PyObject *self) // --- PyUnicodeEncodeObject: 'start' getters --------------------------------- +/* + * Specialization of _PyUnicodeError_GetParams() for the 'start' attribute. + * + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject, + * although this condition is verified by this function on DEBUG builds. + */ +static inline int +unicode_error_get_start_impl(PyObject *self, Py_ssize_t *start, int as_bytes) +{ + return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, as_bytes); +} + + int PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) { - if (check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME) < 0) { - return -1; - } - return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, false); + int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME); + return rc < 0 ? -1 : unicode_error_get_start_impl(self, start, false); } int PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) { - if (check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME) < 0) { - return -1; - } - return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, true); + int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME); + return rc < 0 ? -1 : unicode_error_get_start_impl(self, start, true); } int PyUnicodeTranslateError_GetStart(PyObject *self, Py_ssize_t *start) { - if (check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME) < 0) { - return -1; - } - return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, false); + int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME); + return rc < 0 ? 
-1 : unicode_error_get_start_impl(self, start, false); } // --- PyUnicodeEncodeObject: 'start' setters --------------------------------- @@ -3003,33 +3010,40 @@ PyUnicodeTranslateError_SetStart(PyObject *self, Py_ssize_t start) // --- PyUnicodeEncodeObject: 'end' getters ----------------------------------- +/* + * Specialization of _PyUnicodeError_GetParams() for the 'end' attribute. + * + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject, + * although this condition is verified by this function on DEBUG builds. + */ +static inline int +unicode_error_get_end_impl(PyObject *self, Py_ssize_t *end, int as_bytes) +{ + return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, as_bytes); +} + + int PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - if (check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME) < 0) { - return -1; - } - return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, false); + int rc = check_unicode_error_type(self, Py_UNICODE_ENCODE_ERROR_NAME); + return rc < 0 ? -1 : unicode_error_get_end_impl(self, end, false); } int PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - if (check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME) < 0) { - return -1; - } - return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, true); + int rc = check_unicode_error_type(self, Py_UNICODE_DECODE_ERROR_NAME); + return rc < 0 ? -1 : unicode_error_get_end_impl(self, end, true); } int PyUnicodeTranslateError_GetEnd(PyObject *self, Py_ssize_t *end) { - if (check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME) < 0) { - return -1; - } - return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, false); + int rc = check_unicode_error_type(self, Py_UNICODE_TRANSLATE_ERROR_NAME); + return rc < 0 ? 
-1 : unicode_error_get_end_impl(self, end, false); } // --- PyUnicodeEncodeObject: 'end' setters ----------------------------------- From f7a2efafb4bee266e33ab7ff281146a6cb89ca9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 3 Jan 2025 10:24:32 +0100 Subject: [PATCH 15/20] update comments --- Objects/exceptions.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 04bd8275bb2de3..5f4e40c32928e4 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2671,6 +2671,11 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError, /* * Check the validity of 'attr' as a unicode or bytes object depending * on 'as_bytes' and return a new reference on it if it is the case. + * + * The 'name' is the attribute name and is only used for error reporting. + * + * On success, this returns a strong reference on 'attr'. + * On failure, this sets an exception and returns NULL. */ static PyObject * as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) @@ -2702,6 +2707,12 @@ as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) #define Py_UNICODE_TRANSLATE_ERROR_NAME "UnicodeTranslateError" +/* + * Check that 'self' is of a Unicode Error object. + * + * On success, this returns 0. + * On failure, this sets a TypeError exception and returns -1. + */ static inline int check_unicode_error_type(PyObject *self, const char *expect_type) { @@ -2717,7 +2728,8 @@ check_unicode_error_type(PyObject *self, const char *expect_type) /* * Return the underlying (str) 'encoding' attribute of a Unicode Error object. * - * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject, + * although this condition is verified by this function on DEBUG builds. 
*/ static inline PyObject * unicode_error_get_encoding_impl(PyObject *self) @@ -2731,7 +2743,8 @@ unicode_error_get_encoding_impl(PyObject *self) * Return the underlying 'object' attribute of a Unicode Error object * as a bytes or a string instance, depending on the 'as_bytes' flag. * - * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject, + * although this condition is verified by this function on DEBUG builds. */ static inline PyObject * unicode_error_get_object_impl(PyObject *self, int as_bytes) @@ -2757,7 +2770,8 @@ unicode_error_get_reason_impl(PyObject *self) /* * Set the underlying (str) 'reason' attribute of a Unicode Error object. * - * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject, + * although this condition is verified by this function on DEBUG builds. * * Return 0 on success and -1 on failure. */ @@ -2777,7 +2791,8 @@ unicode_error_set_reason_impl(PyObject *self, const char *reason) /* * Set the 'start' attribute of a Unicode Error object. * - * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject, + * although this condition is verified by this function on DEBUG builds. * * Return 0 on success and -1 on failure. */ @@ -2793,7 +2808,8 @@ unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) /* * Set the 'end' attribute of a Unicode Error object. * - * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject, + * although this condition is verified by this function on DEBUG builds. * * Return 0 on success and -1 on failure. 
*/ @@ -2852,7 +2868,8 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) /* * Get various common parameters of a Unicode Error object. * - * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. + * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject, + * although this condition is verified by this function on DEBUG builds. * * Return 0 on success and -1 on failure. * @@ -2904,6 +2921,7 @@ _PyUnicodeError_GetParams(PyObject *self, } // --- PyUnicodeEncodeObject: 'encoding' getters ------------------------------ +// Note: PyUnicodeTranslateError does not have an 'encoding' attribute. PyObject * PyUnicodeEncodeError_GetEncoding(PyObject *self) From 17367ff6784c5b5dc82171ce7f0d612d31516488 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 3 Jan 2025 10:26:21 +0100 Subject: [PATCH 16/20] put 2 blank lines before sections --- Objects/exceptions.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 5f4e40c32928e4..5e42538cf6f0b4 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2701,6 +2701,7 @@ as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) #define PyUnicodeError_CAST(PTR) \ (assert(PyUnicodeError_Check(PTR)), ((PyUnicodeErrorObject *)(PTR))) + /* class names to use when reporting errors */ #define Py_UNICODE_ENCODE_ERROR_NAME "UnicodeEncodeError" #define Py_UNICODE_DECODE_ERROR_NAME "UnicodeDecodeError" @@ -2920,6 +2921,7 @@ _PyUnicodeError_GetParams(PyObject *self, return 0; } + // --- PyUnicodeEncodeObject: 'encoding' getters ------------------------------ // Note: PyUnicodeTranslateError does not have an 'encoding' attribute. @@ -2938,6 +2940,7 @@ PyUnicodeDecodeError_GetEncoding(PyObject *self) return rc < 0 ? 
NULL : unicode_error_get_encoding_impl(self); } + // --- PyUnicodeEncodeObject: 'object' getters -------------------------------- PyObject * @@ -2963,6 +2966,7 @@ PyUnicodeTranslateError_GetObject(PyObject *self) return rc < 0 ? NULL : unicode_error_get_object_impl(self, false); } + // --- PyUnicodeEncodeObject: 'start' getters --------------------------------- /* @@ -3001,6 +3005,7 @@ PyUnicodeTranslateError_GetStart(PyObject *self, Py_ssize_t *start) return rc < 0 ? -1 : unicode_error_get_start_impl(self, start, false); } + // --- PyUnicodeEncodeObject: 'start' setters --------------------------------- int @@ -3026,6 +3031,7 @@ PyUnicodeTranslateError_SetStart(PyObject *self, Py_ssize_t start) return rc < 0 ? -1 : unicode_error_set_start_impl(self, start); } + // --- PyUnicodeEncodeObject: 'end' getters ----------------------------------- /* @@ -3064,6 +3070,7 @@ PyUnicodeTranslateError_GetEnd(PyObject *self, Py_ssize_t *end) return rc < 0 ? -1 : unicode_error_get_end_impl(self, end, false); } + // --- PyUnicodeEncodeObject: 'end' setters ----------------------------------- int @@ -3089,6 +3096,7 @@ PyUnicodeTranslateError_SetEnd(PyObject *self, Py_ssize_t end) return rc < 0 ? -1 : unicode_error_set_end_impl(self, end); } + // --- PyUnicodeEncodeObject: 'reason' getters -------------------------------- PyObject * @@ -3114,6 +3122,7 @@ PyUnicodeTranslateError_GetReason(PyObject *self) return rc < 0 ? 
NULL : unicode_error_get_reason_impl(self); } + // --- PyUnicodeEncodeObject: 'reason' setters -------------------------------- int From c05f2adc85c5d322aec301dd2ced10075df19d5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 3 Jan 2025 11:11:41 +0100 Subject: [PATCH 17/20] add NULL assertions to avoid obscure segmentation faults --- Objects/exceptions.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 5e42538cf6f0b4..c6ba673e05d360 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2717,6 +2717,7 @@ as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) static inline int check_unicode_error_type(PyObject *self, const char *expect_type) { + assert(self != NULL); if (!PyUnicodeError_Check(self)) { PyErr_Format(PyExc_TypeError, "expecting a %s object, got %T", expect_type, self); @@ -2735,6 +2736,7 @@ check_unicode_error_type(PyObject *self, const char *expect_type) static inline PyObject * unicode_error_get_encoding_impl(PyObject *self) { + assert(self != NULL); PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); return as_unicode_error_attribute(exc->encoding, "encoding", false); } @@ -2750,6 +2752,7 @@ unicode_error_get_encoding_impl(PyObject *self) static inline PyObject * unicode_error_get_object_impl(PyObject *self, int as_bytes) { + assert(self != NULL); PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); return as_unicode_error_attribute(exc->object, "object", as_bytes); } @@ -2763,6 +2766,7 @@ unicode_error_get_object_impl(PyObject *self, int as_bytes) static inline PyObject * unicode_error_get_reason_impl(PyObject *self) { + assert(self != NULL); PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); return as_unicode_error_attribute(exc->reason, "reason", false); } @@ -2779,6 +2783,7 @@ unicode_error_get_reason_impl(PyObject *self) static inline int 
unicode_error_set_reason_impl(PyObject *self, const char *reason) { + assert(self != NULL); PyObject *value = PyUnicode_FromString(reason); if (value == NULL) { return -1; @@ -2800,6 +2805,7 @@ unicode_error_set_reason_impl(PyObject *self, const char *reason) static inline int unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) { + assert(self != NULL); PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); exc->start = start; return 0; @@ -2817,6 +2823,7 @@ unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) static inline int unicode_error_set_end_impl(PyObject *self, Py_ssize_t end) { + assert(self != NULL); PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); exc->end = end; return 0; @@ -2891,6 +2898,7 @@ _PyUnicodeError_GetParams(PyObject *self, Py_ssize_t *start, Py_ssize_t *end, int as_bytes) { + assert(self != NULL); assert(as_bytes == 0 || as_bytes == 1); PyUnicodeErrorObject *exc = PyUnicodeError_CAST(self); PyObject *r = as_unicode_error_attribute(exc->object, "object", as_bytes); @@ -2978,6 +2986,7 @@ PyUnicodeTranslateError_GetObject(PyObject *self) static inline int unicode_error_get_start_impl(PyObject *self, Py_ssize_t *start, int as_bytes) { + assert(self != NULL); return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, as_bytes); } @@ -3043,6 +3052,7 @@ PyUnicodeTranslateError_SetStart(PyObject *self, Py_ssize_t start) static inline int unicode_error_get_end_impl(PyObject *self, Py_ssize_t *end, int as_bytes) { + assert(self != NULL); return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, as_bytes); } From 6c07afc6efa006574a78d5cca5e0cb66c85707a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 3 Jan 2025 12:24:05 +0100 Subject: [PATCH 18/20] update comments --- Objects/exceptions.c | 66 +++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/Objects/exceptions.c 
b/Objects/exceptions.c index c6ba673e05d360..4b7ff8030ac73c 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2675,7 +2675,7 @@ SimpleExtendsException(PyExc_ValueError, UnicodeError, * The 'name' is the attribute name and is only used for error reporting. * * On success, this returns a strong reference on 'attr'. - * On failure, this sets an exception and returns NULL. + * On failure, this sets a TypeError and returns NULL. */ static PyObject * as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) @@ -2709,10 +2709,18 @@ as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) /* - * Check that 'self' is of a Unicode Error object. + * Check that 'self' is of a UnicodeError object. * * On success, this returns 0. * On failure, this sets a TypeError exception and returns -1. + * + * The 'expect_type' is the name of the expected type, which is + * only used for error reporting. + * + * As an implementation detail, the `PyUnicode*Error_*` functions + * currently allow *any* subclass of UnicodeError as 'self'. + * + * Use one of the `Py_UNICODE_*_ERROR_NAME` macros to avoid typos. */ static inline int check_unicode_error_type(PyObject *self, const char *expect_type) @@ -2727,11 +2735,14 @@ check_unicode_error_type(PyObject *self, const char *expect_type) } +// --- PyUnicodeErrorObject: internal helpers --------------------------------- +// +// In the helpers below, the caller is responsible to ensure that 'self' +// is a PyUnicodeErrorObject, although this condition is verified by this +// function on DEBUG builds through PyUnicodeError_CAST(). + /* - * Return the underlying (str) 'encoding' attribute of a Unicode Error object. - * - * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject, - * although this condition is verified by this function on DEBUG builds. + * Return the underlying (str) 'encoding' attribute of a UnicodeError object.
*/ static inline PyObject * unicode_error_get_encoding_impl(PyObject *self) @@ -2743,11 +2754,8 @@ unicode_error_get_encoding_impl(PyObject *self) /* - * Return the underlying 'object' attribute of a Unicode Error object + * Return the underlying 'object' attribute of a UnicodeError object * as a bytes or a string instance, depending on the 'as_bytes' flag. - * - * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject, - * although this condition is verified by this function on DEBUG builds. */ static inline PyObject * unicode_error_get_object_impl(PyObject *self, int as_bytes) @@ -2759,9 +2767,7 @@ unicode_error_get_object_impl(PyObject *self, int as_bytes) /* - * Return the underlying (str) 'reason' attribute of a Unicode Error object. - * - * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject. + * Return the underlying (str) 'reason' attribute of a UnicodeError object. */ static inline PyObject * unicode_error_get_reason_impl(PyObject *self) @@ -2773,10 +2779,7 @@ unicode_error_get_reason_impl(PyObject *self) /* - * Set the underlying (str) 'reason' attribute of a Unicode Error object. - * - * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject, - * although this condition is verified by this function on DEBUG builds. + * Set the underlying (str) 'reason' attribute of a UnicodeError object. * * Return 0 on success and -1 on failure. */ @@ -2795,10 +2798,7 @@ unicode_error_set_reason_impl(PyObject *self, const char *reason) /* - * Set the 'start' attribute of a Unicode Error object. - * - * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject, - * although this condition is verified by this function on DEBUG builds. + * Set the 'start' attribute of a UnicodeError object. * * Return 0 on success and -1 on failure. */ @@ -2813,10 +2813,7 @@ unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) /* - * Set the 'end' attribute of a Unicode Error object.
- * - * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject, - * although this condition is verified by this function on DEBUG builds. + * Set the 'end' attribute of a UnicodeError object. * * Return 0 on success and -1 on failure. */ @@ -2874,23 +2871,30 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) /* - * Get various common parameters of a Unicode Error object. + * Get various common parameters of a UnicodeError object. * * The caller is responsible to ensure that 'self' is a PyUnicodeErrorObject, * although this condition is verified by this function on DEBUG builds. * * Return 0 on success and -1 on failure. * - * Parameters + * Output parameters: * - * obj The retrieved underlying 'object'. + * obj A strong reference to the 'object' attribute. * objlen The 'object' length. * start The clipped 'start' attribute. * end The clipped 'end' attribute. - * as_bytes Indicate whether the underlying 'object' is a bytes object. * - * The 'obj', 'objlen', 'start' and 'end' parameters may be NULL - * to indicate that the parameter does not need to be stored. + * An output parameter can be NULL to indicate that + * the corresponding value does not need to be stored. + * + * Input parameter: + * + * as_bytes If 1, the error's 'object' attribute must be a bytes object, + * i.e. the call is for a `UnicodeDecodeError`. Otherwise, the + * 'object' attribute must be a string. + * + * A TypeError is raised if the 'object' type is incompatible.
*/ int _PyUnicodeError_GetParams(PyObject *self, From ebf86c798b2ad55f3437603950bdf6e9999694d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 3 Jan 2025 12:33:08 +0100 Subject: [PATCH 19/20] fixup comment --- Objects/exceptions.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 4b7ff8030ac73c..b0632db9174264 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2738,8 +2738,8 @@ check_unicode_error_type(PyObject *self, const char *expect_type) // --- PyUnicodeErrorObject: internal helpers --------------------------------- // // In the helpers below, the caller is responsible to ensure that 'self' -// is a PyUnicodeErrorObject, although this condition is verified by this -// function on DEBUG builds through PyUnicodeError_CAST(). +// is a PyUnicodeErrorObject, although this is verified on DEBUG builds +// through PyUnicodeError_CAST(). /* * Return the underlying (str) 'encoding' attribute of a UnicodeError object. From 3fb81c2212c52d69f4fee67871725af60d6951c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 3 Jan 2025 12:34:04 +0100 Subject: [PATCH 20/20] fixup comment --- Objects/exceptions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index b0632db9174264..714f8c828afbc1 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2709,7 +2709,7 @@ as_unicode_error_attribute(PyObject *attr, const char *name, int as_bytes) /* - * Check that 'self' is of a UnicodeError object. + * Check that 'self' is a UnicodeError object. * * On success, this returns 0. * On failure, this sets a TypeError exception and returns -1.