From 62df0dda8ff68471f574400361a1ff2bc35614d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 21 Jan 2025 15:16:55 +0100 Subject: [PATCH 1/8] Use `_PyUnicodeError_GetParams` for the 'surrogate' handlers --- Python/codecs.c | 415 +++++++++++++++++++++++++++--------------------- 1 file changed, 234 insertions(+), 181 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index 2cb3875db35058..696ec932c02f48 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -1056,7 +1056,7 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) #define ENC_UTF32LE 4 static int -get_standard_encoding(const char *encoding, int *bytelength) +get_standard_encoding_impl(const char *encoding, int *bytelength) { if (Py_TOLOWER(encoding[0]) == 'u' && Py_TOLOWER(encoding[1]) == 't' && @@ -1114,165 +1114,204 @@ get_standard_encoding(const char *encoding, int *bytelength) return ENC_UNKNOWN; } -/* This handler is declared static until someone demonstrates - a need to call it directly. */ + +static int +get_standard_encoding(PyObject *encoding, int *code, int *bytelength) +{ + const char *encoding_cstr = PyUnicode_AsUTF8(encoding); + if (encoding_cstr == NULL) { + return -1; + } + *code = get_standard_encoding_impl(encoding_cstr, bytelength); + return 0; +} + + static PyObject * -PyCodec_SurrogatePassErrors(PyObject *exc) +_PyCodec_SurrogatePassUnicodeEncodeError(PyObject *exc) { - PyObject *restuple; - PyObject *object; - PyObject *encode; - const char *encoding; - int code; - int bytelength; - Py_ssize_t i; - Py_ssize_t start; - Py_ssize_t end; - PyObject *res; + PyObject *encoding = PyUnicodeEncodeError_GetEncoding(exc); + if (encoding == NULL) { + return NULL; + } + int code, bytelength; + int rc = get_standard_encoding(encoding, &code, &bytelength); + Py_DECREF(encoding); + if (rc < 0) { + return NULL; + } + if (code == ENC_UNKNOWN) { + goto bail; + } - if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { - unsigned char *outp; - if (PyUnicodeEncodeError_GetStart(exc, &start)) - return NULL; - if (PyUnicodeEncodeError_GetEnd(exc, &end)) - return NULL; - if (!(object = PyUnicodeEncodeError_GetObject(exc))) - return NULL; - if (!(encode = PyUnicodeEncodeError_GetEncoding(exc))) { - Py_DECREF(object); - return NULL; - } - if (!(encoding = PyUnicode_AsUTF8(encode))) { - Py_DECREF(object); - Py_DECREF(encode); - return NULL; - } - code = get_standard_encoding(encoding, &bytelength); - Py_DECREF(encode); - if (code == ENC_UNKNOWN) { - /* Not supported, fail with original exception */ - PyErr_SetObject(PyExceptionInstance_Class(exc), exc); - Py_DECREF(object); - return NULL; - } + PyObject *obj; + Py_ssize_t objlen, start, end, slen; + if (_PyUnicodeError_GetParams(exc, + &obj, &objlen, + &start, &end, &slen, false) < 0) + { + return NULL; + } - if (end - start > PY_SSIZE_T_MAX / bytelength) - end = start + PY_SSIZE_T_MAX / bytelength; - res = PyBytes_FromStringAndSize(NULL, bytelength*(end-start)); - if (!res) { - Py_DECREF(object); - return NULL; + if (slen > PY_SSIZE_T_MAX / bytelength) { + end = start + PY_SSIZE_T_MAX / bytelength; + end = Py_MIN(end, objlen); + slen = Py_MAX(0, end - start); + } + + PyObject *res = PyBytes_FromStringAndSize(NULL, bytelength * slen); + if (res == NULL) { + Py_DECREF(obj); + return NULL; + } + + unsigned char *outp = (unsigned char *)PyBytes_AsString(res); + for (Py_ssize_t i = start; i < end; i++) { + /* object is guaranteed to be "ready" */ + Py_UCS4 ch = PyUnicode_READ_CHAR(obj, i); + if (!Py_UNICODE_IS_SURROGATE(ch)) { + /* Not a surrogate, fail with original exception */ + Py_DECREF(res); + goto bail; } - outp = (unsigned char*)PyBytes_AsString(res); - for (i = start; i < end; i++) { - /* object is guaranteed to be "ready" */ - Py_UCS4 ch = PyUnicode_READ_CHAR(object, i); - if (!Py_UNICODE_IS_SURROGATE(ch)) { - /* Not a surrogate, fail with original exception */ - PyErr_SetObject(PyExceptionInstance_Class(exc), exc); - Py_DECREF(res); - Py_DECREF(object); - return NULL; - } - switch (code) { - case ENC_UTF8: + switch (code) { + case ENC_UTF8: { *outp++ = (unsigned char)(0xe0 | (ch >> 12)); *outp++ = (unsigned char)(0x80 | ((ch >> 6) & 0x3f)); *outp++ = (unsigned char)(0x80 | (ch & 0x3f)); break; - case ENC_UTF16LE: - *outp++ = (unsigned char) ch; + } + case ENC_UTF16LE: { + *outp++ = (unsigned char)ch; *outp++ = (unsigned char)(ch >> 8); break; - case ENC_UTF16BE: + } + case ENC_UTF16BE: { *outp++ = (unsigned char)(ch >> 8); - *outp++ = (unsigned char) ch; + *outp++ = (unsigned char)ch; break; - case ENC_UTF32LE: - *outp++ = (unsigned char) ch; + } + case ENC_UTF32LE: { + *outp++ = (unsigned char)ch; *outp++ = (unsigned char)(ch >> 8); *outp++ = (unsigned char)(ch >> 16); *outp++ = (unsigned char)(ch >> 24); break; - case ENC_UTF32BE: + } + case ENC_UTF32BE: { *outp++ = (unsigned char)(ch >> 24); *outp++ = (unsigned char)(ch >> 16); *outp++ = (unsigned char)(ch >> 8); - *outp++ = (unsigned char) ch; + *outp++ = (unsigned char)ch; break; } } - restuple = Py_BuildValue("(On)", res, end); - Py_DECREF(res); - Py_DECREF(object); - return restuple; } - else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) { - const unsigned char *p; - Py_UCS4 ch = 0; - if (PyUnicodeDecodeError_GetStart(exc, &start)) - return NULL; - if (PyUnicodeDecodeError_GetEnd(exc, &end)) - return NULL; - if (!(object = PyUnicodeDecodeError_GetObject(exc))) - return NULL; - p = (const unsigned char*)PyBytes_AS_STRING(object); - if (!(encode = PyUnicodeDecodeError_GetEncoding(exc))) { - Py_DECREF(object); - return NULL; - } - if (!(encoding = PyUnicode_AsUTF8(encode))) { - Py_DECREF(object); - Py_DECREF(encode); - return NULL; - } - code = get_standard_encoding(encoding, &bytelength); - Py_DECREF(encode); - if (code == ENC_UNKNOWN) { - /* Not supported, fail with original exception */ - PyErr_SetObject(PyExceptionInstance_Class(exc), exc); - Py_DECREF(object); - return NULL; - } - /* Try decoding a single surrogate character. If - there are more, let the codec call us again. */ - p += start; - if (PyBytes_GET_SIZE(object) - start >= bytelength) { - switch (code) { - case ENC_UTF8: + PyObject *restuple = Py_BuildValue("(On)", res, end); + Py_DECREF(obj); + Py_DECREF(res); + return restuple; + +bail: + Py_XDECREF(obj); + PyErr_SetObject(PyExceptionInstance_Class(exc), exc); + return NULL; + +} + + +static PyObject * +_PyCodec_SurrogatePassUnicodeDecodeError(PyObject *exc) +{ + PyObject *encoding = PyUnicodeDecodeError_GetEncoding(exc); + if (encoding == NULL) { + return NULL; + } + int code, bytelength; + int rc = get_standard_encoding(encoding, &code, &bytelength); + Py_DECREF(encoding); + if (rc < 0) { + return NULL; + } + if (code == ENC_UNKNOWN) { + goto bail; + } + + PyObject *obj; + Py_ssize_t objlen, start, end, slen; + if (_PyUnicodeError_GetParams(exc, + &obj, &objlen, + &start, &end, &slen, true) < 0) + { + return NULL; + } + + /* Try decoding a single surrogate character. If + there are more, let the codec call us again. */ + Py_UCS4 ch = 0; + const unsigned char *p = (const unsigned char *)PyBytes_AS_STRING(obj); + p += start; + + if (objlen - start >= bytelength) { + switch (code) { + case ENC_UTF8: { if ((p[0] & 0xf0) == 0xe0 && (p[1] & 0xc0) == 0x80 && - (p[2] & 0xc0) == 0x80) { + (p[2] & 0xc0) == 0x80) + { /* it's a three-byte code */ - ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f); + ch = ((p[0] & 0x0f) << 12) + + ((p[1] & 0x3f) << 6) + + (p[2] & 0x3f); } break; - case ENC_UTF16LE: + } + case ENC_UTF16LE: { ch = p[1] << 8 | p[0]; break; - case ENC_UTF16BE: + } + case ENC_UTF16BE: { ch = p[0] << 8 | p[1]; break; - case ENC_UTF32LE: + } + case ENC_UTF32LE: { ch = (p[3] << 24) | (p[2] << 16) | (p[1] << 8) | p[0]; break; - case ENC_UTF32BE: + } + case ENC_UTF32BE: { ch = (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]; break; } } + } + Py_DECREF(obj); + if (!Py_UNICODE_IS_SURROGATE(ch)) { + goto bail; + } - Py_DECREF(object); - if (!Py_UNICODE_IS_SURROGATE(ch)) { - /* it's not a surrogate - fail */ - PyErr_SetObject(PyExceptionInstance_Class(exc), exc); - return NULL; - } - res = PyUnicode_FromOrdinal(ch); - if (res == NULL) - return NULL; - return Py_BuildValue("(Nn)", res, start + bytelength); + PyObject *res = PyUnicode_FromOrdinal(ch); + if (res == NULL) { + return NULL; + } + return Py_BuildValue("(Nn)", res, start + bytelength); + +bail: + PyErr_SetObject(PyExceptionInstance_Class(exc), exc); + return NULL; +} + + +/* This handler is declared static until someone demonstrates + a need to call it directly. */ +static PyObject * +PyCodec_SurrogatePassErrors(PyObject *exc) +{ + if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { + return _PyCodec_SurrogatePassUnicodeEncodeError(exc); + } + else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) { + return _PyCodec_SurrogatePassUnicodeDecodeError(exc); } else { wrong_exception_type(exc); @@ -1280,76 +1319,90 @@ PyCodec_SurrogatePassErrors(PyObject *exc) } } + static PyObject * -PyCodec_SurrogateEscapeErrors(PyObject *exc) +_PyCodec_SurrogateEscapeUnicodeEncodeError(PyObject *exc) { - PyObject *restuple; - PyObject *object; - Py_ssize_t i; - Py_ssize_t start; - Py_ssize_t end; - PyObject *res; + PyObject *obj; + Py_ssize_t start, end, slen; + if (_PyUnicodeError_GetParams(exc, + &obj, NULL, + &start, &end, &slen, false) < 0) + { + return NULL; + } - if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { - char *outp; - if (PyUnicodeEncodeError_GetStart(exc, &start)) - return NULL; - if (PyUnicodeEncodeError_GetEnd(exc, &end)) - return NULL; - if (!(object = PyUnicodeEncodeError_GetObject(exc))) - return NULL; - res = PyBytes_FromStringAndSize(NULL, end-start); - if (!res) { - Py_DECREF(object); + PyObject *res = PyBytes_FromStringAndSize(NULL, slen); + if (res == NULL) { + Py_DECREF(obj); + return NULL; + } + + char *outp = PyBytes_AsString(res); + for (Py_ssize_t i = start; i < end; i++) { + /* object is guaranteed to be "ready" */ + Py_UCS4 ch = PyUnicode_READ_CHAR(obj, i); + if (ch < 0xdc80 || ch > 0xdcff) { + Py_DECREF(obj); + Py_DECREF(res); + PyErr_SetObject(PyExceptionInstance_Class(exc), exc); return NULL; } - outp = PyBytes_AsString(res); - for (i = start; i < end; i++) { - /* object is guaranteed to be "ready" */ - Py_UCS4 ch = PyUnicode_READ_CHAR(object, i); - if (ch < 0xdc80 || ch > 0xdcff) { - /* Not a UTF-8b surrogate, fail with original exception */ - PyErr_SetObject(PyExceptionInstance_Class(exc), exc); - Py_DECREF(res); - Py_DECREF(object); - return NULL; - } - *outp++ = ch - 0xdc00; + *outp++ = ch - 0xdc00; + } + Py_DECREF(obj); + + return Py_BuildValue("(Nn)", res, end); +} + + +static PyObject * +_PyCodec_SurrogateEscapeUnicodeDecodeError(PyObject *exc) +{ + PyObject *obj; + Py_ssize_t start, end, slen; + if (_PyUnicodeError_GetParams(exc, + &obj, NULL, + &start, &end, &slen, true) < 0) + { + return NULL; + } + + Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */ + int consumed = 0; + const unsigned char *p = (const unsigned char*)PyBytes_AS_STRING(obj); + while (consumed < 4 && consumed < slen) { + /* Refuse to escape ASCII bytes. */ + if (p[start + consumed] < 128) { + break; } - restuple = Py_BuildValue("(On)", res, end); - Py_DECREF(res); - Py_DECREF(object); - return restuple; + ch[consumed] = 0xdc00 + p[start + consumed]; + consumed++; + } + Py_DECREF(obj); + + if (consumed == 0) { + /* codec complained about ASCII byte. */ + PyErr_SetObject(PyExceptionInstance_Class(exc), exc); + return NULL; + } + + PyObject *str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed); + if (str == NULL) { + return NULL; + } + return Py_BuildValue("(Nn)", str, start + consumed); +} + + +static PyObject * +PyCodec_SurrogateEscapeErrors(PyObject *exc) +{ + if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { + return _PyCodec_SurrogateEscapeUnicodeEncodeError(exc); } else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) { - PyObject *str; - const unsigned char *p; - Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */ - int consumed = 0; - if (PyUnicodeDecodeError_GetStart(exc, &start)) - return NULL; - if (PyUnicodeDecodeError_GetEnd(exc, &end)) - return NULL; - if (!(object = PyUnicodeDecodeError_GetObject(exc))) - return NULL; - p = (const unsigned char*)PyBytes_AS_STRING(object); - while (consumed < 4 && consumed < end-start) { - /* Refuse to escape ASCII bytes. */ - if (p[start+consumed] < 128) - break; - ch[consumed] = 0xdc00 + p[start+consumed]; - consumed++; - } - Py_DECREF(object); - if (!consumed) { - /* codec complained about ASCII byte. */ - PyErr_SetObject(PyExceptionInstance_Class(exc), exc); - return NULL; - } - str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed); - if (str == NULL) - return NULL; - return Py_BuildValue("(Nn)", str, start+consumed); + return _PyCodec_SurrogateEscapeUnicodeDecodeError(exc); } else { wrong_exception_type(exc); From 6e35c8da4d26b9014784a6b7f81283320ac298d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 21 Jan 2025 15:50:27 +0100 Subject: [PATCH 2/8] Apply suggestions from code review --- Python/codecs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index 696ec932c02f48..911984d08c6ee3 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -1171,6 +1171,7 @@ _PyCodec_SurrogatePassUnicodeEncodeError(PyObject *exc) Py_UCS4 ch = PyUnicode_READ_CHAR(obj, i); if (!Py_UNICODE_IS_SURROGATE(ch)) { /* Not a surrogate, fail with original exception */ + Py_DECREF(obj); Py_DECREF(res); goto bail; } @@ -1208,13 +1209,11 @@ _PyCodec_SurrogatePassUnicodeEncodeError(PyObject *exc) } } - PyObject *restuple = Py_BuildValue("(On)", res, end); Py_DECREF(obj); - Py_DECREF(res); + PyObject *restuple = Py_BuildValue("(Nn)", res, end); return restuple; bail: - Py_XDECREF(obj); PyErr_SetObject(PyExceptionInstance_Class(exc), exc); return NULL; From 7e2f67790bf857080f6f6c1f670a3b2e19c89551 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 21 Jan 2025 15:52:01 +0100 Subject: [PATCH 3/8] Update Python/codecs.c --- Python/codecs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/codecs.c b/Python/codecs.c index 911984d08c6ee3..3b4077e5bed99d 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -1216,7 +1216,6 @@ _PyCodec_SurrogatePassUnicodeEncodeError(PyObject *exc) bail: PyErr_SetObject(PyExceptionInstance_Class(exc), exc); return NULL; - } From bf6031678a952312042c358c8b5090dc6fcd8143 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 22 Jan 2025 12:19:28 +0100 Subject: [PATCH 4/8] revert surrogate escape changes --- Python/codecs.c | 139 +++++++++++++++++++++--------------------------- 1 file changed, 62 insertions(+), 77 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index 3b4077e5bed99d..ef3ac26bb5231f 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -1317,90 +1317,76 @@ PyCodec_SurrogatePassErrors(PyObject *exc) } } - static PyObject * -_PyCodec_SurrogateEscapeUnicodeEncodeError(PyObject *exc) +PyCodec_SurrogateEscapeErrors(PyObject *exc) { - PyObject *obj; - Py_ssize_t start, end, slen; - if (_PyUnicodeError_GetParams(exc, - &obj, NULL, - &start, &end, &slen, false) < 0) - { - return NULL; - } - - PyObject *res = PyBytes_FromStringAndSize(NULL, slen); - if (res == NULL) { - Py_DECREF(obj); - return NULL; - } + PyObject *restuple; + PyObject *object; + Py_ssize_t i; + Py_ssize_t start; + Py_ssize_t end; + PyObject *res; - char *outp = PyBytes_AsString(res); - for (Py_ssize_t i = start; i < end; i++) { - /* object is guaranteed to be "ready" */ - Py_UCS4 ch = PyUnicode_READ_CHAR(obj, i); - if (ch < 0xdc80 || ch > 0xdcff) { - Py_DECREF(obj); - Py_DECREF(res); - PyErr_SetObject(PyExceptionInstance_Class(exc), exc); + if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { + char *outp; + if (PyUnicodeEncodeError_GetStart(exc, &start)) + return NULL; + if (PyUnicodeEncodeError_GetEnd(exc, &end)) + return NULL; + if (!(object = PyUnicodeEncodeError_GetObject(exc))) + return NULL; + res = PyBytes_FromStringAndSize(NULL, end-start); + if (!res) { + Py_DECREF(object); return NULL; } - *outp++ = ch - 0xdc00; - } - Py_DECREF(obj); - - return Py_BuildValue("(Nn)", res, end); -} - - -static PyObject * -_PyCodec_SurrogateEscapeUnicodeDecodeError(PyObject *exc) -{ - PyObject *obj; - Py_ssize_t start, end, slen; - if (_PyUnicodeError_GetParams(exc, - &obj, NULL, - &start, &end, &slen, true) < 0) - { - return NULL; - } - - Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */ - int consumed = 0; - const unsigned char *p = (const unsigned char*)PyBytes_AS_STRING(obj); - while (consumed < 4 && consumed < slen) { - /* Refuse to escape ASCII bytes. */ - if (p[start + consumed] < 128) { - break; + outp = PyBytes_AsString(res); + for (i = start; i < end; i++) { + /* object is guaranteed to be "ready" */ + Py_UCS4 ch = PyUnicode_READ_CHAR(object, i); + if (ch < 0xdc80 || ch > 0xdcff) { + /* Not a UTF-8b surrogate, fail with original exception */ + PyErr_SetObject(PyExceptionInstance_Class(exc), exc); + Py_DECREF(res); + Py_DECREF(object); + return NULL; + } + *outp++ = ch - 0xdc00; } - ch[consumed] = 0xdc00 + p[start + consumed]; - consumed++; - } - Py_DECREF(obj); - - if (consumed == 0) { - /* codec complained about ASCII byte. */ - PyErr_SetObject(PyExceptionInstance_Class(exc), exc); - return NULL; - } - - PyObject *str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed); - if (str == NULL) { - return NULL; - } - return Py_BuildValue("(Nn)", str, start + consumed); -} - - -static PyObject * -PyCodec_SurrogateEscapeErrors(PyObject *exc) -{ - if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { - return _PyCodec_SurrogateEscapeUnicodeEncodeError(exc); + restuple = Py_BuildValue("(On)", res, end); + Py_DECREF(res); + Py_DECREF(object); + return restuple; } else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) { - return _PyCodec_SurrogateEscapeUnicodeDecodeError(exc); + PyObject *str; + const unsigned char *p; + Py_UCS2 ch[4]; /* decode up to 4 bad bytes. */ + int consumed = 0; + if (PyUnicodeDecodeError_GetStart(exc, &start)) + return NULL; + if (PyUnicodeDecodeError_GetEnd(exc, &end)) + return NULL; + if (!(object = PyUnicodeDecodeError_GetObject(exc))) + return NULL; + p = (const unsigned char*)PyBytes_AS_STRING(object); + while (consumed < 4 && consumed < end-start) { + /* Refuse to escape ASCII bytes. */ + if (p[start+consumed] < 128) + break; + ch[consumed] = 0xdc00 + p[start+consumed]; + consumed++; + } + Py_DECREF(object); + if (!consumed) { + /* codec complained about ASCII byte. */ + PyErr_SetObject(PyExceptionInstance_Class(exc), exc); + return NULL; + } + str = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, ch, consumed); + if (str == NULL) + return NULL; + return Py_BuildValue("(Nn)", str, start+consumed); } else { wrong_exception_type(exc); @@ -1408,7 +1394,6 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc) } } - static PyObject *strict_errors(PyObject *self, PyObject *exc) { return PyCodec_StrictErrors(exc); From 56839430ddbda5764409eaaaf382375de67a4e32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 22 Jan 2025 12:20:21 +0100 Subject: [PATCH 5/8] fixup --- Python/codecs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/codecs.c b/Python/codecs.c index ef3ac26bb5231f..7962451d44b73d 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -1394,6 +1394,7 @@ PyCodec_SurrogateEscapeErrors(PyObject *exc) } } + static PyObject *strict_errors(PyObject *self, PyObject *exc) { return PyCodec_StrictErrors(exc); From 603a6ac095bcedb0776c61c704e834d6801cb8d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 9 Feb 2025 09:44:27 +0100 Subject: [PATCH 6/8] post-merge --- Python/codecs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Python/codecs.c b/Python/codecs.c index 05d26df8cab1f1..dc0b4310cd3b6c 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -1166,6 +1166,8 @@ get_standard_encoding(PyObject *encoding, int *code, int *bytelength) } +// --- handler: 'surrogatepass' ----------------------------------------------- + static PyObject * _PyCodec_SurrogatePassUnicodeEncodeError(PyObject *exc) { @@ -1356,6 +1358,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc) } } + static PyObject * PyCodec_SurrogateEscapeErrors(PyObject *exc) { @@ -1475,11 +1478,13 @@ namereplace_errors(PyObject *Py_UNUSED(self), PyObject *exc) } -static PyObject *surrogatepass_errors(PyObject *self, PyObject *exc) +static inline PyObject * +surrogatepass_errors(PyObject *Py_UNUSED(self), PyObject *exc) { return PyCodec_SurrogatePassErrors(exc); } + static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc) { return PyCodec_SurrogateEscapeErrors(exc); From 1d55a54ecd4ab0c17a312a4478256b09e08ac948 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 9 Feb 2025 09:46:07 +0100 Subject: [PATCH 7/8] post-merge --- Python/codecs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/codecs.c b/Python/codecs.c index dc0b4310cd3b6c..b0262a527aa32f 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -1346,10 +1346,10 @@ _PyCodec_SurrogatePassUnicodeDecodeError(PyObject *exc) static PyObject * PyCodec_SurrogatePassErrors(PyObject *exc) { - if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { + if (_PyIsUnicodeEncodeError(exc)) { return _PyCodec_SurrogatePassUnicodeEncodeError(exc); } - else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) { + else if (!_PyIsUnicodeDecodeError(exc)) { return _PyCodec_SurrogatePassUnicodeDecodeError(exc); } else { From 79d71ef6624deb518649661a05fb3c9cea852d0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 9 Feb 2025 09:46:22 +0100 Subject: [PATCH 8/8] post-merge --- Python/codecs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/codecs.c b/Python/codecs.c index b0262a527aa32f..406d48b56ddae8 100644 --- a/Python/codecs.c +++ b/Python/codecs.c @@ -1349,7 +1349,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc) if (_PyIsUnicodeEncodeError(exc)) { return _PyCodec_SurrogatePassUnicodeEncodeError(exc); } - else if (!_PyIsUnicodeDecodeError(exc)) { + else if (_PyIsUnicodeDecodeError(exc)) { return _PyCodec_SurrogatePassUnicodeDecodeError(exc); } else {