Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 998c9cd

Browse files
Issue #28561: Clean up UTF-8 encoder: remove dead code, update comments, etc.
Patch by Xiang Zhang.
1 parent b7d14a0 commit 998c9cd

1 file changed

Lines changed: 4 additions & 10 deletions

File tree

Objects/stringlib/codecs.h

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -262,9 +262,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
262262
Py_ssize_t size,
263263
const char *errors)
264264
{
265-
#define MAX_SHORT_UNICHARS 300 /* largest size we'll do on the stack */
266-
267-
Py_ssize_t i; /* index into s of next input byte */
265+
Py_ssize_t i; /* index into data of next input character */
268266
char *p; /* next free byte in output buffer */
269267
#if STRINGLIB_SIZEOF_CHAR > 1
270268
PyObject *error_handler_obj = NULL;
@@ -389,7 +387,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
389387
goto error;
390388

391389
/* subtract preallocated bytes */
392-
writer.min_size -= max_char_size;
390+
writer.min_size -= max_char_size * (newpos - startpos);
393391

394392
if (PyBytes_Check(rep)) {
395393
p = _PyBytesWriter_WriteBytes(&writer, p,
@@ -402,14 +400,12 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
402400
goto error;
403401

404402
if (!PyUnicode_IS_ASCII(rep)) {
405-
raise_encode_exception(&exc, "utf-8",
406-
unicode,
407-
i-1, i,
403+
raise_encode_exception(&exc, "utf-8", unicode,
404+
startpos, endpos,
408405
"surrogates not allowed");
409406
goto error;
410407
}
411408

412-
assert(PyUnicode_KIND(rep) == PyUnicode_1BYTE_KIND);
413409
p = _PyBytesWriter_WriteBytes(&writer, p,
414410
PyUnicode_DATA(rep),
415411
PyUnicode_GET_LENGTH(rep));
@@ -463,8 +459,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
463459
_PyBytesWriter_Dealloc(&writer);
464460
return NULL;
465461
#endif
466-
467-
#undef MAX_SHORT_UNICHARS
468462
}
469463

470464
/* The pattern for constructing UCS2-repeated masks. */

0 commit comments

Comments
 (0)