@@ -262,9 +262,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
262262 Py_ssize_t size ,
263263 const char * errors )
264264{
265- #define MAX_SHORT_UNICHARS 300 /* largest size we'll do on the stack */
266-
267- Py_ssize_t i ; /* index into s of next input byte */
265+ Py_ssize_t i ; /* index into data of next input character */
268266 char * p ; /* next free byte in output buffer */
269267#if STRINGLIB_SIZEOF_CHAR > 1
270268 PyObject * error_handler_obj = NULL ;
@@ -389,7 +387,7 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
389387 goto error ;
390388
391389 /* subtract preallocated bytes */
392- writer .min_size -= max_char_size ;
390+ writer .min_size -= max_char_size * ( newpos - startpos ) ;
393391
394392 if (PyBytes_Check (rep )) {
395393 p = _PyBytesWriter_WriteBytes (& writer , p ,
@@ -402,14 +400,12 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
402400 goto error ;
403401
404402 if (!PyUnicode_IS_ASCII (rep )) {
405- raise_encode_exception (& exc , "utf-8" ,
406- unicode ,
407- i - 1 , i ,
403+ raise_encode_exception (& exc , "utf-8" , unicode ,
404+ startpos , endpos ,
408405 "surrogates not allowed" );
409406 goto error ;
410407 }
411408
412- assert (PyUnicode_KIND (rep ) == PyUnicode_1BYTE_KIND );
413409 p = _PyBytesWriter_WriteBytes (& writer , p ,
414410 PyUnicode_DATA (rep ),
415411 PyUnicode_GET_LENGTH (rep ));
@@ -463,8 +459,6 @@ STRINGLIB(utf8_encoder)(PyObject *unicode,
463459 _PyBytesWriter_Dealloc (& writer );
464460 return NULL ;
465461#endif
466-
467- #undef MAX_SHORT_UNICHARS
468462}
469463
470464/* The pattern for constructing UCS2-repeated masks. */
0 commit comments