@@ -6599,6 +6599,7 @@ unicode_encode_ucs1(PyObject *unicode,
65996599 PyObject * error_handler_obj = NULL ;
66006600 PyObject * exc = NULL ;
66016601 _Py_error_handler error_handler = _Py_ERROR_UNKNOWN ;
6602+ PyObject * rep = NULL ;
66026603 /* output object */
66036604 _PyBytesWriter writer ;
66046605
@@ -6627,8 +6628,7 @@ unicode_encode_ucs1(PyObject *unicode,
66276628 ++ pos ;
66286629 }
66296630 else {
6630- PyObject * repunicode ;
6631- Py_ssize_t repsize , newpos , i ;
6631+ Py_ssize_t newpos , i ;
66326632 /* startpos for collecting unencodable chars */
66336633 Py_ssize_t collstart = pos ;
66346634 Py_ssize_t collend = collstart + 1 ;
@@ -6694,52 +6694,59 @@ unicode_encode_ucs1(PyObject *unicode,
66946694 /* fallback to general error handling */
66956695
66966696 default :
6697- repunicode = unicode_encode_call_errorhandler (errors , & error_handler_obj ,
6698- encoding , reason , unicode , & exc ,
6699- collstart , collend , & newpos );
6700- if (repunicode == NULL || (PyUnicode_Check (repunicode ) &&
6701- PyUnicode_READY (repunicode ) == -1 ))
6697+ rep = unicode_encode_call_errorhandler (errors , & error_handler_obj ,
6698+ encoding , reason , unicode , & exc ,
6699+ collstart , collend , & newpos );
6700+ if (rep == NULL )
67026701 goto onError ;
67036702
67056704 /* subtract preallocated bytes */
67056704 writer .min_size -= 1 ;
67066705
6707- if (PyBytes_Check (repunicode )) {
6706+ if (PyBytes_Check (rep )) {
67086707 /* Directly copy bytes result to output. */
67096708 str = _PyBytesWriter_WriteBytes (& writer , str ,
6710- PyBytes_AS_STRING (repunicode ),
6711- PyBytes_GET_SIZE (repunicode ));
6709+ PyBytes_AS_STRING (rep ),
6710+ PyBytes_GET_SIZE (rep ));
67126711 if (str == NULL )
67136712 goto onError ;
6714-
6715- pos = newpos ;
6716- Py_DECREF (repunicode );
6717- break ;
67186713 }
6714+ else {
6715+ assert (PyUnicode_Check (rep ));
67196716
6720- /* need more space? (at least enough for what we
6721- have+the replacement+the rest of the string, so
6722- we won't have to check space for encodable characters) */
6723- repsize = PyUnicode_GET_LENGTH (repunicode );
6717+ if (PyUnicode_READY (rep ) < 0 )
6718+ goto onError ;
67246719
6725- str = _PyBytesWriter_Prepare (& writer , str , repsize );
6726- if (str == NULL )
6727- goto onError ;
6720+ if (PyUnicode_IS_ASCII (rep )) {
6721+ /* Fast path: all characters are smaller than limit */
6722+ assert (limit >= 128 );
6723+ assert (PyUnicode_KIND (rep ) == PyUnicode_1BYTE_KIND );
6724+ str = _PyBytesWriter_WriteBytes (& writer , str ,
6725+ PyUnicode_DATA (rep ),
6726+ PyUnicode_GET_LENGTH (rep ));
6727+ }
6728+ else {
6729+ Py_ssize_t repsize = PyUnicode_GET_LENGTH (rep );
67286730
6729- /* check if there is anything unencodable in the replacement
6730- and copy it to the output */
6731- for (i = 0 ; repsize -- > 0 ; ++ i , ++ str ) {
6732- ch = PyUnicode_READ_CHAR (repunicode , i );
6733- if (ch >= limit ) {
6734- raise_encode_exception (& exc , encoding , unicode ,
6735- pos , pos + 1 , reason );
6736- Py_DECREF (repunicode );
6737- goto onError ;
6731+ str = _PyBytesWriter_Prepare (& writer , str , repsize );
6732+ if (str == NULL )
6733+ goto onError ;
6734+
6735+ /* check if there is anything unencodable in the
6736+ replacement and copy it to the output */
6737+ for (i = 0 ; repsize -- > 0 ; ++ i , ++ str ) {
6738+ ch = PyUnicode_READ_CHAR (rep , i );
6739+ if (ch >= limit ) {
6740+ raise_encode_exception (& exc , encoding , unicode ,
6741+ pos , pos + 1 , reason );
6742+ goto onError ;
6743+ }
6744+ * str = (char )ch ;
6745+ }
67386746 }
6739- * str = (char )ch ;
67406747 }
67416748 pos = newpos ;
6742- Py_DECREF ( repunicode );
6749+ Py_CLEAR ( rep );
67436750 }
67446751
67456752 /* If overallocation was disabled, ensure that it was the last
@@ -6753,6 +6760,7 @@ unicode_encode_ucs1(PyObject *unicode,
67536760 return _PyBytesWriter_Finish (& writer , str );
67546761
67556762 onError :
6763+ Py_XDECREF (rep );
67566764 _PyBytesWriter_Dealloc (& writer );
67576765 Py_XDECREF (error_handler_obj );
67586766 Py_XDECREF (exc );
0 commit comments