@@ -4059,6 +4059,21 @@ make_decode_exception(PyObject **exceptionObject,
40594059}
40604060
40614061#ifdef MS_WINDOWS
4062+ static int
4063+ widechar_resize (wchar_t * * buf , Py_ssize_t * size , Py_ssize_t newsize )
4064+ {
4065+ if (newsize > * size ) {
4066+ wchar_t * newbuf = * buf ;
4067+ if (PyMem_Resize (newbuf , wchar_t , newsize ) == NULL ) {
4068+ PyErr_NoMemory ();
4069+ return -1 ;
4070+ }
4071+ * buf = newbuf ;
4072+ }
4073+ * size = newsize ;
4074+ return 0 ;
4075+ }
4076+
40624077/* error handling callback helper:
40634078 build arguments, call the callback and check the arguments,
40644079 if no exception occurred, copy the replacement to the output
@@ -4072,7 +4087,7 @@ unicode_decode_call_errorhandler_wchar(
40724087 const char * encoding , const char * reason ,
40734088 const char * * input , const char * * inend , Py_ssize_t * startinpos ,
40744089 Py_ssize_t * endinpos , PyObject * * exceptionObject , const char * * inptr ,
4075- PyObject * * output , Py_ssize_t * outpos )
4090+ wchar_t * * buf , Py_ssize_t * bufsize , Py_ssize_t * outpos )
40764091{
40774092 static const char * argparse = "Un;decoding error handler must return (str, int) tuple" ;
40784093
@@ -4086,9 +4101,6 @@ unicode_decode_call_errorhandler_wchar(
40864101 wchar_t * repwstr ;
40874102 Py_ssize_t repwlen ;
40884103
4089- assert (_PyUnicode_KIND (* output ) == PyUnicode_WCHAR_KIND );
4090- outsize = _PyUnicode_WSTR_LENGTH (* output );
4091-
40924104 if (* errorHandler == NULL ) {
40934105 * errorHandler = PyCodec_LookupError (errors );
40944106 if (* errorHandler == NULL )
@@ -4146,13 +4158,15 @@ unicode_decode_call_errorhandler_wchar(
41464158 if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos ))
41474159 goto overflow ;
41484160 requiredsize += insize - newpos ;
4161+ outsize = * bufsize ;
41494162 if (requiredsize > outsize ) {
41504163 if (outsize <= PY_SSIZE_T_MAX /2 && requiredsize < 2 * outsize )
41514164 requiredsize = 2 * outsize ;
4152- if (unicode_resize ( output , requiredsize ) < 0 )
4165+ if (widechar_resize ( buf , bufsize , requiredsize ) < 0 ) {
41534166 goto onError ;
4167+ }
41544168 }
4155- wcsncpy (_PyUnicode_WSTR ( * output ) + * outpos , repwstr , repwlen );
4169+ wcsncpy (* buf + * outpos , repwstr , repwlen );
41564170 * outpos += repwlen ;
41574171 * endinpos = newpos ;
41584172 * inptr = * input + newpos ;
@@ -7146,7 +7160,8 @@ decode_code_page_flags(UINT code_page)
71467160 */
71477161static int
71487162decode_code_page_strict (UINT code_page ,
7149- PyObject * * v ,
7163+ wchar_t * * buf ,
7164+ Py_ssize_t * bufsize ,
71507165 const char * in ,
71517166 int insize )
71527167{
@@ -7160,21 +7175,12 @@ decode_code_page_strict(UINT code_page,
71607175 if (outsize <= 0 )
71617176 goto error ;
71627177
7163- if (* v == NULL ) {
7164- /* Create unicode object */
7165- /* FIXME: don't use _PyUnicode_New(), but allocate a wchar_t* buffer */
7166- * v = (PyObject * )_PyUnicode_New (outsize );
7167- if (* v == NULL )
7168- return -1 ;
7169- out = PyUnicode_AS_UNICODE (* v );
7170- }
7171- else {
7172- /* Extend unicode object */
7173- Py_ssize_t n = PyUnicode_GET_SIZE (* v );
7174- if (unicode_resize (v , n + outsize ) < 0 )
7175- return -1 ;
7176- out = PyUnicode_AS_UNICODE (* v ) + n ;
7178+ /* Extend a wchar_t* buffer */
7179+ Py_ssize_t n = * bufsize ; /* Get the current length */
7180+ if (widechar_resize (buf , bufsize , n + outsize ) < 0 ) {
7181+ return -1 ;
71777182 }
7183+ out = * buf + n ;
71787184
71797185 /* Do the conversion */
71807186 outsize = MultiByteToWideChar (code_page , flags , in , insize , out , outsize );
@@ -7198,7 +7204,8 @@ decode_code_page_strict(UINT code_page,
71987204 */
71997205static int
72007206decode_code_page_errors (UINT code_page ,
7201- PyObject * * v ,
7207+ wchar_t * * buf ,
7208+ Py_ssize_t * bufsize ,
72027209 const char * in , const int size ,
72037210 const char * errors , int final )
72047211{
@@ -7238,29 +7245,16 @@ decode_code_page_errors(UINT code_page,
72387245 goto error ;
72397246 }
72407247
7241- if (* v == NULL ) {
7242- /* Create unicode object */
7243- if (size > PY_SSIZE_T_MAX / (Py_ssize_t )Py_ARRAY_LENGTH (buffer )) {
7244- PyErr_NoMemory ();
7245- goto error ;
7246- }
7247- /* FIXME: don't use _PyUnicode_New(), but allocate a wchar_t* buffer */
7248- * v = (PyObject * )_PyUnicode_New (size * Py_ARRAY_LENGTH (buffer ));
7249- if (* v == NULL )
7250- goto error ;
7251- out = PyUnicode_AS_UNICODE (* v );
7248+ /* Extend a wchar_t* buffer */
7249+ Py_ssize_t n = * bufsize ; /* Get the current length */
7250+ if (size > (PY_SSIZE_T_MAX - n ) / (Py_ssize_t )Py_ARRAY_LENGTH (buffer )) {
7251+ PyErr_NoMemory ();
7252+ goto error ;
72527253 }
7253- else {
7254- /* Extend unicode object */
7255- Py_ssize_t n = PyUnicode_GET_SIZE (* v );
7256- if (size > (PY_SSIZE_T_MAX - n ) / (Py_ssize_t )Py_ARRAY_LENGTH (buffer )) {
7257- PyErr_NoMemory ();
7258- goto error ;
7259- }
7260- if (unicode_resize (v , n + size * Py_ARRAY_LENGTH (buffer )) < 0 )
7261- goto error ;
7262- out = PyUnicode_AS_UNICODE (* v ) + n ;
7254+ if (widechar_resize (buf , bufsize , n + size * Py_ARRAY_LENGTH (buffer )) < 0 ) {
7255+ goto error ;
72637256 }
7257+ out = * buf + n ;
72647258
72657259 /* Decode the byte string character per character */
72667260 while (in < endin )
@@ -7295,16 +7289,16 @@ decode_code_page_errors(UINT code_page,
72957289
72967290 startinpos = in - startin ;
72977291 endinpos = startinpos + 1 ;
7298- outpos = out - PyUnicode_AS_UNICODE ( * v ) ;
7292+ outpos = out - * buf ;
72997293 if (unicode_decode_call_errorhandler_wchar (
73007294 errors , & errorHandler ,
73017295 encoding , reason ,
73027296 & startin , & endin , & startinpos , & endinpos , & exc , & in ,
7303- v , & outpos ))
7297+ buf , bufsize , & outpos ))
73047298 {
73057299 goto error ;
73067300 }
7307- out = PyUnicode_AS_UNICODE ( * v ) + outpos ;
7301+ out = * buf + outpos ;
73087302 }
73097303 else {
73107304 in += insize ;
@@ -7313,14 +7307,9 @@ decode_code_page_errors(UINT code_page,
73137307 }
73147308 }
73157309
7316- /* write a NUL character at the end */
7317- * out = 0 ;
7318-
7319- /* Extend unicode object */
7320- outsize = out - PyUnicode_AS_UNICODE (* v );
7321- assert (outsize <= PyUnicode_WSTR_LENGTH (* v ));
7322- if (unicode_resize (v , outsize ) < 0 )
7323- goto error ;
7310+ /* Shrink the buffer */
7311+ assert (out - * buf <= * bufsize );
7312+ * bufsize = out - * buf ;
73247313 /* (in - startin) <= size and size is an int */
73257314 ret = Py_SAFE_DOWNCAST (in - startin , Py_ssize_t , int );
73267315
@@ -7336,7 +7325,8 @@ decode_code_page_stateful(int code_page,
73367325 const char * s , Py_ssize_t size ,
73377326 const char * errors , Py_ssize_t * consumed )
73387327{
7339- PyObject * v = NULL ;
7328+ wchar_t * buf = NULL ;
7329+ Py_ssize_t bufsize = 0 ;
73407330 int chunk_size , final , converted , done ;
73417331
73427332 if (code_page < 0 ) {
@@ -7368,21 +7358,21 @@ decode_code_page_stateful(int code_page,
73687358 }
73697359
73707360 if (chunk_size == 0 && done ) {
7371- if (v != NULL )
7361+ if (buf != NULL )
73727362 break ;
73737363 _Py_RETURN_UNICODE_EMPTY ();
73747364 }
73757365
7376- converted = decode_code_page_strict (code_page , & v ,
7366+ converted = decode_code_page_strict (code_page , & buf , & bufsize ,
73777367 s , chunk_size );
73787368 if (converted == -2 )
7379- converted = decode_code_page_errors (code_page , & v ,
7369+ converted = decode_code_page_errors (code_page , & buf , & bufsize ,
73807370 s , chunk_size ,
73817371 errors , final );
73827372 assert (converted != 0 || done );
73837373
73847374 if (converted < 0 ) {
7385- Py_XDECREF ( v );
7375+ PyMem_Free ( buf );
73867376 return NULL ;
73877377 }
73887378
@@ -7393,7 +7383,9 @@ decode_code_page_stateful(int code_page,
73937383 size -= converted ;
73947384 } while (!done );
73957385
7396- return unicode_result (v );
7386+ PyObject * v = PyUnicode_FromWideChar (buf , bufsize );
7387+ PyMem_Free (buf );
7388+ return v ;
73977389}
73987390
73997391PyObject *
0 commit comments