@@ -1269,17 +1269,19 @@ int PyUnicode_SetDefaultEncoding(const char *encoding)
12691269static
12701270int unicode_decode_call_errorhandler (const char * errors , PyObject * * errorHandler ,
12711271 const char * encoding , const char * reason ,
1272- const char * input , Py_ssize_t insize , Py_ssize_t * startinpos , Py_ssize_t * endinpos , PyObject * * exceptionObject , const char * * inptr ,
1272+ const char * * input , const char * * inend , Py_ssize_t * startinpos , Py_ssize_t * endinpos , PyObject * * exceptionObject , const char * * inptr ,
12731273 PyObject * * output , Py_ssize_t * outpos , Py_UNICODE * * outptr )
12741274{
12751275 static char * argparse = "O!n;decoding error handler must return (unicode, int) tuple" ;
12761276
12771277 PyObject * restuple = NULL ;
12781278 PyObject * repunicode = NULL ;
12791279 Py_ssize_t outsize = PyUnicode_GET_SIZE (* output );
1280+ Py_ssize_t insize ;
12801281 Py_ssize_t requiredsize ;
12811282 Py_ssize_t newpos ;
12821283 Py_UNICODE * repptr ;
1284+ PyObject * inputobj = NULL ;
12831285 Py_ssize_t repsize ;
12841286 int res = -1 ;
12851287
@@ -1291,7 +1293,7 @@ int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler
12911293
12921294 if (* exceptionObject == NULL ) {
12931295 * exceptionObject = PyUnicodeDecodeError_Create (
1294- encoding , input , insize , * startinpos , * endinpos , reason );
1296+ encoding , * input , * inend - * input , * startinpos , * endinpos , reason );
12951297 if (* exceptionObject == NULL )
12961298 goto onError ;
12971299 }
@@ -1313,6 +1315,19 @@ int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler
13131315 }
13141316 if (!PyArg_ParseTuple (restuple , argparse , & PyUnicode_Type , & repunicode , & newpos ))
13151317 goto onError ;
1318+
1319+ /* Copy back the bytes variables, which might have been modified by the
1320+ callback */
1321+ inputobj = PyUnicodeDecodeError_GetObject (* exceptionObject );
1322+ if (!inputobj )
1323+ goto onError ;
1324+ if (!PyBytes_Check (inputobj )) {
1325+ PyErr_Format (PyExc_TypeError , "exception attribute object must be bytes" );
1326+ }
1327+ * input = PyBytes_AS_STRING (inputobj );
1328+ insize = PyBytes_GET_SIZE (inputobj );
1329+ * inend = * input + insize ;
1330+
13161331 if (newpos < 0 )
13171332 newpos = insize + newpos ;
13181333 if (newpos < 0 || newpos > insize ) {
@@ -1335,10 +1350,11 @@ int unicode_decode_call_errorhandler(const char *errors, PyObject **errorHandler
13351350 * outptr = PyUnicode_AS_UNICODE (* output ) + * outpos ;
13361351 }
13371352 * endinpos = newpos ;
1338- * inptr = input + newpos ;
1353+ * inptr = * input + newpos ;
13391354 Py_UNICODE_COPY (* outptr , repptr , repsize );
13401355 * outptr += repsize ;
13411356 * outpos += repsize ;
1357+
13421358 /* we made it! */
13431359 res = 0 ;
13441360
@@ -1503,7 +1519,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
15031519 else if (SPECIAL (ch ,0 ,0 )) {
15041520 errmsg = "unexpected special character" ;
15051521 s ++ ;
1506- goto utf7Error ;
1522+ goto utf7Error ;
15071523 }
15081524 else {
15091525 * p ++ = ch ;
@@ -1516,7 +1532,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
15161532 if (unicode_decode_call_errorhandler (
15171533 errors , & errorHandler ,
15181534 "utf7" , errmsg ,
1519- starts , size , & startinpos , & endinpos , & exc , & s ,
1535+ & starts , & e , & startinpos , & endinpos , & exc , & s ,
15201536 (PyObject * * )& unicode , & outpos , & p ))
15211537 goto onError ;
15221538 }
@@ -1527,7 +1543,7 @@ PyObject *PyUnicode_DecodeUTF7(const char *s,
15271543 if (unicode_decode_call_errorhandler (
15281544 errors , & errorHandler ,
15291545 "utf7" , "unterminated shift sequence" ,
1530- starts , size , & startinpos , & endinpos , & exc , & s ,
1546+ & starts , & e , & startinpos , & endinpos , & exc , & s ,
15311547 (PyObject * * )& unicode , & outpos , & p ))
15321548 goto onError ;
15331549 if (s < e )
@@ -1848,7 +1864,7 @@ PyObject *PyUnicode_DecodeUTF8Stateful(const char *s,
18481864 if (unicode_decode_call_errorhandler (
18491865 errors , & errorHandler ,
18501866 "utf8" , errmsg ,
1851- starts , size , & startinpos , & endinpos , & exc , & s ,
1867+ & starts , & e , & startinpos , & endinpos , & exc , & s ,
18521868 (PyObject * * )& unicode , & outpos , & p ))
18531869 goto onError ;
18541870 }
@@ -2132,7 +2148,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
21322148 if (unicode_decode_call_errorhandler (
21332149 errors , & errorHandler ,
21342150 "utf16" , errmsg ,
2135- starts , size , & startinpos , & endinpos , & exc , (const char * * )& q ,
2151+ & starts , ( const char * * ) & e , & startinpos , & endinpos , & exc , (const char * * )& q ,
21362152 (PyObject * * )& unicode , & outpos , & p ))
21372153 goto onError ;
21382154 }
@@ -2342,7 +2358,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
23422358 if (unicode_decode_call_errorhandler (
23432359 errors , & errorHandler ,
23442360 "unicodeescape" , "end of string in escape sequence" ,
2345- starts , size , & startinpos , & endinpos , & exc , & s ,
2361+ & starts , & end , & startinpos , & endinpos , & exc , & s ,
23462362 (PyObject * * )& v , & outpos , & p ))
23472363 goto onError ;
23482364 goto nextByte ;
@@ -2354,7 +2370,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
23542370 if (unicode_decode_call_errorhandler (
23552371 errors , & errorHandler ,
23562372 "unicodeescape" , message ,
2357- starts , size , & startinpos , & endinpos , & exc , & s ,
2373+ & starts , & end , & startinpos , & endinpos , & exc , & s ,
23582374 (PyObject * * )& v , & outpos , & p ))
23592375 goto onError ;
23602376 goto nextByte ;
@@ -2393,7 +2409,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
23932409 if (unicode_decode_call_errorhandler (
23942410 errors , & errorHandler ,
23952411 "unicodeescape" , "illegal Unicode character" ,
2396- starts , size , & startinpos , & endinpos , & exc , & s ,
2412+ & starts , & end , & startinpos , & endinpos , & exc , & s ,
23972413 (PyObject * * )& v , & outpos , & p ))
23982414 goto onError ;
23992415 }
@@ -2435,7 +2451,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
24352451 if (unicode_decode_call_errorhandler (
24362452 errors , & errorHandler ,
24372453 "unicodeescape" , message ,
2438- starts , size , & startinpos , & endinpos , & exc , & s ,
2454+ & starts , & end , & startinpos , & endinpos , & exc , & s ,
24392455 (PyObject * * )& v , & outpos , & p ))
24402456 goto onError ;
24412457 break ;
@@ -2449,7 +2465,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
24492465 if (unicode_decode_call_errorhandler (
24502466 errors , & errorHandler ,
24512467 "unicodeescape" , message ,
2452- starts , size , & startinpos , & endinpos , & exc , & s ,
2468+ & starts , & end , & startinpos , & endinpos , & exc , & s ,
24532469 (PyObject * * )& v , & outpos , & p ))
24542470 goto onError ;
24552471 }
@@ -2728,7 +2744,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
27282744 if (unicode_decode_call_errorhandler (
27292745 errors , & errorHandler ,
27302746 "rawunicodeescape" , "truncated \\uXXXX" ,
2731- starts , size , & startinpos , & endinpos , & exc , & s ,
2747+ & starts , & end , & startinpos , & endinpos , & exc , & s ,
27322748 (PyObject * * )& v , & outpos , & p ))
27332749 goto onError ;
27342750 goto nextByte ;
@@ -2746,7 +2762,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
27462762 if (unicode_decode_call_errorhandler (
27472763 errors , & errorHandler ,
27482764 "rawunicodeescape" , "\\Uxxxxxxxx out of range" ,
2749- starts , size , & startinpos , & endinpos , & exc , & s ,
2765+ & starts , & end , & startinpos , & endinpos , & exc , & s ,
27502766 (PyObject * * )& v , & outpos , & p ))
27512767 goto onError ;
27522768 }
@@ -2897,7 +2913,7 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s,
28972913 if (unicode_decode_call_errorhandler (
28982914 errors , & errorHandler ,
28992915 "unicode_internal" , reason ,
2900- starts , size , & startinpos , & endinpos , & exc , & s ,
2916+ & starts , & end , & startinpos , & endinpos , & exc , & s ,
29012917 (PyObject * * )& v , & outpos , & p )) {
29022918 goto onError ;
29032919 }
@@ -3277,7 +3293,7 @@ PyObject *PyUnicode_DecodeASCII(const char *s,
32773293 if (unicode_decode_call_errorhandler (
32783294 errors , & errorHandler ,
32793295 "ascii" , "ordinal not in range(128)" ,
3280- starts , size , & startinpos , & endinpos , & exc , & s ,
3296+ & starts , & e , & startinpos , & endinpos , & exc , & s ,
32813297 (PyObject * * )& v , & outpos , & p ))
32823298 goto onError ;
32833299 }
@@ -3578,7 +3594,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
35783594 if (unicode_decode_call_errorhandler (
35793595 errors , & errorHandler ,
35803596 "charmap" , "character maps to <undefined>" ,
3581- starts , size , & startinpos , & endinpos , & exc , & s ,
3597+ & starts , & e , & startinpos , & endinpos , & exc , & s ,
35823598 (PyObject * * )& v , & outpos , & p )) {
35833599 goto onError ;
35843600 }
@@ -3628,7 +3644,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
36283644 if (unicode_decode_call_errorhandler (
36293645 errors , & errorHandler ,
36303646 "charmap" , "character maps to <undefined>" ,
3631- starts , size , & startinpos , & endinpos , & exc , & s ,
3647+ & starts , & e , & startinpos , & endinpos , & exc , & s ,
36323648 (PyObject * * )& v , & outpos , & p )) {
36333649 Py_DECREF (x );
36343650 goto onError ;
0 commit comments