@@ -6103,6 +6103,11 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
61036103 while (s < end ) {
61046104 Py_UNICODE uch ;
61056105 Py_UCS4 ch ;
6106+ if (end - s < Py_UNICODE_SIZE ) {
6107+ endinpos = end - starts ;
6108+ reason = "truncated input" ;
6109+ goto error ;
6110+ }
61066111 /* We copy the raw representation one byte at a time because the
61076112 pointer may be unaligned (see test_codeccallbacks). */
61086113 ((char * ) & uch )[0 ] = s [0 ];
@@ -6112,37 +6117,18 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
61126117 ((char * ) & uch )[3 ] = s [3 ];
61136118#endif
61146119 ch = uch ;
6115-
6120+ #ifdef Py_UNICODE_WIDE
61166121 /* We have to sanity check the raw data, otherwise doom looms for
61176122 some malformed UCS-4 data. */
6118- if (
6119- #ifdef Py_UNICODE_WIDE
6120- ch > 0x10ffff ||
6121- #endif
6122- end - s < Py_UNICODE_SIZE
6123- )
6124- {
6125- startinpos = s - starts ;
6126- if (end - s < Py_UNICODE_SIZE ) {
6127- endinpos = end - starts ;
6128- reason = "truncated input" ;
6129- }
6130- else {
6131- endinpos = s - starts + Py_UNICODE_SIZE ;
6132- reason = "illegal code point (> 0x10FFFF)" ;
6133- }
6134- if (unicode_decode_call_errorhandler (
6135- errors , & errorHandler ,
6136- "unicode_internal" , reason ,
6137- & starts , & end , & startinpos , & endinpos , & exc , & s ,
6138- & v , & outpos ))
6139- goto onError ;
6140- continue ;
6123+ if (ch > 0x10ffff ) {
6124+ endinpos = s - starts + Py_UNICODE_SIZE ;
6125+ reason = "illegal code point (> 0x10FFFF)" ;
6126+ goto error ;
61416127 }
6142-
6128+ #endif
61436129 s += Py_UNICODE_SIZE ;
61446130#ifndef Py_UNICODE_WIDE
6145- if (Py_UNICODE_IS_HIGH_SURROGATE (ch ) && s < end )
6131+ if (Py_UNICODE_IS_HIGH_SURROGATE (ch ) && end - s >= Py_UNICODE_SIZE )
61466132 {
61476133 Py_UNICODE uch2 ;
61486134 ((char * ) & uch2 )[0 ] = s [0 ];
@@ -6157,6 +6143,16 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
61576143
61586144 if (unicode_putchar (& v , & outpos , ch ) < 0 )
61596145 goto onError ;
6146+ continue ;
6147+
6148+ error :
6149+ startinpos = s - starts ;
6150+ if (unicode_decode_call_errorhandler (
6151+ errors , & errorHandler ,
6152+ "unicode_internal" , reason ,
6153+ & starts , & end , & startinpos , & endinpos , & exc , & s ,
6154+ & v , & outpos ))
6155+ goto onError ;
61606156 }
61616157
61626158 if (unicode_resize (& v , outpos ) < 0 )
0 commit comments