@@ -3246,10 +3246,11 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
32463246 u = NULL ;
32473247 } else {
32483248 /* check for integer overflow */
3249- if (len > PY_SIZE_MAX / 4 )
3249+ if (len > PY_SIZE_MAX / 6 )
32503250 return NULL ;
3251- /* "\XX" may become "\u005c\uHHLL" (12 bytes) */
3252- u = PyBytes_FromStringAndSize ((char * )NULL , len * 4 );
3251+ /* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
3252+ "\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
3253+ u = PyBytes_FromStringAndSize ((char * )NULL , len * 6 );
32533254 if (u == NULL )
32543255 return NULL ;
32553256 p = buf = PyBytes_AsString (u );
@@ -3266,20 +3267,24 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
32663267 PyObject * w ;
32673268 char * r ;
32683269 Py_ssize_t rn , i ;
3269- w = decode_utf8 (c , & s , end , "utf-16 -be" );
3270+ w = decode_utf8 (c , & s , end , "utf-32 -be" );
32703271 if (w == NULL ) {
32713272 Py_DECREF (u );
32723273 return NULL ;
32733274 }
32743275 r = PyBytes_AS_STRING (w );
32753276 rn = Py_SIZE (w );
3276- assert (rn % 2 == 0 );
3277- for (i = 0 ; i < rn ; i += 2 ) {
3278- sprintf (p , "\\u %02x%02x" ,
3277+ assert (rn % 4 == 0 );
3278+ for (i = 0 ; i < rn ; i += 4 ) {
3279+ sprintf (p , "\\U%02x%02x %02x%02x" ,
32793280 r [i + 0 ] & 0xFF ,
3280- r [i + 1 ] & 0xFF );
3281- p += 6 ;
3281+ r [i + 1 ] & 0xFF ,
3282+ r [i + 2 ] & 0xFF ,
3283+ r [i + 3 ] & 0xFF );
3284+ p += 10 ;
32823285 }
3286+ /* Should be impossible to overflow */
3287+ assert (p - buf <= Py_SIZE (u ));
32833288 Py_DECREF (w );
32843289 } else {
32853290 * p ++ = * s ++ ;
0 commit comments