@@ -379,19 +379,6 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
379379 if (ch > maxchar )
380380 maxchar = ch ;
381381 }
382- if (maxchar > 0x10FFFF ) {
383- printf ("Invalid Unicode string! {" );
384- for (i = 0 ; i < ascii -> length ; i ++ )
385- {
386- Py_UCS4 ch = PyUnicode_READ (kind , data , i );
387- if (i )
388- printf (", U+%04x" , ch );
389- else
390- printf ("U+%04x" , ch );
391- }
392- printf ("} (len=%lu)\n" , ascii -> length );
393- abort ();
394- }
395382 if (kind == PyUnicode_1BYTE_KIND ) {
396383 if (ascii -> state .ascii == 0 ) {
397384 assert (maxchar >= 128 );
@@ -406,7 +393,9 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
406393 }
407394 else {
408395 assert (maxchar >= 0x10000 );
409- assert (maxchar <= 0x10FFFF );
396+ /* FIXME: Issue #13441: on Solaris, localeconv() and strxfrm()
397+ return characters outside the range U+0000-U+10FFFF. */
398+ /* assert(maxchar <= 0x10FFFF); */
410399 }
411400 }
412401 return 1 ;
@@ -3482,6 +3471,7 @@ PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)
34823471 four_bytes = PyUnicode_4BYTE_DATA (unicode );
34833472 for (; four_bytes < ucs4_end ; ++ four_bytes , ++ w ) {
34843473 if (* four_bytes > 0xFFFF ) {
3474+ assert (* four_bytes <= 0x10FFFF );
34853475 /* encode surrogate pair in this case */
34863476 * w ++ = 0xD800 | ((* four_bytes - 0x10000 ) >> 10 );
34873477 * w = 0xDC00 | ((* four_bytes - 0x10000 ) & 0x3FF );
@@ -4128,6 +4118,8 @@ _PyUnicode_EncodeUTF7(PyObject *str,
41284118 continue ;
41294119encode_char :
41304120 if (ch >= 0x10000 ) {
4121+ assert (ch <= 0x10FFFF );
4122+
41314123 /* code first surrogate */
41324124 base64bits += 16 ;
41334125 base64buffer = (base64buffer << 16 ) | 0xd800 | ((ch - 0x10000 ) >> 10 );
@@ -4899,6 +4891,7 @@ _PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
48994891 * p ++ = (char )(0x80 | ((ch >> 6 ) & 0x3f ));
49004892 * p ++ = (char )(0x80 | (ch & 0x3f ));
49014893 } else /* ch >= 0x10000 */ {
4894+ assert (ch <= 0x10FFFF );
49024895 /* Encode UCS4 Unicode ordinals */
49034896 * p ++ = (char )(0xf0 | (ch >> 18 ));
49044897 * p ++ = (char )(0x80 | ((ch >> 12 ) & 0x3f ));
@@ -5971,6 +5964,7 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
59715964
59725965 /* Map 21-bit characters to '\U00xxxxxx' */
59735966 else if (ch >= 0x10000 ) {
5967+ assert (ch <= 0x10FFFF );
59745968 * p ++ = '\\' ;
59755969 * p ++ = 'U' ;
59765970 * p ++ = Py_hexdigits [(ch >> 28 ) & 0x0000000F ];
@@ -6191,6 +6185,7 @@ PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
61916185 Py_UCS4 ch = PyUnicode_READ (kind , data , pos );
61926186 /* Map 32-bit characters to '\Uxxxxxxxx' */
61936187 if (ch >= 0x10000 ) {
6188+ assert (ch <= 0x10FFFF );
61946189 * p ++ = '\\' ;
61956190 * p ++ = 'U' ;
61966191 * p ++ = Py_hexdigits [(ch >> 28 ) & 0xf ];
@@ -6546,17 +6541,14 @@ unicode_encode_ucs1(PyObject *unicode,
65466541 repsize += 2 + 3 + 1 ;
65476542 else if (ch < 10000 )
65486543 repsize += 2 + 4 + 1 ;
6549- #ifndef Py_UNICODE_WIDE
6550- else
6551- repsize += 2 + 5 + 1 ;
6552- #else
65536544 else if (ch < 100000 )
65546545 repsize += 2 + 5 + 1 ;
65556546 else if (ch < 1000000 )
65566547 repsize += 2 + 6 + 1 ;
6557- else
6548+ else {
6549+ assert (ch <= 0x10FFFF );
65586550 repsize += 2 + 7 + 1 ;
6559- #endif
6551+ }
65606552 }
65616553 requiredsize = respos + repsize + (size - collend );
65626554 if (requiredsize > ressize ) {
0 commit comments