File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -66,10 +66,11 @@ Copyright (c) Corporation for National Research Initiatives.
6666#error Must define Py_UNICODE_SIZE
6767#endif
6868
69- /* experimental UCS-4 support. enable at your own risk! */
70- #undef USE_UCS4_STORAGE
71- #if Py_UNICODE_SIZE == 4
72- #define USE_UCS4_STORAGE
69+ /* Py_UNICODE_WIDE is defined below when Py_UNICODE_SIZE >= 4 and enables UCS-4 storage.
70+ Otherwise, Unicode strings are stored as UCS-2 (with limited support for UTF-16) */
71+
72+ #if Py_UNICODE_SIZE >= 4
73+ #define Py_UNICODE_WIDE
7374#endif
7475
7576/* Set these flags if the platform has "wchar.h", "wctype.h" and the
@@ -81,12 +82,12 @@ Copyright (c) Corporation for National Research Initiatives.
8182#ifndef PY_UNICODE_TYPE
8283
8384/* Windows has a usable wchar_t type (unless we're using UCS-4) */
84- # if defined(MS_WIN32 ) && !defined( USE_UCS4_STORAGE )
85+ # if defined(MS_WIN32 ) && Py_UNICODE_SIZE == 2
8586# define HAVE_USABLE_WCHAR_T
8687# define PY_UNICODE_TYPE wchar_t
8788# endif
8889
89- # if defined(USE_UCS4_STORAGE )
90+ # if defined(Py_UNICODE_WIDE )
9091# define PY_UNICODE_TYPE Py_UCS4
9192# endif
9293
Original file line number Diff line number Diff line change 1313
1414#include "sre_constants.h"
1515
16- /* size of a code word (must be unsigned short or larger) */
17- #ifdef USE_UCS4_STORAGE
16+ /* size of a code word (must be unsigned short or larger, and
17+ large enough to hold a Py_UNICODE character) */
18+ #ifdef Py_UNICODE_WIDE
1819#define SRE_CODE unsigned long
1920#else
2021#define SRE_CODE unsigned short
Original file line number Diff line number Diff line change @@ -68,7 +68,7 @@ Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
6868 else
6969 ch += ctype -> upper ;
7070
71- #ifdef USE_UCS4_STORAGE
71+ #ifdef Py_UNICODE_WIDE
7272 /* The database assumes that the values wrap around at 0x10000. */
7373 if (ch > 0x10000 )
7474 ch -= 0x10000 ;
@@ -360,7 +360,7 @@ Py_UNICODE _PyUnicode_ToUppercase(register Py_UNICODE ch)
360360 const _PyUnicode_TypeRecord * ctype = gettyperecord (ch );
361361
362362 ch += ctype -> upper ;
363- #ifdef USE_UCS4_STORAGE
363+ #ifdef Py_UNICODE_WIDE
364364 /* The database assumes that the values wrap around at 0x10000. */
365365 if (ch > 0x10000 )
366366 ch -= 0x10000 ;
@@ -376,7 +376,7 @@ Py_UNICODE _PyUnicode_ToLowercase(register Py_UNICODE ch)
376376 const _PyUnicode_TypeRecord * ctype = gettyperecord (ch );
377377
378378 ch += ctype -> lower ;
379- #ifdef USE_UCS4_STORAGE
379+ #ifdef Py_UNICODE_WIDE
380380 /* The database assumes that the values wrap around at 0x10000. */
381381 if (ch > 0x10000 )
382382 ch -= 0x10000 ;
Original file line number Diff line number Diff line change @@ -106,7 +106,7 @@ static char unicode_default_encoding[100];
106106Py_UNICODE
107107PyUnicode_GetMax ()
108108{
109- #ifdef USE_UCS4_STORAGE
109+ #ifdef Py_UNICODE_WIDE
110110 return 0x10FFFF ;
111111#else
112112 /* This is actually an illegal character, so it should
@@ -791,7 +791,7 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
791791 errmsg = "illegal encoding" ;
792792 goto utf8Error ;
793793 }
794- #if Py_UNICODE_SIZE == 4
794+ #ifdef Py_UNICODE_WIDE
795795 * p ++ = (Py_UNICODE )ch ;
796796#else
797797 /* compute and append the two surrogates: */
@@ -1080,7 +1080,7 @@ PyObject *PyUnicode_DecodeUTF16(const char *s,
10801080 ch2 = (ch2 >> 8 ) | (ch2 << 8 );
10811081#endif
10821082 if (0xDC00 <= ch2 && ch2 <= 0xDFFF ) {
1083- #if Py_UNICODE_SIZE == 2
1083+ #ifndef Py_UNICODE_WIDE
10841084 /* This is valid data (a UTF-16 surrogate pair), but
10851085 we are not able to store this information since our
10861086 Py_UNICODE type only has 16 bits... this might
@@ -1326,7 +1326,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
13261326 * p ++ = (Py_UNICODE ) chr ;
13271327 else if (chr <= 0x10ffff ) {
13281328 /* UCS-4 character. Either store directly, or as a surrogate pair. */
1329- #if Py_UNICODE_SIZE == 4
1329+ #ifdef Py_UNICODE_WIDE
13301330 * p ++ = chr ;
13311331#else
13321332 chr -= 0x10000L ;
Original file line number Diff line number Diff line change @@ -325,7 +325,7 @@ builtin_unichr(PyObject *self, PyObject *args)
325325 return PyUnicode_FromUnicode (s , 1 );
326326 }
327327 else {
328- #if Py_UNICODE_SIZE == 2
328+ #ifndef Py_UNICODE_WIDE
329329 /* UCS-4 character. Store as two surrogate characters. */
330330 x -= 0x10000L ;
331331 s [0 ] = 0xD800 + (Py_UNICODE ) (x >> 10 );
You can’t perform that action at this time.
0 commit comments