#5127: Even on narrow unicode builds, the C functions that access the Unicode

amauryfa · amauryfa · commit 324ac65cebf4 · 2010-08-18T20:44:58.000Z
Database (Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others) now accept
and return characters from the full Unicode range (Py_UCS4).

The differences visible from Python code are few:
- unicodedata.numeric(), unicodedata.decimal() and unicodedata.digit()
  now return the correct value for large code points
- repr() may consider more characters as printable.
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
@@ -221,24 +221,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
 # define _PyUnicode_Fini _PyUnicodeUCS2_Fini
 # define _PyUnicode_Init _PyUnicodeUCS2_Init
-# define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
-# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
-# define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
-# define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
-# define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
-# define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
-# define _PyUnicode_IsPrintable _PyUnicodeUCS2_IsPrintable
-# define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
-# define _PyUnicode_IsXidStart _PyUnicodeUCS2_IsXidStart
-# define _PyUnicode_IsXidContinue _PyUnicodeUCS2_IsXidContinue
-# define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
-# define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
-# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
-# define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
-# define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
-# define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
-# define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
-# define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
 
 #else
 
@@ -322,24 +304,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
 # define _PyUnicode_Fini _PyUnicodeUCS4_Fini
 # define _PyUnicode_Init _PyUnicodeUCS4_Init
-# define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
-# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
-# define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
-# define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
-# define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
-# define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
-# define _PyUnicode_IsPrintable _PyUnicodeUCS4_IsPrintable
-# define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
-# define _PyUnicode_IsXidStart _PyUnicodeUCS4_IsXidStart
-# define _PyUnicode_IsXidContinue _PyUnicodeUCS4_IsXidContinue
-# define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
-# define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
-# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
-# define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
-# define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
-# define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
-# define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
-# define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
 
 
 #endif
@@ -351,7 +315,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
    configure Python using --with-wctype-functions.  This reduces the
    interpreter's code size. */
 
-#if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
+#if defined(Py_UNICODE_WIDE) && defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
 
 #include <wctype.h>
 
@@ -1542,75 +1506,75 @@ PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
 */
 
 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsXidStart(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
-    const Py_UNICODE ch         /* Unicode character */
+    const Py_UCS4 ch         /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
-    const Py_UNICODE ch         /* Unicode character */
+    const Py_UCS4 ch         /* Unicode character */
     );
 
-PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase(
-    Py_UNICODE ch       /* Unicode character */
+PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
+    Py_UCS4 ch       /* Unicode character */
     );
 
-PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase(
-    Py_UNICODE ch       /* Unicode character */
+PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
+    Py_UCS4 ch       /* Unicode character */
     );
 
-PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase(
-    Py_UNICODE ch       /* Unicode character */
+PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_ToDigit(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsDigit(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsPrintable(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
-    Py_UNICODE ch       /* Unicode character */
+    Py_UCS4 ch       /* Unicode character */
     );
 
 PyAPI_FUNC(size_t) Py_UNICODE_strlen(
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
@@ -1353,6 +1353,10 @@ def __repr__(self):
         self.assertEqual(repr(s1()), '\\n')
         self.assertEqual(repr(s2()), '\\n')
 
+    def test_printable_repr(self):
+        self.assertEqual(repr('\U00010000'), "'%c'" % (0x10000,)) # printable
+        self.assertEqual(repr('\U00011000'), "'\\U00011000'")     # nonprintable
+
     def test_expandtabs_overflows_gracefully(self):
         # This test only affects 32-bit platforms because expandtabs can only take
         # an int as the max value, not a 64-bit C long.  If expandtabs is changed
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
@@ -294,6 +294,12 @@ def test_linebreak_7643(self):
                 self.assertEqual(len(lines), 1,
                                  r"\u%.4x should not be a linebreak" % i)
 
+    def test_UCS4(self):
+        # unicodedata should work with code points outside the BMP
+        # even on a narrow Unicode build
+        self.assertEqual(self.db.category(u"\U0001012A"), "No")
+        self.assertEqual(self.db.numeric(u"\U0001012A"), 9000)
+
 def test_main():
     test.support.run_unittest(
         UnicodeMiscTest,
diff --git a/Misc/NEWS b/Misc/NEWS
@@ -12,6 +12,12 @@ What's New in Python 3.2 Alpha 2?
 Core and Builtins
 -----------------
 
+- Issue #5127: The C functions that access the Unicode Database now accept and
+  return characters from the full Unicode range, even on narrow unicode builds
+  (Py_UNICODE_TOLOWER, Py_UNICODE_ISDECIMAL, and others).  A visible difference
+  in Python is that unicodedata.numeric() now returns the correct value for
+  large code points, and repr() may consider more characters as printable.
+
 - Issue #9425: Create PyModule_GetFilenameObject() function to get the filename
   as a unicode object, instead of a byte string. Function needed to support
   unencodable filenames. Deprecate PyModule_GetFilename() in favor of the new
diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c
@@ -26,9 +26,9 @@
 #define NUMERIC_MASK 0x1000
 
 typedef struct {
-    const Py_UNICODE upper;
-    const Py_UNICODE lower;
-    const Py_UNICODE title;
+    const Py_UCS4 upper;
+    const Py_UCS4 lower;
+    const Py_UCS4 title;
     const unsigned char decimal;
     const unsigned char digit;
     const unsigned short flags;
@@ -37,15 +37,13 @@ typedef struct {
 #include "unicodetype_db.h"
 
 static const _PyUnicode_TypeRecord *
-gettyperecord(Py_UNICODE code)
+gettyperecord(Py_UCS4 code)
 {
     int index;
 
-#ifdef Py_UNICODE_WIDE
     if (code >= 0x110000)
         index = 0;
     else
-#endif
     {
         index = index1[(code>>SHIFT)];
         index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
@@ -57,7 +55,7 @@ gettyperecord(Py_UNICODE code)
 /* Returns the titlecase Unicode characters corresponding to ch or just
    ch if no titlecase mapping is known. */
 
-Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
+Py_UCS4 _PyUnicode_ToTitlecase(register Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     int delta = ctype->title;
@@ -74,7 +72,7 @@ Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
 /* Returns 1 for Unicode characters having the category 'Lt', 0
    otherwise. */
 
-int _PyUnicode_IsTitlecase(Py_UNICODE ch)
+int _PyUnicode_IsTitlecase(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
@@ -84,7 +82,7 @@ int _PyUnicode_IsTitlecase(Py_UNICODE ch)
 /* Returns 1 for Unicode characters having the XID_Start property, 0
    otherwise. */
 
-int _PyUnicode_IsXidStart(Py_UNICODE ch)
+int _PyUnicode_IsXidStart(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
@@ -94,7 +92,7 @@ int _PyUnicode_IsXidStart(Py_UNICODE ch)
 /* Returns 1 for Unicode characters having the XID_Continue property,
    0 otherwise. */
 
-int _PyUnicode_IsXidContinue(Py_UNICODE ch)
+int _PyUnicode_IsXidContinue(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
@@ -104,14 +102,14 @@ int _PyUnicode_IsXidContinue(Py_UNICODE ch)
 /* Returns the integer decimal (0-9) for Unicode characters having
    this property, -1 otherwise. */
 
-int _PyUnicode_ToDecimalDigit(Py_UNICODE ch)
+int _PyUnicode_ToDecimalDigit(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
     return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1;
 }
 
-int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)
+int _PyUnicode_IsDecimalDigit(Py_UCS4 ch)
 {
     if (_PyUnicode_ToDecimalDigit(ch) < 0)
         return 0;
@@ -121,14 +119,14 @@ int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)
 /* Returns the integer digit (0-9) for Unicode characters having
    this property, -1 otherwise. */
 
-int _PyUnicode_ToDigit(Py_UNICODE ch)
+int _PyUnicode_ToDigit(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
     return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1;
 }
 
-int _PyUnicode_IsDigit(Py_UNICODE ch)
+int _PyUnicode_IsDigit(Py_UCS4 ch)
 {
     if (_PyUnicode_ToDigit(ch) < 0)
         return 0;
@@ -138,7 +136,7 @@ int _PyUnicode_IsDigit(Py_UNICODE ch)
 /* Returns the numeric value as double for Unicode characters having
    this property, -1.0 otherwise. */
 
-int _PyUnicode_IsNumeric(Py_UNICODE ch)
+int _PyUnicode_IsNumeric(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
@@ -158,7 +156,7 @@ int _PyUnicode_IsNumeric(Py_UNICODE ch)
       * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR)
       * Zs (Separator, Space) other than ASCII space('\x20').
 */
-int _PyUnicode_IsPrintable(Py_UNICODE ch)
+int _PyUnicode_IsPrintable(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
@@ -170,7 +168,7 @@ int _PyUnicode_IsPrintable(Py_UNICODE ch)
 /* Returns 1 for Unicode characters having the category 'Ll', 0
    otherwise. */
 
-int _PyUnicode_IsLowercase(Py_UNICODE ch)
+int _PyUnicode_IsLowercase(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
@@ -180,7 +178,7 @@ int _PyUnicode_IsLowercase(Py_UNICODE ch)
 /* Returns 1 for Unicode characters having the category 'Lu', 0
    otherwise. */
 
-int _PyUnicode_IsUppercase(Py_UNICODE ch)
+int _PyUnicode_IsUppercase(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
@@ -190,7 +188,7 @@ int _PyUnicode_IsUppercase(Py_UNICODE ch)
 /* Returns the uppercase Unicode characters corresponding to ch or just
    ch if no uppercase mapping is known. */
 
-Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
+Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     int delta = ctype->upper;
@@ -204,7 +202,7 @@ Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
 /* Returns the lowercase Unicode characters corresponding to ch or just
    ch if no lowercase mapping is known. */
 
-Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
+Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
     int delta = ctype->lower;
@@ -218,7 +216,7 @@ Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
 /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
    'Lo' or 'Lm',  0 otherwise. */
 
-int _PyUnicode_IsAlpha(Py_UNICODE ch)
+int _PyUnicode_IsAlpha(Py_UCS4 ch)
 {
     const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
 
@@ -230,27 +228,27 @@ int _PyUnicode_IsAlpha(Py_UNICODE ch)
 /* Export the interfaces using the wchar_t type for portability
    reasons:  */
 
-int _PyUnicode_IsLowercase(Py_UNICODE ch)
+int _PyUnicode_IsLowercase(Py_UCS4 ch)
 {
     return iswlower(ch);
 }
 
-int _PyUnicode_IsUppercase(Py_UNICODE ch)
+int _PyUnicode_IsUppercase(Py_UCS4 ch)
 {
     return iswupper(ch);
 }
 
-Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
+Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)
 {
     return towlower(ch);
 }
 
-Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
+Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch)
 {
     return towupper(ch);
 }
 
-int _PyUnicode_IsAlpha(Py_UNICODE ch)
+int _PyUnicode_IsAlpha(Py_UCS4 ch)
 {
     return iswalpha(ch);
 }
diff --git a/Objects/unicodetype_db.h b/Objects/unicodetype_db.h
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py

Original file line number	Diff line number	Diff line change
`@@ -26,9 +26,9 @@`
`26`	`26`	`#define NUMERIC_MASK 0x1000`
`27`	`27`
`28`	`28`	`typedef struct {`
`29`		`- const Py_UNICODE upper;`
`30`		`- const Py_UNICODE lower;`
`31`		`- const Py_UNICODE title;`
	`29`	`+ const Py_UCS4 upper;`
	`30`	`+ const Py_UCS4 lower;`
	`31`	`+ const Py_UCS4 title;`
`32`	`32`	`const unsigned char decimal;`
`33`	`33`	`const unsigned char digit;`
`34`	`34`	`const unsigned short flags;`
`@@ -37,15 +37,13 @@ typedef struct {`
`37`	`37`	`#include "unicodetype_db.h"`
`38`	`38`
`39`	`39`	`static const _PyUnicode_TypeRecord *`
`40`		`-gettyperecord(Py_UNICODE code)`
	`40`	`+gettyperecord(Py_UCS4 code)`
`41`	`41`	`{`
`42`	`42`	`int index;`
`43`	`43`
`44`		`-#ifdef Py_UNICODE_WIDE`
`45`	`44`	`if (code >= 0x110000)`
`46`	`45`	`index = 0;`
`47`	`46`	`else`
`48`		`-#endif`
`49`	`47`	`{`
`50`	`48`	`index = index1[(code>>SHIFT)];`
`51`	`49`	`index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];`
`@@ -57,7 +55,7 @@ gettyperecord(Py_UNICODE code)`
`57`	`55`	`/* Returns the titlecase Unicode characters corresponding to ch or just`
`58`	`56`	`ch if no titlecase mapping is known. */`
`59`	`57`
`60`		`-Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)`
	`58`	`+Py_UCS4 _PyUnicode_ToTitlecase(register Py_UCS4 ch)`
`61`	`59`	`{`
`62`	`60`	`const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);`
`63`	`61`	`int delta = ctype->title;`
`@@ -74,7 +72,7 @@ Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)`
`74`	`72`	`/* Returns 1 for Unicode characters having the category 'Lt', 0`
`75`	`73`	`otherwise. */`
`76`	`74`
`77`		`-int _PyUnicode_IsTitlecase(Py_UNICODE ch)`
	`75`	`+int _PyUnicode_IsTitlecase(Py_UCS4 ch)`
`78`	`76`	`{`
`79`	`77`	`const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);`
`80`	`78`
`@@ -84,7 +82,7 @@ int _PyUnicode_IsTitlecase(Py_UNICODE ch)`
`84`	`82`	`/* Returns 1 for Unicode characters having the XID_Start property, 0`
`85`	`83`	`otherwise. */`
`86`	`84`
`87`		`-int _PyUnicode_IsXidStart(Py_UNICODE ch)`
	`85`	`+int _PyUnicode_IsXidStart(Py_UCS4 ch)`
`88`	`86`	`{`
`89`	`87`	`const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);`
`90`	`88`
`@@ -94,7 +92,7 @@ int _PyUnicode_IsXidStart(Py_UNICODE ch)`
`94`	`92`	`/* Returns 1 for Unicode characters having the XID_Continue property,`
`95`	`93`	`0 otherwise. */`
`96`	`94`
`97`		`-int _PyUnicode_IsXidContinue(Py_UNICODE ch)`
	`95`	`+int _PyUnicode_IsXidContinue(Py_UCS4 ch)`
`98`	`96`	`{`
`99`	`97`	`const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);`
`100`	`98`
`@@ -104,14 +102,14 @@ int _PyUnicode_IsXidContinue(Py_UNICODE ch)`
`104`	`102`	`/* Returns the integer decimal (0-9) for Unicode characters having`
`105`	`103`	`this property, -1 otherwise. */`
`106`	`104`
`107`		`-int _PyUnicode_ToDecimalDigit(Py_UNICODE ch)`
	`105`	`+int _PyUnicode_ToDecimalDigit(Py_UCS4 ch)`
`108`	`106`	`{`
`109`	`107`	`const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);`
`110`	`108`
`111`	`109`	`return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1;`
`112`	`110`	`}`
`113`	`111`
`114`		`-int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)`
	`112`	`+int _PyUnicode_IsDecimalDigit(Py_UCS4 ch)`
`115`	`113`	`{`
`116`	`114`	`if (_PyUnicode_ToDecimalDigit(ch) < 0)`
`117`	`115`	`return 0;`
`@@ -121,14 +119,14 @@ int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)`
`121`	`119`	`/* Returns the integer digit (0-9) for Unicode characters having`
`122`	`120`	`this property, -1 otherwise. */`
`123`	`121`
`124`		`-int _PyUnicode_ToDigit(Py_UNICODE ch)`
	`122`	`+int _PyUnicode_ToDigit(Py_UCS4 ch)`
`125`	`123`	`{`
`126`	`124`	`const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);`
`127`	`125`
`128`	`126`	`return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1;`
`129`	`127`	`}`
`130`	`128`
`131`		`-int _PyUnicode_IsDigit(Py_UNICODE ch)`
	`129`	`+int _PyUnicode_IsDigit(Py_UCS4 ch)`
`132`	`130`	`{`
`133`	`131`	`if (_PyUnicode_ToDigit(ch) < 0)`
`134`	`132`	`return 0;`
`@@ -138,7 +136,7 @@ int _PyUnicode_IsDigit(Py_UNICODE ch)`
`138`	`136`	`/* Returns the numeric value as double for Unicode characters having`
`139`	`137`	`this property, -1.0 otherwise. */`
`140`	`138`
`141`		`-int _PyUnicode_IsNumeric(Py_UNICODE ch)`
	`139`	`+int _PyUnicode_IsNumeric(Py_UCS4 ch)`
`142`	`140`	`{`
`143`	`141`	`const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);`
`144`	`142`
`@@ -158,7 +156,7 @@ int _PyUnicode_IsNumeric(Py_UNICODE ch)`
`158`	`156`	`* Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR)`
`159`	`157`	`* Zs (Separator, Space) other than ASCII space('\x20').`
`160`	`158`	`*/`
`161`		`-int _PyUnicode_IsPrintable(Py_UNICODE ch)`
	`159`	`+int _PyUnicode_IsPrintable(Py_UCS4 ch)`
`162`	`160`	`{`
`163`	`161`	`const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);`
`164`	`162`
`@@ -170,7 +168,7 @@ int _PyUnicode_IsPrintable(Py_UNICODE ch)`
`170`	`168`	`/* Returns 1 for Unicode characters having the category 'Ll', 0`
`171`	`169`	`otherwise. */`
`172`	`170`
`173`		`-int _PyUnicode_IsLowercase(Py_UNICODE ch)`
	`171`	`+int _PyUnicode_IsLowercase(Py_UCS4 ch)`
`174`	`172`	`{`
`175`	`173`	`const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);`
`176`	`174`
`@@ -180,7 +178,7 @@ int _PyUnicode_IsLowercase(Py_UNICODE ch)`
`180`	`178`	`/* Returns 1 for Unicode characters having the category 'Lu', 0`
`181`	`179`	`otherwise. */`
`182`	`180`
`183`		`-int _PyUnicode_IsUppercase(Py_UNICODE ch)`
	`181`	`+int _PyUnicode_IsUppercase(Py_UCS4 ch)`
`184`	`182`	`{`
`185`	`183`	`const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);`
`186`	`184`
`@@ -190,7 +188,7 @@ int _PyUnicode_IsUppercase(Py_UNICODE ch)`
`190`	`188`	`/* Returns the uppercase Unicode characters corresponding to ch or just`
`191`	`189`	`ch if no uppercase mapping is known. */`
`192`	`190`
`193`		`-Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)`
	`191`	`+Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch)`
`194`	`192`	`{`
`195`	`193`	`const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);`
`196`	`194`	`int delta = ctype->upper;`
`@@ -204,7 +202,7 @@ Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)`
`204`	`202`	`/* Returns the lowercase Unicode characters corresponding to ch or just`
`205`	`203`	`ch if no lowercase mapping is known. */`
`206`	`204`
`207`		`-Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)`
	`205`	`+Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)`
`208`	`206`	`{`
`209`	`207`	`const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);`
`210`	`208`	`int delta = ctype->lower;`
`@@ -218,7 +216,7 @@ Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)`
`218`	`216`	`/* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',`
`219`	`217`	`'Lo' or 'Lm', 0 otherwise. */`
`220`	`218`
`221`		`-int _PyUnicode_IsAlpha(Py_UNICODE ch)`
	`219`	`+int _PyUnicode_IsAlpha(Py_UCS4 ch)`
`222`	`220`	`{`
`223`	`221`	`const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);`
`224`	`222`
`@@ -230,27 +228,27 @@ int _PyUnicode_IsAlpha(Py_UNICODE ch)`
`230`	`228`	`/* Export the interfaces using the wchar_t type for portability`
`231`	`229`	`reasons: */`
`232`	`230`
`233`		`-int _PyUnicode_IsLowercase(Py_UNICODE ch)`
	`231`	`+int _PyUnicode_IsLowercase(Py_UCS4 ch)`
`234`	`232`	`{`
`235`	`233`	`return iswlower(ch);`
`236`	`234`	`}`
`237`	`235`
`238`		`-int _PyUnicode_IsUppercase(Py_UNICODE ch)`
	`236`	`+int _PyUnicode_IsUppercase(Py_UCS4 ch)`
`239`	`237`	`{`
`240`	`238`	`return iswupper(ch);`
`241`	`239`	`}`
`242`	`240`
`243`		`-Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)`
	`241`	`+Py_UCS4 _PyUnicode_ToLowercase(Py_UCS4 ch)`
`244`	`242`	`{`
`245`	`243`	`return towlower(ch);`
`246`	`244`	`}`
`247`	`245`
`248`		`-Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)`
	`246`	`+Py_UCS4 _PyUnicode_ToUppercase(Py_UCS4 ch)`
`249`	`247`	`{`
`250`	`248`	`return towupper(ch);`
`251`	`249`	`}`
`252`	`250`
`253`		`-int _PyUnicode_IsAlpha(Py_UNICODE ch)`
	`251`	`+int _PyUnicode_IsAlpha(Py_UCS4 ch)`
`254`	`252`	`{`
`255`	`253`	`return iswalpha(ch);`
`256`	`254`	`}`