Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit edf368c

Browse files
committed
Make lower/upper/title work for non-BMP characters.
1 parent ddc369a commit edf368c

1 file changed

Lines changed: 15 additions & 24 deletions

File tree

Objects/unicodectype.c

Lines changed: 15 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -62,18 +62,17 @@ int _PyUnicode_IsLinebreak(Py_UNICODE ch)
6262
Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
6363
{
6464
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
65+
int delta;
6566

6667
if (ctype->title)
67-
ch += ctype->title;
68+
delta = ctype->title;
6869
else
69-
ch += ctype->upper;
70+
delta = ctype->upper;
7071

71-
#ifdef Py_UNICODE_WIDE
72-
/* The database assumes that the values wrap around at 0x10000. */
73-
if (ch > 0x10000)
74-
ch -= 0x10000;
75-
#endif
76-
return ch;
72+
if (delta >= 32768)
73+
delta -= 65536;
74+
75+
return ch + delta;
7776
}
7877

7978
/* Returns 1 for Unicode characters having the category 'Lt', 0
@@ -358,14 +357,10 @@ int _PyUnicode_IsUppercase(Py_UNICODE ch)
358357
Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
359358
{
360359
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
361-
362-
ch += ctype->upper;
363-
#ifdef Py_UNICODE_WIDE
364-
/* The database assumes that the values wrap around at 0x10000. */
365-
if (ch > 0x10000)
366-
ch -= 0x10000;
367-
#endif
368-
return ch;
360+
int delta = ctype->upper;
361+
if (delta >= 32768)
362+
delta -= 65536;
363+
return ch + delta;
369364
}
370365

371366
/* Returns the lowercase Unicode characters corresponding to ch or just
@@ -374,14 +369,10 @@ Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
374369
Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
375370
{
376371
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
377-
378-
ch += ctype->lower;
379-
#ifdef Py_UNICODE_WIDE
380-
/* The database assumes that the values wrap around at 0x10000. */
381-
if (ch > 0x10000)
382-
ch -= 0x10000;
383-
#endif
384-
return ch;
372+
int delta = ctype->lower;
373+
if (delta >= 32768)
374+
delta -= 65536;
375+
return ch + delta;
385376
}
386377

387378
/* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',

0 commit comments

Comments
 (0)