Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 45714e9

Browse files
author
Fredrik Lundh
committed
Experimental UCS-4 support: made the string comparison more robust in case
sizeof(Py_UNICODE) >= sizeof(long). Also changed surrogate expansion to work if sizeof(Py_UNICODE) > 2.
1 parent 3083163 commit 45714e9

1 file changed

Lines changed: 14 additions & 11 deletions

File tree

Objects/unicodeobject.c

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -787,7 +787,7 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
787787
*p++ = (Py_UNICODE)(0xD800 + (ch >> 10));
788788

789789
/* low surrogate = bottom 10 bits added to DC00 */
790-
*p++ = (Py_UNICODE)(0xDC00 + (ch & ~0xFC00));
790+
*p++ = (Py_UNICODE)(0xDC00 + (ch & 0x03FF));
791791
break;
792792

793793
default:
@@ -1274,7 +1274,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
12741274
/* UCS-4 character. store as two surrogate characters */
12751275
chr -= 0x10000L;
12761276
*p++ = 0xD800 + (Py_UNICODE) (chr >> 10);
1277-
*p++ = 0xDC00 + (Py_UNICODE) (chr & ~0xFC00);
1277+
*p++ = 0xDC00 + (Py_UNICODE) (chr & 0x03FF);
12781278
} else {
12791279
if (unicodeescape_decoding_error(
12801280
&s, &x, errors,
@@ -3260,19 +3260,19 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
32603260

32613261
while (len1 > 0 && len2 > 0) {
32623262
Py_UNICODE c1, c2;
3263-
long diff;
32643263

32653264
c1 = *s1++;
32663265
c2 = *s2++;
3266+
32673267
if (c1 > (1<<11) * 26)
32683268
c1 += utf16Fixup[c1>>11];
32693269
if (c2 > (1<<11) * 26)
32703270
c2 += utf16Fixup[c2>>11];
3271-
32723271
/* now c1 and c2 are in UTF-32-compatible order */
3273-
diff = (long)c1 - (long)c2;
3274-
if (diff)
3275-
return (diff < 0) ? -1 : (diff != 0);
3272+
3273+
if (c1 != c2)
3274+
return (c1 < c2) ? -1 : 1;
3275+
32763276
len1--; len2--;
32773277
}
32783278

@@ -3293,11 +3293,14 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
32933293
len2 = str2->length;
32943294

32953295
while (len1 > 0 && len2 > 0) {
3296-
register long diff;
3296+
Py_UNICODE c1, c2;
3297+
3298+
c1 = *s1++;
3299+
c2 = *s2++;
3300+
3301+
if (c1 != c2)
3302+
return (c1 < c2) ? -1 : 1;
32973303

3298-
diff = (long)*s1++ - (long)*s2++;
3299-
if (diff)
3300-
return (diff < 0) ? -1 : (diff != 0);
33013304
len1--; len2--;
33023305
}
33033306

0 commit comments

Comments
 (0)