Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 76df43d

Browse files
committed
Issue #16330: Use surrogate-related macros
Patch written by Serhiy Storchaka.
1 parent a5e7cd0 commit 76df43d

6 files changed

Lines changed: 25 additions & 28 deletions

File tree

Include/unicodeobject.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -180,9 +180,9 @@ typedef unsigned char Py_UCS1;
180180
} while (0)
181181

182182
/* macros to work with surrogates */
183-
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= ch && ch <= 0xDFFF)
184-
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= ch && ch <= 0xDBFF)
185-
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= ch && ch <= 0xDFFF)
183+
#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
184+
#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
185+
#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
186186
/* Join two surrogate characters and return a single Py_UCS4 value. */
187187
#define Py_UNICODE_JOIN_SURROGATES(high, low) \
188188
(((((Py_UCS4)(high) & 0x03FF) << 10) | \

Modules/_json.c

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -174,14 +174,13 @@ ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
174174
default:
175175
if (c >= 0x10000) {
176176
/* UTF-16 surrogate pair */
177-
Py_UCS4 v = c - 0x10000;
178-
c = 0xd800 | ((v >> 10) & 0x3ff);
177+
Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
179178
output[chars++] = 'u';
180-
output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
181-
output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
182-
output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
183-
output[chars++] = Py_hexdigits[(c ) & 0xf];
184-
c = 0xdc00 | (v & 0x3ff);
179+
output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
180+
output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
181+
output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
182+
output[chars++] = Py_hexdigits[(v ) & 0xf];
183+
c = Py_UNICODE_LOW_SURROGATE(c);
185184
output[chars++] = '\\';
186185
}
187186
output[chars++] = 'u';
@@ -431,7 +430,7 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
431430
}
432431
}
433432
/* Surrogate pair */
434-
if ((c & 0xfc00) == 0xd800) {
433+
if (Py_UNICODE_IS_HIGH_SURROGATE(c)) {
435434
Py_UCS4 c2 = 0;
436435
if (end + 6 >= len) {
437436
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
@@ -462,13 +461,13 @@ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next
462461
goto bail;
463462
}
464463
}
465-
if ((c2 & 0xfc00) != 0xdc00) {
464+
if (!Py_UNICODE_IS_LOW_SURROGATE(c2)) {
466465
raise_errmsg("Unpaired high surrogate", pystr, end - 5);
467466
goto bail;
468467
}
469-
c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
468+
c = Py_UNICODE_JOIN_SURROGATES(c, c2);
470469
}
471-
else if ((c & 0xfc00) == 0xdc00) {
470+
else if (Py_UNICODE_IS_LOW_SURROGATE(c)) {
472471
raise_errmsg("Unpaired low surrogate", pystr, end - 5);
473472
goto bail;
474473
}

Modules/cjkcodecs/cjkcodecs.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,8 @@ static const struct dbcs_map *mapping_list;
148148
#if Py_UNICODE_SIZE == 2
149149
# define WRITEUCS4(c) \
150150
REQUIRE_OUTBUF(2) \
151-
(*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10); \
152-
(*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff); \
151+
(*outbuf)[0] = Py_UNICODE_HIGH_SURROGATE(c); \
152+
(*outbuf)[1] = Py_UNICODE_LOW_SURROGATE(c); \
153153
NEXT_OUT(2)
154154
#else
155155
# define WRITEUCS4(c) \
@@ -188,11 +188,10 @@ static const struct dbcs_map *mapping_list;
188188

189189
#if Py_UNICODE_SIZE == 2
190190
#define DECODE_SURROGATE(c) \
191-
if (c >> 10 == 0xd800 >> 10) { /* high surrogate */ \
191+
if (Py_UNICODE_IS_HIGH_SURROGATE(c)) { \
192192
REQUIRE_INBUF(2) \
193-
if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \
194-
c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
195-
((ucs4_t)(IN2) - 0xdc00); \
193+
if (Py_UNICODE_IS_LOW_SURROGATE(IN2)) { \
194+
c = Py_UNICODE_JOIN_SURROGATES(c, IN2) \
196195
} \
197196
}
198197
#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)

Objects/unicodeobject.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4412,7 +4412,7 @@ _PyUnicode_EncodeUTF7(PyObject *str,
44124412

44134413
/* code first surrogate */
44144414
base64bits += 16;
4415-
base64buffer = (base64buffer << 16) | 0xd800 | ((ch-0x10000) >> 10);
4415+
base64buffer = (base64buffer << 16) | Py_UNICODE_HIGH_SURROGATE(ch);
44164416
while (base64bits >= 6) {
44174417
*out++ = TO_BASE64(base64buffer >> (base64bits-6));
44184418
base64bits -= 6;
@@ -7052,9 +7052,8 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
70527052
charsize = 1;
70537053
}
70547054
else {
7055-
ch -= 0x10000;
7056-
chars[0] = 0xd800 + (ch >> 10);
7057-
chars[1] = 0xdc00 + (ch & 0x3ff);
7055+
chars[0] = Py_UNICODE_HIGH_SURROGATE(ch);
7056+
chars[1] = Py_UNICODE_LOW_SURROGATE(ch);
70587057
charsize = 2;
70597058
}
70607059

Python/codecs.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -761,7 +761,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
761761
for (i = start; i < end; i++) {
762762
/* object is guaranteed to be "ready" */
763763
Py_UCS4 ch = PyUnicode_READ_CHAR(object, i);
764-
if (ch < 0xd800 || ch > 0xdfff) {
764+
if (!Py_UNICODE_IS_SURROGATE(ch)) {
765765
/* Not a surrogate, fail with original exception */
766766
PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
767767
Py_DECREF(res);
@@ -797,7 +797,7 @@ PyCodec_SurrogatePassErrors(PyObject *exc)
797797
(p[2] & 0xc0) == 0x80)) {
798798
/* it's a three-byte code */
799799
ch = ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6) + (p[2] & 0x3f);
800-
if (ch < 0xd800 || ch > 0xdfff)
800+
if (!Py_UNICODE_IS_SURROGATE(ch))
801801
/* it's not a surrogate - fail */
802802
ch = 0;
803803
}

Python/fileutils.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ _Py_char2wchar(const char* arg, size_t *size)
8585
/* Only use the result if it contains no
8686
surrogate characters. */
8787
for (tmp = res; *tmp != 0 &&
88-
(*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
88+
!Py_UNICODE_IS_SURROGATE(*tmp); tmp++)
8989
;
9090
if (*tmp == 0) {
9191
if (size != NULL)
@@ -131,7 +131,7 @@ _Py_char2wchar(const char* arg, size_t *size)
131131
memset(&mbs, 0, sizeof mbs);
132132
continue;
133133
}
134-
if (*out >= 0xd800 && *out <= 0xdfff) {
134+
if (Py_UNICODE_IS_SURROGATE(*out)) {
135135
/* Surrogate character. Escape the original
136136
byte sequence with surrogateescape. */
137137
argsize -= converted;

0 commit comments

Comments
 (0)