Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 0dcf67e

Browse files
author
Fredrik Lundh
committed
more unicode tweaks: make unichr(0xdddddddd) behave like u"\Udddddddd"
wrt surrogates. (this extends the valid range from 65535 to 1114111)
1 parent 5b97935 commit 0dcf67e

1 file changed

Lines changed: 17 additions & 6 deletions

File tree

Python/bltinmodule.c

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -308,23 +308,34 @@ static PyObject *
308308
builtin_unichr(PyObject *self, PyObject *args)
309309
{
310310
long x;
311-
Py_UNICODE s[1];
311+
Py_UNICODE s[2];
312312

313313
if (!PyArg_ParseTuple(args, "l:unichr", &x))
314314
return NULL;
315-
if (x < 0 || x >= 65536) {
315+
316+
if (x < 0 || x > 0x10ffff) {
316317
PyErr_SetString(PyExc_ValueError,
317-
"unichr() arg not in range(65536)");
318+
"unichr() arg not in range(0x10ffff)");
318319
return NULL;
319320
}
320-
s[0] = (Py_UNICODE)x;
321-
return PyUnicode_FromUnicode(s, 1);
321+
322+
if (x <= 0xffff) {
323+
/* UCS-2 character */
324+
s[0] = (Py_UNICODE) x;
325+
return PyUnicode_FromUnicode(s, 1);
326+
} else {
327+
/* UCS-4 character. store as two surrogate characters */
328+
x -= 0x10000L;
329+
s[0] = 0xD800 + (Py_UNICODE) (x >> 10);
330+
s[1] = 0xDC00 + (Py_UNICODE) (x & 0x03FF);
331+
return PyUnicode_FromUnicode(s, 2);
332+
}
322333
}
323334

324335
static char unichr_doc[] =
325336
"unichr(i) -> Unicode character\n\
326337
\n\
327-
Return a Unicode string of one character with ordinal i; 0 <= i < 65536.";
338+
Return a Unicode string of one character with ordinal i; 0 <= i <= 0x10ffff.";
328339

329340

330341
static PyObject *

0 commit comments

Comments
 (0)