Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit a4db686

Browse files
committed
Issue #3280: like chr() already does, the "%c" format now accepts the full unicode range
even on "narrow Unicode" builds; the result is a pair of UTF-16 surrogates.
1 parent 142957c commit a4db686

4 files changed

Lines changed: 43 additions & 27 deletions

File tree

Lib/test/test_unicode.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,10 @@ def test_formatting(self):
717717
self.assertEqual("%(x)s, %(\xfc)s" % {'x':"abc", '\xfc':"def"}, 'abc, def')
718718

719719
self.assertEqual('%c' % 0x1234, '\u1234')
720-
self.assertRaises(OverflowError, "%c".__mod__, (sys.maxunicode+1,))
720+
self.assertEqual('%c' % 0x21483, '\U00021483')
721+
self.assertRaises(OverflowError, "%c".__mod__, (0x110000,))
722+
self.assertEqual('%c' % '\U00021483', '\U00021483')
723+
self.assertRaises(TypeError, "%c".__mod__, "aa")
721724

722725
# formatting jobs delegated from the string implementation:
723726
self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')

Misc/NEWS

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ What's new in Python 3.0b2?
1212
Core and Builtins
1313
-----------------
1414

15+
- Issue #3280: like chr(), the "%c" format now accepts unicode code points
16+
beyond the Basic Multilingual Plane (above 0xffff) on all configurations. On
17+
"narrow Unicode" builds, the result is a string of 2 code units, forming a
18+
UTF-16 surrogate pair.
19+
1520
- Issue #3282: str.isprintable() should return False for undefined
1621
Unicode characters.
1722

Objects/unicodeobject.c

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8730,37 +8730,54 @@ formatchar(Py_UNICODE *buf,
87308730
size_t buflen,
87318731
PyObject *v)
87328732
{
8733-
/* presume that the buffer is at least 2 characters long */
8733+
/* presume that the buffer is at least 3 characters long */
87348734
if (PyUnicode_Check(v)) {
8735-
if (PyUnicode_GET_SIZE(v) != 1)
8736-
goto onError;
8737-
buf[0] = PyUnicode_AS_UNICODE(v)[0];
8735+
if (PyUnicode_GET_SIZE(v) == 1) {
8736+
buf[0] = PyUnicode_AS_UNICODE(v)[0];
8737+
buf[1] = '\0';
8738+
return 1;
8739+
}
8740+
#ifndef Py_UNICODE_WIDE
8741+
if (PyUnicode_GET_SIZE(v) == 2) {
8742+
/* Decode a valid surrogate pair */
8743+
int c0 = PyUnicode_AS_UNICODE(v)[0];
8744+
int c1 = PyUnicode_AS_UNICODE(v)[1];
8745+
if (0xD800 <= c0 && c0 <= 0xDBFF &&
8746+
0xDC00 <= c1 && c1 <= 0xDFFF) {
8747+
buf[0] = c0;
8748+
buf[1] = c1;
8749+
buf[2] = '\0';
8750+
return 2;
8751+
}
8752+
}
8753+
#endif
8754+
goto onError;
87388755
}
87398756
else {
87408757
/* Integer input truncated to a character */
87418758
long x;
87428759
x = PyLong_AsLong(v);
87438760
if (x == -1 && PyErr_Occurred())
87448761
goto onError;
8745-
#ifdef Py_UNICODE_WIDE
8762+
87468763
if (x < 0 || x > 0x10ffff) {
87478764
PyErr_SetString(PyExc_OverflowError,
8748-
"%c arg not in range(0x110000) "
8749-
"(wide Python build)");
8765+
"%c arg not in range(0x110000)");
87508766
return -1;
87518767
}
8752-
#else
8753-
if (x < 0 || x > 0xffff) {
8754-
PyErr_SetString(PyExc_OverflowError,
8755-
"%c arg not in range(0x10000) "
8756-
"(narrow Python build)");
8757-
return -1;
8768+
8769+
#ifndef Py_UNICODE_WIDE
8770+
if (x > 0xffff) {
8771+
x -= 0x10000;
8772+
buf[0] = (Py_UNICODE)(0xD800 | (x >> 10));
8773+
buf[1] = (Py_UNICODE)(0xDC00 | (x & 0x3FF));
8774+
return 2;
87588775
}
87598776
#endif
87608777
buf[0] = (Py_UNICODE) x;
8778+
buf[1] = '\0';
8779+
return 1;
87618780
}
8762-
buf[1] = '\0';
8763-
return 1;
87648781

87658782
onError:
87668783
PyErr_SetString(PyExc_TypeError,

Python/modsupport.c

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -294,21 +294,12 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
294294
case 'C':
295295
{
296296
int i = va_arg(*p_va, int);
297-
Py_UNICODE c;
298297
if (i < 0 || i > PyUnicode_GetMax()) {
299-
#ifdef Py_UNICODE_WIDE
300298
PyErr_SetString(PyExc_OverflowError,
301-
"%c arg not in range(0x110000) "
302-
"(wide Python build)");
303-
#else
304-
PyErr_SetString(PyExc_OverflowError,
305-
"%c arg not in range(0x10000) "
306-
"(narrow Python build)");
307-
#endif
299+
"%c arg not in range(0x110000)";
308300
return NULL;
309301
}
310-
c = i;
311-
return PyUnicode_FromUnicode(&c, 1);
302+
return PyUnicode_FromOrdinal(i);
312303
}
313304

314305
case 's':

0 commit comments

Comments
 (0)