Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f8e7543

Browse files
committed
merge 3.2 (#12732)
2 parents 98d95a5 + f413b80 commit f8e7543

4 files changed

Lines changed: 30 additions & 8 deletions

File tree

Lib/test/test_pep3131.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,12 @@ class T:
88
ä = 1
99
µ = 2 # this is a compatibility character
1010
蟒 = 3
11+
𝔘𝔫𝔦𝔠𝔬𝔡𝔢 = 4
1112
self.assertEqual(getattr(T, "\xe4"), 1)
1213
self.assertEqual(getattr(T, "\u03bc"), 2)
1314
self.assertEqual(getattr(T, '\u87d2'), 3)
15+
v = getattr(T, "\U0001d518\U0001d52b\U0001d526\U0001d520\U0001d52c\U0001d521\U0001d522")
16+
self.assertEqual(v, 4)
1417

1518
def test_invalid(self):
1619
try:

Lib/test/test_unicode.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,7 @@ def test_isidentifier(self):
404404
self.assertTrue("bc".isidentifier())
405405
self.assertTrue("b_".isidentifier())
406406
self.assertTrue("µ".isidentifier())
407+
self.assertTrue("𝔘𝔫𝔦𝔠𝔬𝔡𝔢".isidentifier())
407408

408409
self.assertFalse(" ".isidentifier())
409410
self.assertFalse("[".isidentifier())

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ What's New in Python 3.3 Alpha 1?
1010
Core and Builtins
1111
-----------------
1212

13+
- Issue #12732: In narrow unicode builds, allow Unicode identifiers which fall
14+
outside the BMP.
15+
1316
- Issue #12575: Validate user-generated AST before it is compiled.
1417

1518
- Make type(None), type(Ellipsis), and type(NotImplemented) callable. They

Objects/unicodeobject.c

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8044,14 +8044,30 @@ unicode_isnumeric(PyUnicodeObject *self)
80448044
return PyBool_FromLong(1);
80458045
}
80468046

8047+
static Py_UCS4
8048+
decode_ucs4(const Py_UNICODE *s, Py_ssize_t *i, Py_ssize_t size)
8049+
{
8050+
Py_UCS4 ch;
8051+
assert(*i < size);
8052+
ch = s[(*i)++];
8053+
#ifndef Py_UNICODE_WIDE
8054+
if ((ch & 0xfffffc00) == 0xd800 &&
8055+
*i < size
8056+
&& (s[*i] & 0xFFFFFC00) == 0xDC00)
8057+
ch = ((Py_UCS4)ch << 10UL) + (Py_UCS4)(s[(*i)++]) - 0x35fdc00;
8058+
#endif
8059+
return ch;
8060+
}
8061+
80478062
int
80488063
PyUnicode_IsIdentifier(PyObject *self)
80498064
{
8050-
register const Py_UNICODE *p = PyUnicode_AS_UNICODE((PyUnicodeObject*)self);
8051-
register const Py_UNICODE *e;
8065+
Py_ssize_t i = 0, size = PyUnicode_GET_SIZE(self);
8066+
Py_UCS4 first;
8067+
const Py_UNICODE *p = PyUnicode_AS_UNICODE((PyUnicodeObject*)self);
80528068

80538069
/* Special case for empty strings */
8054-
if (PyUnicode_GET_SIZE(self) == 0)
8070+
if (!size)
80558071
return 0;
80568072

80578073
/* PEP 3131 says that the first character must be in
@@ -8062,14 +8078,13 @@ PyUnicode_IsIdentifier(PyObject *self)
80628078
definition of XID_Start and XID_Continue, it is sufficient
80638079
to check just for these, except that _ must be allowed
80648080
as starting an identifier. */
8065-
if (!_PyUnicode_IsXidStart(*p) && *p != 0x5F /* LOW LINE */)
8081+
first = decode_ucs4(p, &i, size);
8082+
if (!_PyUnicode_IsXidStart(first) && first != 0x5F /* LOW LINE */)
80668083
return 0;
80678084

8068-
e = p + PyUnicode_GET_SIZE(self);
8069-
for (p++; p < e; p++) {
8070-
if (!_PyUnicode_IsXidContinue(*p))
8085+
while (i < size)
8086+
if (!_PyUnicode_IsXidContinue(decode_ucs4(p, &i, size)))
80718087
return 0;
8072-
}
80738088
return 1;
80748089
}
80758090

0 commit comments

Comments
 (0)