@@ -8044,14 +8044,30 @@ unicode_isnumeric(PyUnicodeObject *self)
80448044 return PyBool_FromLong (1 );
80458045}
80468046
8047+ static Py_UCS4
8048+ decode_ucs4 (const Py_UNICODE * s , Py_ssize_t * i , Py_ssize_t size )
8049+ {
8050+ Py_UCS4 ch ;
8051+ assert (* i < size );
8052+ ch = s [(* i )++ ];
8053+ #ifndef Py_UNICODE_WIDE
8054+ if ((ch & 0xfffffc00 ) == 0xd800 &&
8055+ * i < size
8056+ && (s [* i ] & 0xFFFFFC00 ) == 0xDC00 )
8057+ ch = ((Py_UCS4 )ch << 10UL ) + (Py_UCS4 )(s [(* i )++ ]) - 0x35fdc00 ;
8058+ #endif
8059+ return ch ;
8060+ }
8061+
/* PyUnicode_IsIdentifier, shown here in diff form: lines marked `-` are the
   removed implementation and lines marked `+` are its replacement.

   Returns 0 for an empty string, 0 when the first character is neither
   accepted by _PyUnicode_IsXidStart() nor equal to 0x5F (LOW LINE), 0 when
   any later character is rejected by _PyUnicode_IsXidContinue(), and 1
   otherwise.

   The removed lines test each Py_UNICODE element of the buffer on its own;
   the added lines obtain every character through decode_ucs4() instead.

   NOTE(review): the hunk boundary inside the body elides a few lines in the
   middle of the PEP 3131 comment, so that comment is incomplete here. */
80478062int
80488063PyUnicode_IsIdentifier (PyObject * self )
80498064{
8050- register const Py_UNICODE * p = PyUnicode_AS_UNICODE ((PyUnicodeObject * )self );
8051- register const Py_UNICODE * e ;
8065+ Py_ssize_t i = 0 , size = PyUnicode_GET_SIZE (self );
8066+ Py_UCS4 first ;
8067+ const Py_UNICODE * p = PyUnicode_AS_UNICODE ((PyUnicodeObject * )self );
80528068
80538069 /* Special case for empty strings */
8054- if (PyUnicode_GET_SIZE ( self ) == 0 )
8070+ if (! size )
80558071 return 0 ;
80568072
80578073 /* PEP 3131 says that the first character must be in
@@ -8062,14 +8078,13 @@ PyUnicode_IsIdentifier(PyObject *self)
80628078 definition of XID_Start and XID_Continue, it is sufficient
80638079 to check just for these, except that _ must be allowed
80648080 as starting an identifier. */
8065- if (!_PyUnicode_IsXidStart (* p ) && * p != 0x5F /* LOW LINE */ )
/* decode_ucs4() advances i past the character it returns, so the loop
   further down resumes at the character following the first one. */
8081+ first = decode_ucs4 (p , & i , size );
8082+ if (!_PyUnicode_IsXidStart (first ) && first != 0x5F /* LOW LINE */ )
80668083 return 0 ;
80678084
8068- e = p + PyUnicode_GET_SIZE (self );
8069- for (p ++ ; p < e ; p ++ ) {
8070- if (!_PyUnicode_IsXidContinue (* p ))
/* Every remaining character must pass the XID_Continue test; i is moved
   forward only by decode_ucs4(), which may consume more than one element
   per call when Py_UNICODE_WIDE is not defined. */
8085+ while (i < size )
8086+ if (!_PyUnicode_IsXidContinue (decode_ucs4 (p , & i , size )))
80718087 return 0 ;
8072- }
80738088 return 1 ;
80748089}
80758090
0 commit comments