@@ -7972,14 +7972,30 @@ unicode_isnumeric(PyUnicodeObject *self)
79727972 return PyBool_FromLong (1 );
79737973}
79747974
7975+ static Py_UCS4
7976+ decode_ucs4 (const Py_UNICODE * s , Py_ssize_t * i , Py_ssize_t size )
7977+ {
7978+ Py_UCS4 ch ;
7979+ assert (* i < size );
7980+ ch = s [(* i )++ ];
7981+ #ifndef Py_UNICODE_WIDE
7982+ if ((ch & 0xfffffc00 ) == 0xd800 &&
7983+ * i < size
7984+ && (s [* i ] & 0xFFFFFC00 ) == 0xDC00 )
7985+ ch = ((Py_UCS4 )ch << 10UL ) + (Py_UCS4 )(s [(* i )++ ]) - 0x35fdc00 ;
7986+ #endif
7987+ return ch ;
7988+ }
7989+
79757990int
79767991PyUnicode_IsIdentifier (PyObject * self )
79777992{
7978- register const Py_UNICODE * p = PyUnicode_AS_UNICODE ((PyUnicodeObject * )self );
7979- register const Py_UNICODE * e ;
7993+ Py_ssize_t i = 0 , size = PyUnicode_GET_SIZE (self );
7994+ Py_UCS4 first ;
7995+ const Py_UNICODE * p = PyUnicode_AS_UNICODE ((PyUnicodeObject * )self );
79807996
79817997 /* Special case for empty strings */
7982- if (PyUnicode_GET_SIZE ( self ) == 0 )
7998+ if (! size )
79837999 return 0 ;
79848000
79858001 /* PEP 3131 says that the first character must be in
@@ -7990,14 +8006,13 @@ PyUnicode_IsIdentifier(PyObject *self)
79908006 definition of XID_Start and XID_Continue, it is sufficient
79918007 to check just for these, except that _ must be allowed
79928008 as starting an identifier. */
7993- if (!_PyUnicode_IsXidStart (* p ) && * p != 0x5F /* LOW LINE */ )
8009+ first = decode_ucs4 (p , & i , size );
8010+ if (!_PyUnicode_IsXidStart (first ) && first != 0x5F /* LOW LINE */ )
79948011 return 0 ;
79958012
7996- e = p + PyUnicode_GET_SIZE (self );
7997- for (p ++ ; p < e ; p ++ ) {
7998- if (!_PyUnicode_IsXidContinue (* p ))
8013+ while (i < size )
8014+ if (!_PyUnicode_IsXidContinue (decode_ucs4 (p , & i , size )))
79998015 return 0 ;
8000- }
80018016 return 1 ;
80028017}
80038018
0 commit comments