@@ -348,6 +348,16 @@ _getucname(Py_UCS4 code, char* buffer, int buflen)
348348 return 1 ;
349349 }
350350
351+ if ((0x3400 <= code && code <= 0x4DB5 ) || /* CJK Ideograph Extension A */
352+ (0x4E00 <= code && code <= 0x9FA5 ) || /* CJK Ideograph */
353+ (0x20000 <= code && code <= 0x2A6D6 )) {/* CJK Ideograph Extension B */
354+ if (buflen < 28 )
355+ /* Worst case: CJK UNIFIED IDEOGRAPH-20000 */
356+ return 0 ;
357+ sprintf (buffer , "CJK UNIFIED IDEOGRAPH-%X" , code );
358+ return 1 ;
359+ }
360+
351361 if (code >= 0x110000 )
352362 return 0 ;
353363
@@ -449,6 +459,30 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
449459 * code = SBase + (L * VCount + V )* TCount + T ;
450460 return 1 ;
451461 }
462+ /* Otherwise, it's an illegal syllable name. */
463+ return 0 ;
464+ }
465+
466+ /* Check for unified ideographs. */
467+ if (strncmp (name , "CJK UNIFIED IDEOGRAPH-" , 22 ) == 0 ) {
468+ /* Four or five hexdigits must follow. */
469+ v = 0 ;
470+ name += 22 ;
471+ namelen -= 22 ;
472+ if (namelen != 4 && namelen != 5 )
473+ return 0 ;
474+ while (namelen -- ) {
475+ v *= 16 ;
476+ if (* name >= '0' && * name <= '9' )
477+ v += * name - '0' ;
478+ else if (* name >= 'A' && * name <= 'F' )
479+ v += * name - 'A' + 10 ;
480+ else
481+ return 0 ;
482+ name ++ ;
483+ }
484+ * code = v ;
485+ return 1 ;
452486 }
453487
454488 /* the following is the same as python's dictionary lookup, with
@@ -535,7 +569,11 @@ unicodedata_lookup(PyObject* self, PyObject* args)
535569 return NULL ;
536570
537571 if (!_getcode (name , namelen , & code )) {
538- PyErr_SetString (PyExc_KeyError , "undefined character name" );
572+ char fmt [] = "undefined character name '%s'" ;
573+ char * buf = PyMem_MALLOC (sizeof (fmt ) + namelen );
574+ sprintf (buf , fmt , name );
575+ PyErr_SetString (PyExc_KeyError , buf );
576+ PyMem_FREE (buf );
539577 return NULL ;
540578 }
541579
0 commit comments