Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Prev Previous commit
Next Next commit
Add lookup tests and fix case-insensitivity for Tangut ideographs.
  • Loading branch information
serhiy-storchaka committed Feb 13, 2026
commit b69fdfc483e45cf8b72804547a939a5f57a43e8d
24 changes: 24 additions & 0 deletions Lib/test/test_ucn.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,30 @@ def test_cjk_unified_ideographs(self):
self.checkletter("cjK UniFIeD idEogRAph-2aBcD", "\U0002abcd")
self.checkletter("CJk uNIfiEd IDeOGraPH-2AbCd", "\U0002abcd")

def test_tangut_ideographs(self):
    # Each name carries its code point as five hex digits after the
    # "TANGUT IDEOGRAPH-" prefix; the final entry repeats a name in
    # lower case.  The cases are checked in order via self.checkletter
    # (defined elsewhere on the test class).
    expected = (
        ("TANGUT IDEOGRAPH-17000", "\U00017000"),
        ("TANGUT IDEOGRAPH-187FF", "\U000187ff"),
        ("TANGUT IDEOGRAPH-18D00", "\U00018D00"),
        ("TANGUT IDEOGRAPH-18D1E", "\U00018d1e"),
        ("tangut ideograph-18d1e", "\U00018d1e"),
    )
    for name, char in expected:
        self.checkletter(name, char)

def test_egyptian_hieroglyphs(self):
    # Names of the form "EGYPTIAN HIEROGLYPH-XXXXX" paired with the
    # character at that code point; the last entry is the lower-case
    # spelling of the preceding name.
    expected = (
        ("EGYPTIAN HIEROGLYPH-13460", "\U00013460"),
        ("EGYPTIAN HIEROGLYPH-143FA", "\U000143fa"),
        ("egyptian hieroglyph-143fa", "\U000143fa"),
    )
    for name, char in expected:
        self.checkletter(name, char)

def test_khitan_small_script_characters(self):
    # Names of the form "KHITAN SMALL SCRIPT CHARACTER-XXXXX" must map
    # to the character whose code point is the trailing hex number.
    # Three distinct code points are checked with upper-case names.
    self.checkletter("KHITAN SMALL SCRIPT CHARACTER-18B00", "\U00018b00")
    self.checkletter("KHITAN SMALL SCRIPT CHARACTER-18CD5", "\U00018cd5")
    self.checkletter("KHITAN SMALL SCRIPT CHARACTER-18CFF", "\U00018cff")
    # The same name spelled in lower case must give the same character.
    self.checkletter("khitan small script character-18cff", "\U00018cff")

def test_nushu_characters(self):
    # Names of the form "NUSHU CHARACTER-XXXXX" paired with the
    # character at that code point; the last entry is the lower-case
    # spelling of the preceding name.
    expected = (
        ("NUSHU CHARACTER-1B170", "\U0001b170"),
        ("NUSHU CHARACTER-1B2FB", "\U0001b2fb"),
        ("nushu character-1b2fb", "\U0001b2fb"),
    )
    for name, char in expected:
        self.checkletter(name, char)

def test_bmp_characters(self):
for code in range(0x10000):
char = chr(code)
Expand Down
11 changes: 6 additions & 5 deletions Modules/unicodedata.c
Original file line number Diff line number Diff line change
Expand Up @@ -1498,7 +1498,7 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
}

/* Check for Tangut ideographs. */
if (strncmp(name, "TANGUT IDEOGRAPH-", 17) == 0) {
if (PyOS_strnicmp(name, "TANGUT IDEOGRAPH-", 17) == 0) {
/* Five hexdigits must follow. */
unsigned int v = 0;
name += 17;
Expand All @@ -1507,10 +1507,11 @@ _getcode(const char* name, int namelen, Py_UCS4* code)
return 0;
while (namelen--) {
v *= 16;
if (*name >= '0' && *name <= '9')
v += *name - '0';
else if (*name >= 'A' && *name <= 'F')
v += *name - 'A' + 10;
Py_UCS1 c = Py_TOUPPER(*name);
if (c >= '0' && c <= '9')
v += c - '0';
else if (c >= 'A' && c <= 'F')
v += c - 'A' + 10;
else
return 0;
name++;
Expand Down