3838VERSION = "3.2"
3939
4040# The Unicode Database
41- UNIDATA_VERSION = "6.0 .0"
41+ UNIDATA_VERSION = "6.1 .0"
4242UNICODE_DATA = "UnicodeData%s.txt"
4343COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
4444EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
5858
5959# we use these ranges of PUA_15 to store name aliases and named sequences
6060NAME_ALIASES_START = 0xF0000
61- NAMED_SEQUENCES_START = 0xF0100
61+ NAMED_SEQUENCES_START = 0xF0200
6262
6363old_versions = ["3.2.0" ]
6464
9595# these ranges need to match unicodedata.c:is_unified_ideograph
9696cjk_ranges = [
9797 ('3400' , '4DB5' ),
98- ('4E00' , '9FCB ' ),
98+ ('4E00' , '9FCC ' ),
9999 ('20000' , '2A6D6' ),
100100 ('2A700' , '2B734' ),
101101 ('2B740' , '2B81D' )
@@ -958,7 +958,7 @@ def __init__(self, version,
958958 s = s .strip ()
959959 if not s or s .startswith ('#' ):
960960 continue
961- char , name = s .split (';' )
961+ char , name , abbrev = s .split (';' )
962962 char = int (char , 16 )
963963 self .aliases .append ((name , char ))
964964 # also store the name in the PUA 1
@@ -971,6 +971,7 @@ def __init__(self, version,
971971 # in order to take advantage of the compression and lookup
972972 # algorithms used for the other characters.
973973
974+ assert pua_index < NAMED_SEQUENCES_START
974975 pua_index = NAMED_SEQUENCES_START
975976 with open_data (NAMED_SEQUENCES , version ) as file :
976977 for s in file :
0 commit comments