Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 71f660e

Browse files
committed
update to Unicode 6.1
1 parent 16fa2a1 commit 71f660e

7 files changed

Lines changed: 24458 additions & 22968 deletions

File tree

Lib/test/test_unicodedata.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
class UnicodeMethodsTest(unittest.TestCase):
2222

2323
# update this, if the database changes
24-
expectedchecksum = 'df0b3ca6785a070b21f837b227dbdbdff3c2e921'
24+
expectedchecksum = 'bf7a78f1a532421b5033600102e23a92044dbba9'
2525

2626
def test_method_checksum(self):
2727
h = hashlib.sha1()
@@ -80,7 +80,7 @@ def tearDown(self):
8080
class UnicodeFunctionsTest(UnicodeDatabaseTest):
8181

8282
# update this, if the database changes
83-
expectedchecksum = 'c23dfc0b5eaf3ca2aad32d733de96bb182ccda50'
83+
expectedchecksum = '17fe2f12b788e4fff5479b469c4404bb6ecf841f'
8484
def test_function_checksum(self):
8585
data = []
8686
h = hashlib.sha1()

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ What's New in Python 3.3 Alpha 1?
1010
Core and Builtins
1111
-----------------
1212

13+
- Upgrade Unicode data to Unicode 6.1.
14+
1315
- Issue #14040: Remove rarely used file name suffixes for C extensions
1416
(under POSIX mainly).
1517

Modules/unicodedata.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -921,7 +921,7 @@ is_unified_ideograph(Py_UCS4 code)
921921
{
922922
return
923923
(0x3400 <= code && code <= 0x4DB5) || /* CJK Ideograph Extension A */
924-
(0x4E00 <= code && code <= 0x9FCB) || /* CJK Ideograph */
924+
(0x4E00 <= code && code <= 0x9FCC) || /* CJK Ideograph */
925925
(0x20000 <= code && code <= 0x2A6D6) || /* CJK Ideograph Extension B */
926926
(0x2A700 <= code && code <= 0x2B734) || /* CJK Ideograph Extension C */
927927
(0x2B740 <= code && code <= 0x2B81D); /* CJK Ideograph Extension D */

Modules/unicodedata_db.h

Lines changed: 3687 additions & 3509 deletions
Large diffs are not rendered by default.

Modules/unicodename_db.h

Lines changed: 19488 additions & 18286 deletions
Large diffs are not rendered by default.

Objects/unicodetype_db.h

Lines changed: 1273 additions & 1166 deletions
Large diffs are not rendered by default.

Tools/unicode/makeunicodedata.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
VERSION = "3.2"
3939

4040
# The Unicode Database
41-
UNIDATA_VERSION = "6.0.0"
41+
UNIDATA_VERSION = "6.1.0"
4242
UNICODE_DATA = "UnicodeData%s.txt"
4343
COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
4444
EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
@@ -58,7 +58,7 @@
5858

5959
# we use this ranges of PUA_15 to store name aliases and named sequences
6060
NAME_ALIASES_START = 0xF0000
61-
NAMED_SEQUENCES_START = 0xF0100
61+
NAMED_SEQUENCES_START = 0xF0200
6262

6363
old_versions = ["3.2.0"]
6464

@@ -95,7 +95,7 @@
9595
# these ranges need to match unicodedata.c:is_unified_ideograph
9696
cjk_ranges = [
9797
('3400', '4DB5'),
98-
('4E00', '9FCB'),
98+
('4E00', '9FCC'),
9999
('20000', '2A6D6'),
100100
('2A700', '2B734'),
101101
('2B740', '2B81D')
@@ -958,7 +958,7 @@ def __init__(self, version,
958958
s = s.strip()
959959
if not s or s.startswith('#'):
960960
continue
961-
char, name = s.split(';')
961+
char, name, abbrev = s.split(';')
962962
char = int(char, 16)
963963
self.aliases.append((name, char))
964964
# also store the name in the PUA 1
@@ -971,6 +971,7 @@ def __init__(self, version,
971971
# in order to take advantage of the compression and lookup
972972
# algorithms used for the other characters.
973973

974+
assert pua_index < NAMED_SEQUENCES_START
974975
pua_index = NAMED_SEQUENCES_START
975976
with open_data(NAMED_SEQUENCES, version) as file:
976977
for s in file:

0 commit comments

Comments
 (0)