Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 480f1bb

Browse files
committed
Update Unicode database to Unicode 4.1.
1 parent e2b4677 commit 480f1bb

12 files changed

Lines changed: 17104 additions & 13167 deletions

File tree

Doc/lib/libunicodedata.tex

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,11 @@ \section{\module{unicodedata} ---
1414
This module provides access to the Unicode Character Database which
1515
defines character properties for all Unicode characters. The data in
1616
this database is based on the \file{UnicodeData.txt} file version
17-
3.2.0 which is publically available from \url{ftp://ftp.unicode.org/}.
17+
4.1.0 which is publicly available from \url{ftp://ftp.unicode.org/}.
1818

1919
The module uses the same names and symbols as defined by the
20-
UnicodeData File Format 3.2.0 (see
21-
\url{http://www.unicode.org/Public/3.2-Update/UnicodeData-3.2.0.html}). It
20+
UnicodeData File Format 4.1.0 (see
21+
\url{http://www.unicode.org/Public/4.1-Update/UnicodeData-4.1.0.html}). It
2222
defines the following functions:
2323

2424
\begin{funcdesc}{lookup}{name}
@@ -130,3 +130,12 @@ \section{\module{unicodedata} ---
130130

131131
\versionadded{2.3}
132132
\end{datadesc}
133+
134+
\begin{datadesc}{db_3_2_0}
135+
This is an object that has the same methods as the entire
136+
module, but uses the Unicode database version 3.2 instead,
137+
for applications that require this specific version of
138+
the Unicode database (such as IDNA).
139+
140+
\versionadded{2.5}
141+
\end{datadesc}

Include/ucnhash.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,14 @@ typedef struct {
1414
int size;
1515

1616
/* Get name for a given character code. Returns non-zero if
17-
success, zero if not. Does not set Python exceptions. */
18-
int (*getname)(Py_UCS4 code, char* buffer, int buflen);
17+
success, zero if not. Does not set Python exceptions.
18+
If self is NULL, data come from the default version of the database.
19+
If it is not NULL, it should be a unicodedata.db_X_Y_Z object */
20+
int (*getname)(PyObject *self, Py_UCS4 code, char* buffer, int buflen);
1921

2022
/* Get character code for a given name. Same error handling
2123
as for getname. */
22-
int (*getcode)(const char* name, int namelen, Py_UCS4* code);
24+
int (*getcode)(PyObject *self, const char* name, int namelen, Py_UCS4* code);
2325

2426
} _PyUnicode_Name_CAPI;
2527

Lib/encodings/idna.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)
22

3-
import stringprep, unicodedata, re, codecs
3+
import stringprep, re, codecs
4+
from unicodedata import db_3_2_0 as unicodedata
45

56
# IDNA section 3.1
67
dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]")

Lib/stringprep.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
and mappings, for which a mapping function is provided.
66
"""
77

8-
import unicodedata
8+
from unicodedata import db_3_2_0 as unicodedata
99

1010
assert unicodedata.unidata_version == '3.2.0'
1111

Lib/test/test_unicodedata.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
class UnicodeMethodsTest(unittest.TestCase):
1717

1818
# update this, if the database changes
19-
expectedchecksum = 'a37276dc2c158bef6dfd908ad34525c97180fad9'
19+
expectedchecksum = 'a6555cd209d960dcfa17bfdce0c96d91cfa9a9ba'
2020

2121
def test_method_checksum(self):
2222
h = sha.sha()
@@ -75,7 +75,7 @@ def tearDown(self):
7575
class UnicodeFunctionsTest(UnicodeDatabaseTest):
7676

7777
# update this, if the database changes
78-
expectedchecksum = 'cfe20a967a450ebc82ca68c3e4eed344164e11af'
78+
expectedchecksum = 'b45b79f3203ee1a896d9b5655484adaff5d4964b'
7979

8080
def test_function_checksum(self):
8181
data = []

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,10 @@ Core and builtins
279279
Extension Modules
280280
-----------------
281281

282+
- The unicodedata module was updated to the 4.1 version of the Unicode
283+
database. The 3.2 version is still available as unicodedata.db_3_2_0
284+
for applications that require this specific version (such as IDNA).
285+
282286
- The timing module is no longer built by default. It was deprecated
283287
in PEP 4 in Python 2.0 or earlier.
284288

0 commit comments

Comments
 (0)