Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 93cbca3

Browse files
committed
Merged revisions 66362 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk ........ r66362 | martin.v.loewis | 2008-09-10 15:38:12 +0200 (Mi, 10 Sep 2008) | 3 lines Issue #3811: The Unicode database was updated to 5.1. Reviewed by Fredrik Lundh and Marc-Andre Lemburg. ........
1 parent 1009d39 commit 93cbca3

8 files changed

Lines changed: 18691 additions & 15442 deletions

File tree

Doc/library/unicodedata.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@
1616

1717
This module provides access to the Unicode Character Database which defines
1818
character properties for all Unicode characters. The data in this database is
19-
based on the :file:`UnicodeData.txt` file version 4.1.0 which is publicly
19+
based on the :file:`UnicodeData.txt` file version 5.1.0 which is publicly
2020
available from ftp://ftp.unicode.org/.
2121

2222
The module uses the same names and symbols as defined by the UnicodeData File
23-
Format 4.1.0 (see http://www.unicode.org/Public/4.1.0/ucd/UCD.html). It defines
23+
Format 5.1.0 (see http://www.unicode.org/Public/5.1.0/ucd/UCD.html). It defines
2424
the following functions:
2525

2626

Lib/test/test_unicodedata.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
class UnicodeMethodsTest(unittest.TestCase):
1717

1818
# update this, if the database changes
19-
expectedchecksum = 'c198ed264497f108434b3f576d4107237221cc8a'
19+
expectedchecksum = 'aef99984a58c8e1e5363a3175f2ff9608599a93e'
2020

2121
def test_method_checksum(self):
2222
h = hashlib.sha1()
@@ -75,7 +75,7 @@ def tearDown(self):
7575
class UnicodeFunctionsTest(UnicodeDatabaseTest):
7676

7777
# update this, if the database changes
78-
expectedchecksum = '4e389f97e9f88b8b7ab743121fd643089116f9f2'
78+
expectedchecksum = '3136d5afd787dc2bcb1bdcac95e385349fbebbca'
7979

8080
def test_function_checksum(self):
8181
data = []
@@ -226,6 +226,16 @@ def test_digit_numeric_consistent(self):
226226
def test_bug_1704793(self):
227227
self.assertEquals(self.db.lookup("GOTHIC LETTER FAIHU"), '\U00010346')
228228

229+
def test_ucd_510(self):
230+
import unicodedata
231+
# In UCD 5.1.0, a mirrored property changed wrt. UCD 3.2.0
232+
self.assert_(unicodedata.mirrored("\u0f3a"))
233+
self.assert_(not unicodedata.ucd_3_2_0.mirrored("\u0f3a"))
234+
# Also, we now have two ways of representing
235+
# the upper-case mapping: as delta, or as absolute value
236+
self.assert_("a".upper()=='A')
237+
self.assert_("\u1d79".upper()=='\ua77d')
238+
229239
def test_main():
230240
test.support.run_unittest(
231241
UnicodeMiscTest,

Modules/unicodedata.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
/* ------------------------------------------------------------------------
22
3-
unicodedata -- Provides access to the Unicode 4.1 data base.
3+
unicodedata -- Provides access to the Unicode 5.1 data base.
44
5-
Data was extracted from the Unicode 4.1 UnicodeData.txt file.
5+
Data was extracted from the Unicode 5.1 UnicodeData.txt file.
66
77
Written by Marc-Andre Lemburg (mal@lemburg.com).
88
Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
@@ -34,6 +34,7 @@ typedef struct change_record {
3434
const unsigned char bidir_changed;
3535
const unsigned char category_changed;
3636
const unsigned char decimal_changed;
37+
const unsigned char mirrored_changed;
3738
const int numeric_changed;
3839
} change_record;
3940

@@ -355,6 +356,8 @@ unicodedata_mirrored(PyObject *self, PyObject *args)
355356
const change_record *old = get_old_record(self, c);
356357
if (old->category_changed == 0)
357358
index = 0; /* unassigned */
359+
else if (old->mirrored_changed != 0xFF)
360+
index = old->mirrored_changed;
358361
}
359362
return PyLong_FromLong(index);
360363
}
@@ -1179,11 +1182,11 @@ PyDoc_STRVAR(unicodedata_docstring,
11791182
"This module provides access to the Unicode Character Database which\n\
11801183
defines character properties for all Unicode characters. The data in\n\
11811184
this database is based on the UnicodeData.txt file version\n\
1182-
4.1.0 which is publically available from ftp://ftp.unicode.org/.\n\
1185+
5.1.0 which is publically available from ftp://ftp.unicode.org/.\n\
11831186
\n\
11841187
The module uses the same names and symbols as defined by the\n\
1185-
UnicodeData File Format 4.1.0 (see\n\
1186-
http://www.unicode.org/Public/4.1.0/ucd/UCD.html).");
1188+
UnicodeData File Format 5.1.0 (see\n\
1189+
http://www.unicode.org/Public/5.1.0/ucd/UCD.html).");
11871190

11881191

11891192
static struct PyModuleDef unicodedatamodule = {

0 commit comments

Comments
 (0)