Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 8a16054

Browse files
committed
Marc-Andre Lemburg: The large Unicode database table is now broken up into
pages of 4k entries each. This should fix compiler problems on some platforms.
1 parent b95de4f commit 8a16054

3 files changed

Lines changed: 118 additions & 10 deletions

File tree

Modules/unicodedata.c

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,18 @@ Written by Marc-Andre Lemburg ([email protected]).
1313
#include "Python.h"
1414
#include "unicodedatabase.h"
1515

16+
/* --- Helpers ------------------------------------------------------------ */
17+
18+
static
19+
const _PyUnicode_DatabaseRecord *unicode_db(register int i)
20+
{
21+
register int page = i >> 12;
22+
23+
if (page < sizeof(_PyUnicode_Database))
24+
return &_PyUnicode_Database[page][i & 0x0fff];
25+
return &_PyUnicode_Database[0][0];
26+
}
27+
1628
/* --- Module API --------------------------------------------------------- */
1729

1830
static PyObject *
@@ -132,7 +144,7 @@ unicodedata_category(PyObject *self,
132144
"need a single Unicode character as parameter");
133145
goto onError;
134146
}
135-
index = (int)_PyUnicode_Database[(int)*PyUnicode_AS_UNICODE(v)].category;
147+
index = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->category;
136148
if (index < 0 ||
137149
index > sizeof(_PyUnicode_CategoryNames) /
138150
sizeof(_PyUnicode_CategoryNames[0])) {
@@ -162,8 +174,7 @@ unicodedata_bidirectional(PyObject *self,
162174
"need a single Unicode character as parameter");
163175
goto onError;
164176
}
165-
index = (int)_PyUnicode_Database[
166-
(int)*PyUnicode_AS_UNICODE(v)].bidirectional;
177+
index = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->bidirectional;
167178
if (index < 0 ||
168179
index > sizeof(_PyUnicode_CategoryNames) /
169180
sizeof(_PyUnicode_CategoryNames[0])) {
@@ -193,8 +204,7 @@ unicodedata_combining(PyObject *self,
193204
"need a single Unicode character as parameter");
194205
goto onError;
195206
}
196-
value = (int)_PyUnicode_Database[
197-
(int)*PyUnicode_AS_UNICODE(v)].combining;
207+
value = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->combining;
198208
return PyInt_FromLong(value);
199209

200210
onError:
@@ -216,7 +226,7 @@ unicodedata_mirrored(PyObject *self,
216226
"need a single Unicode character as parameter");
217227
goto onError;
218228
}
219-
value = (int)_PyUnicode_Database[(int)*PyUnicode_AS_UNICODE(v)].mirrored;
229+
value = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->mirrored;
220230
return PyInt_FromLong(value);
221231

222232
onError:
@@ -238,7 +248,7 @@ unicodedata_decomposition(PyObject *self,
238248
"need a single Unicode character as parameter");
239249
goto onError;
240250
}
241-
value = _PyUnicode_Database[(int)*PyUnicode_AS_UNICODE(v)].decomposition;
251+
value = unicode_db((int)*PyUnicode_AS_UNICODE(v))->decomposition;
242252
if (value == NULL)
243253
return PyString_FromString("");
244254
else

Modules/unicodedatabase.c

Lines changed: 100 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,9 +87,9 @@ const char *_PyUnicode_BidirectionalNames[21] = {
8787
0 /* Sentinel */
8888
};
8989

90-
/* --- Unicode Database --------------------------------------------------- */
90+
/* --- Unicode Database Pages --------------------------------------------- */
9191

92-
const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536] = {
92+
const _PyUnicode_DatabaseRecord _PyUnicode_Database_0[4096] = {
9393

9494
/* U+0000 */ { 13, 0, 15, 0, 0 },
9595
/* U+0001 */ { 13, 0, 15, 0, 0 },
@@ -4187,6 +4187,11 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536] = {
41874187
/* U+0ffd */ { 0, 0, 0, 0, 0 },
41884188
/* U+0ffe */ { 0, 0, 0, 0, 0 },
41894189
/* U+0fff */ { 0, 0, 0, 0, 0 },
4190+
4191+
};
4192+
4193+
const _PyUnicode_DatabaseRecord _PyUnicode_Database_1[4096] = {
4194+
41904195
/* U+1000 */ { 19, 0, 1, 0, 0 },
41914196
/* U+1001 */ { 19, 0, 1, 0, 0 },
41924197
/* U+1002 */ { 19, 0, 1, 0, 0 },
@@ -8283,6 +8288,11 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536] = {
82838288
/* U+1ffd */ { 29, 0, 19, 0, "00B4" },
82848289
/* U+1ffe */ { 29, 0, 19, 0, "<compat> 0020 0314" },
82858290
/* U+1fff */ { 0, 0, 0, 0, 0 },
8291+
8292+
};
8293+
8294+
const _PyUnicode_DatabaseRecord _PyUnicode_Database_2[4096] = {
8295+
82868296
/* U+2000 */ { 10, 0, 18, 0, "2002" },
82878297
/* U+2001 */ { 10, 0, 18, 0, "2003" },
82888298
/* U+2002 */ { 10, 0, 18, 0, "<compat> 0020" },
@@ -12379,6 +12389,11 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536] = {
1237912389
/* U+2ffd */ { 0, 0, 0, 0, 0 },
1238012390
/* U+2ffe */ { 0, 0, 0, 0, 0 },
1238112391
/* U+2fff */ { 0, 0, 0, 0, 0 },
12392+
12393+
};
12394+
12395+
const _PyUnicode_DatabaseRecord _PyUnicode_Database_3[4096] = {
12396+
1238212397
/* U+3000 */ { 10, 0, 18, 0, "<wide> 0020" },
1238312398
/* U+3001 */ { 26, 0, 19, 0, 0 },
1238412399
/* U+3002 */ { 26, 0, 19, 0, 0 },
@@ -16475,6 +16490,11 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536] = {
1647516490
/* U+3ffd */ { 0, 0, 0, 0, 0 },
1647616491
/* U+3ffe */ { 0, 0, 0, 0, 0 },
1647716492
/* U+3fff */ { 0, 0, 0, 0, 0 },
16493+
16494+
};
16495+
16496+
const _PyUnicode_DatabaseRecord _PyUnicode_Database_4[4096] = {
16497+
1647816498
/* U+4000 */ { 0, 0, 0, 0, 0 },
1647916499
/* U+4001 */ { 0, 0, 0, 0, 0 },
1648016500
/* U+4002 */ { 0, 0, 0, 0, 0 },
@@ -20571,6 +20591,11 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536] = {
2057120591
/* U+4ffd */ { 0, 0, 0, 0, 0 },
2057220592
/* U+4ffe */ { 0, 0, 0, 0, 0 },
2057320593
/* U+4fff */ { 0, 0, 0, 0, 0 },
20594+
20595+
};
20596+
20597+
const _PyUnicode_DatabaseRecord _PyUnicode_Database_5[4096] = {
20598+
2057420599
/* U+5000 */ { 0, 0, 0, 0, 0 },
2057520600
/* U+5001 */ { 0, 0, 0, 0, 0 },
2057620601
/* U+5002 */ { 0, 0, 0, 0, 0 },
@@ -24667,6 +24692,11 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536] = {
2466724692
/* U+5ffd */ { 0, 0, 0, 0, 0 },
2466824693
/* U+5ffe */ { 0, 0, 0, 0, 0 },
2466924694
/* U+5fff */ { 0, 0, 0, 0, 0 },
24695+
24696+
};
24697+
24698+
const _PyUnicode_DatabaseRecord _PyUnicode_Database_6[4096] = {
24699+
2467024700
/* U+6000 */ { 0, 0, 0, 0, 0 },
2467124701
/* U+6001 */ { 0, 0, 0, 0, 0 },
2467224702
/* U+6002 */ { 0, 0, 0, 0, 0 },
@@ -28763,6 +28793,11 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536] = {
2876328793
/* U+6ffd */ { 0, 0, 0, 0, 0 },
2876428794
/* U+6ffe */ { 0, 0, 0, 0, 0 },
2876528795
/* U+6fff */ { 0, 0, 0, 0, 0 },
28796+
28797+
};
28798+
28799+
const _PyUnicode_DatabaseRecord _PyUnicode_Database_7[4096] = {
28800+
2876628801
/* U+7000 */ { 0, 0, 0, 0, 0 },
2876728802
/* U+7001 */ { 0, 0, 0, 0, 0 },
2876828803
/* U+7002 */ { 0, 0, 0, 0, 0 },
@@ -32859,6 +32894,11 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536] = {
3285932894
/* U+7ffd */ { 0, 0, 0, 0, 0 },
3286032895
/* U+7ffe */ { 0, 0, 0, 0, 0 },
3286132896
/* U+7fff */ { 0, 0, 0, 0, 0 },
32897+
32898+
};
32899+
32900+
const _PyUnicode_DatabaseRecord _PyUnicode_Database_8[4096] = {
32901+
3286232902
/* U+8000 */ { 0, 0, 0, 0, 0 },
3286332903
/* U+8001 */ { 0, 0, 0, 0, 0 },
3286432904
/* U+8002 */ { 0, 0, 0, 0, 0 },
@@ -36955,6 +36995,11 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536] = {
3695536995
/* U+8ffd */ { 0, 0, 0, 0, 0 },
3695636996
/* U+8ffe */ { 0, 0, 0, 0, 0 },
3695736997
/* U+8fff */ { 0, 0, 0, 0, 0 },
36998+
36999+
};
37000+
37001+
const _PyUnicode_DatabaseRecord _PyUnicode_Database_9[4096] = {
37002+
3695837003
/* U+9000 */ { 0, 0, 0, 0, 0 },
3695937004
/* U+9001 */ { 0, 0, 0, 0, 0 },
3696037005
/* U+9002 */ { 0, 0, 0, 0, 0 },
@@ -41051,6 +41096,11 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536] = {
4105141096
/* U+9ffd */ { 0, 0, 0, 0, 0 },
4105241097
/* U+9ffe */ { 0, 0, 0, 0, 0 },
4105341098
/* U+9fff */ { 0, 0, 0, 0, 0 },
41099+
41100+
};
41101+
41102+
const _PyUnicode_DatabaseRecord _PyUnicode_Database_10[4096] = {
41103+
4105441104
/* U+a000 */ { 19, 0, 1, 0, 0 },
4105541105
/* U+a001 */ { 19, 0, 1, 0, 0 },
4105641106
/* U+a002 */ { 19, 0, 1, 0, 0 },
@@ -45147,6 +45197,11 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536] = {
4514745197
/* U+affd */ { 0, 0, 0, 0, 0 },
4514845198
/* U+affe */ { 0, 0, 0, 0, 0 },
4514945199
/* U+afff */ { 0, 0, 0, 0, 0 },
45200+
45201+
};
45202+
45203+
const _PyUnicode_DatabaseRecord _PyUnicode_Database_11[4096] = {
45204+
4515045205
/* U+b000 */ { 0, 0, 0, 0, 0 },
4515145206
/* U+b001 */ { 0, 0, 0, 0, 0 },
4515245207
/* U+b002 */ { 0, 0, 0, 0, 0 },
@@ -49243,6 +49298,11 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536] = {
4924349298
/* U+bffd */ { 0, 0, 0, 0, 0 },
4924449299
/* U+bffe */ { 0, 0, 0, 0, 0 },
4924549300
/* U+bfff */ { 0, 0, 0, 0, 0 },
49301+
49302+
};
49303+
49304+
const _PyUnicode_DatabaseRecord _PyUnicode_Database_12[4096] = {
49305+
4924649306
/* U+c000 */ { 0, 0, 0, 0, 0 },
4924749307
/* U+c001 */ { 0, 0, 0, 0, 0 },
4924849308
/* U+c002 */ { 0, 0, 0, 0, 0 },
@@ -53339,6 +53399,11 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536] = {
5333953399
/* U+cffd */ { 0, 0, 0, 0, 0 },
5334053400
/* U+cffe */ { 0, 0, 0, 0, 0 },
5334153401
/* U+cfff */ { 0, 0, 0, 0, 0 },
53402+
53403+
};
53404+
53405+
const _PyUnicode_DatabaseRecord _PyUnicode_Database_13[4096] = {
53406+
5334253407
/* U+d000 */ { 0, 0, 0, 0, 0 },
5334353408
/* U+d001 */ { 0, 0, 0, 0, 0 },
5334453409
/* U+d002 */ { 0, 0, 0, 0, 0 },
@@ -57435,6 +57500,11 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536] = {
5743557500
/* U+dffd */ { 0, 0, 0, 0, 0 },
5743657501
/* U+dffe */ { 0, 0, 0, 0, 0 },
5743757502
/* U+dfff */ { 15, 0, 1, 0, 0 },
57503+
57504+
};
57505+
57506+
const _PyUnicode_DatabaseRecord _PyUnicode_Database_14[4096] = {
57507+
5743857508
/* U+e000 */ { 16, 0, 1, 0, 0 },
5743957509
/* U+e001 */ { 0, 0, 0, 0, 0 },
5744057510
/* U+e002 */ { 0, 0, 0, 0, 0 },
@@ -61531,6 +61601,11 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536] = {
6153161601
/* U+effd */ { 0, 0, 0, 0, 0 },
6153261602
/* U+effe */ { 0, 0, 0, 0, 0 },
6153361603
/* U+efff */ { 0, 0, 0, 0, 0 },
61604+
61605+
};
61606+
61607+
const _PyUnicode_DatabaseRecord _PyUnicode_Database_15[4096] = {
61608+
6153461609
/* U+f000 */ { 0, 0, 0, 0, 0 },
6153561610
/* U+f001 */ { 0, 0, 0, 0, 0 },
6153661611
/* U+f002 */ { 0, 0, 0, 0, 0 },
@@ -65627,4 +65702,27 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536] = {
6562765702
/* U+fffd */ { 30, 0, 19, 0, 0 },
6562865703
/* U+fffe */ { 0, 0, 0, 0, 0 },
6562965704
/* U+ffff */ { 0, 0, 0, 0, 0 },
65705+
65706+
};
65707+
65708+
/* --- Unicode Database --------------------------------------------------- */
65709+
65710+
/* Page directory: slot N refers to the 4096-record page table
   _PyUnicode_Database_N, which covers code points N * 0x1000 through
   N * 0x1000 + 0x0fff. */
const _PyUnicode_DatabaseRecord *_PyUnicode_Database[16] = {
    _PyUnicode_Database_0,  _PyUnicode_Database_1,
    _PyUnicode_Database_2,  _PyUnicode_Database_3,
    _PyUnicode_Database_4,  _PyUnicode_Database_5,
    _PyUnicode_Database_6,  _PyUnicode_Database_7,
    _PyUnicode_Database_8,  _PyUnicode_Database_9,
    _PyUnicode_Database_10, _PyUnicode_Database_11,
    _PyUnicode_Database_12, _PyUnicode_Database_13,
    _PyUnicode_Database_14, _PyUnicode_Database_15,
};
65728+

Modules/unicodedatabase.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,4 @@ extern const char *_PyUnicode_BidirectionalNames[21];
3030

3131
/* --- Unicode Database --------------------------------------------------- */
3232

33-
extern const _PyUnicode_DatabaseRecord _PyUnicode_Database[65536];
33+
extern const _PyUnicode_DatabaseRecord *_PyUnicode_Database[16];

0 commit comments

Comments
 (0)