Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit cfcea49

Browse files
author
Fredrik Lundh
committed
unicode database compression, step 2:
- fixed attributions
- moved decomposition data to a separate table, in preparation for step 3 (which won't happen before 2.0 final, promise!)
- use relative paths in the generator script

I have a lot more stuff in the works for 2.1, but let's leave that for another day...
1 parent 2101348 commit cfcea49

5 files changed

Lines changed: 4611 additions & 4328 deletions

File tree

Modules/unicodedata.c

Lines changed: 18 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,16 @@
44
55
Data was extracted from the Unicode 3.0 UnicodeData.txt file.
66
7-
Written by Marc-Andre Lemburg (mal@lemburg.com).
7+
Written by Marc-Andre Lemburg (mal@lemburg.com).
8+
Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
89
9-
Copyright (c) Corporation for National Research Initiatives.
10+
Copyright (c) Corporation for National Research Initiatives.
1011
1112
------------------------------------------------------------------------ */
1213

1314
#include "Python.h"
1415
#include "unicodedatabase.h"
1516

16-
#define unicode_db _PyUnicode_Database_GetRecord
17-
1817
/* --- Module API --------------------------------------------------------- */
1918

2019
static PyObject *
@@ -134,15 +133,9 @@ unicodedata_category(PyObject *self,
134133
"need a single Unicode character as parameter");
135134
goto onError;
136135
}
137-
index = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->category;
138-
if (index < 0 ||
139-
index > sizeof(_PyUnicode_CategoryNames) /
140-
sizeof(_PyUnicode_CategoryNames[0])) {
141-
PyErr_Format(PyExc_SystemError,
142-
"category index out of range: %i",
143-
index);
144-
goto onError;
145-
}
136+
index = (int) _PyUnicode_Database_GetRecord(
137+
(int) *PyUnicode_AS_UNICODE(v)
138+
)->category;
146139
return PyString_FromString(_PyUnicode_CategoryNames[index]);
147140

148141
onError:
@@ -164,15 +157,9 @@ unicodedata_bidirectional(PyObject *self,
164157
"need a single Unicode character as parameter");
165158
goto onError;
166159
}
167-
index = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->bidirectional;
168-
if (index < 0 ||
169-
index > sizeof(_PyUnicode_CategoryNames) /
170-
sizeof(_PyUnicode_CategoryNames[0])) {
171-
PyErr_Format(PyExc_SystemError,
172-
"bidirectional index out of range: %i",
173-
index);
174-
goto onError;
175-
}
160+
index = (int) _PyUnicode_Database_GetRecord(
161+
(int) *PyUnicode_AS_UNICODE(v)
162+
)->bidirectional;
176163
return PyString_FromString(_PyUnicode_BidirectionalNames[index]);
177164

178165
onError:
@@ -194,7 +181,9 @@ unicodedata_combining(PyObject *self,
194181
"need a single Unicode character as parameter");
195182
goto onError;
196183
}
197-
value = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->combining;
184+
value = (int) _PyUnicode_Database_GetRecord(
185+
(int) *PyUnicode_AS_UNICODE(v)
186+
)->combining;
198187
return PyInt_FromLong(value);
199188

200189
onError:
@@ -216,7 +205,9 @@ unicodedata_mirrored(PyObject *self,
216205
"need a single Unicode character as parameter");
217206
goto onError;
218207
}
219-
value = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->mirrored;
208+
value = (int) _PyUnicode_Database_GetRecord(
209+
(int) *PyUnicode_AS_UNICODE(v)
210+
)->mirrored;
220211
return PyInt_FromLong(value);
221212

222213
onError:
@@ -238,10 +229,9 @@ unicodedata_decomposition(PyObject *self,
238229
"need a single Unicode character as parameter");
239230
goto onError;
240231
}
241-
value = unicode_db((int)*PyUnicode_AS_UNICODE(v))->decomposition;
242-
if (value == NULL)
243-
return PyString_FromString("");
244-
else
232+
value = _PyUnicode_Database_GetDecomposition(
233+
(int) *PyUnicode_AS_UNICODE(v)
234+
);
245235
return PyString_FromString(value);
246236

247237
onError:

0 commit comments

Comments
 (0)