|
14 | 14 | #include "Python.h" |
15 | 15 | #include "unicodedatabase.h" |
16 | 16 |
|
/* Per-character property record; getrecord() returns a pointer to one
   of these from _PyUnicode_Database_Records. */
typedef struct {
    /* index into _PyUnicode_CategoryNames */
    const unsigned char category;
    /* combining class value 0 - 255 */
    const unsigned char combining;
    /* index into _PyUnicode_BidirectionalNames */
    const unsigned char bidirectional;
    /* true if mirrored in bidir mode */
    const unsigned char mirrored;
} _PyUnicode_DatabaseRecord;
| 25 | + |
| 26 | +/* data file generated by Tools/unicode/makeunicodedata.py */ |
| 27 | +#include "unicodedata_db.h" |
| 28 | + |
| 29 | +static const _PyUnicode_DatabaseRecord* |
| 30 | +getrecord(PyUnicodeObject* v) |
| 31 | +{ |
| 32 | + int code; |
| 33 | + int index; |
| 34 | + |
| 35 | + code = (int) *PyUnicode_AS_UNICODE(v); |
| 36 | + |
| 37 | + if (code < 0 || code >= 65536) |
| 38 | + index = 0; |
| 39 | + else { |
| 40 | + index = index1[(code>>SHIFT)]; |
| 41 | + index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))]; |
| 42 | + } |
| 43 | + |
| 44 | + return &_PyUnicode_Database_Records[index]; |
| 45 | +} |
| 46 | + |
17 | 47 | /* --- Module API --------------------------------------------------------- */ |
18 | 48 |
|
19 | 49 | static PyObject * |
20 | | -unicodedata_decimal(PyObject *self, |
21 | | - PyObject *args) |
| 50 | +unicodedata_decimal(PyObject *self, PyObject *args) |
22 | 51 | { |
23 | 52 | PyUnicodeObject *v; |
24 | 53 | PyObject *defobj = NULL; |
25 | 54 | long rc; |
26 | 55 |
|
27 | 56 | if (!PyArg_ParseTuple(args, "O!|O:decimal", |
28 | 57 | &PyUnicode_Type, &v, &defobj)) |
29 | | - goto onError; |
| 58 | + return NULL; |
30 | 59 | if (PyUnicode_GET_SIZE(v) != 1) { |
31 | 60 | PyErr_SetString(PyExc_TypeError, |
32 | 61 | "need a single Unicode character as parameter"); |
33 | | - goto onError; |
| 62 | + return NULL; |
34 | 63 | } |
35 | 64 | rc = Py_UNICODE_TODECIMAL(*PyUnicode_AS_UNICODE(v)); |
36 | 65 | if (rc < 0) { |
37 | 66 | if (defobj == NULL) { |
38 | 67 | PyErr_SetString(PyExc_ValueError, |
39 | 68 | "not a decimal"); |
40 | | - goto onError; |
| 69 | + return NULL; |
41 | 70 | } |
42 | 71 | else { |
43 | 72 | Py_INCREF(defobj); |
44 | 73 | return defobj; |
45 | 74 | } |
46 | 75 | } |
47 | 76 | return PyInt_FromLong(rc); |
48 | | - |
49 | | - onError: |
50 | | - return NULL; |
51 | 77 | } |
52 | 78 |
|
53 | 79 | static PyObject * |
54 | | -unicodedata_digit(PyObject *self, |
55 | | - PyObject *args) |
| 80 | +unicodedata_digit(PyObject *self, PyObject *args) |
56 | 81 | { |
57 | 82 | PyUnicodeObject *v; |
58 | 83 | PyObject *defobj = NULL; |
59 | 84 | long rc; |
60 | 85 |
|
61 | 86 | if (!PyArg_ParseTuple(args, "O!|O:digit", |
62 | 87 | &PyUnicode_Type, &v, &defobj)) |
63 | | - goto onError; |
| 88 | + return NULL; |
64 | 89 | if (PyUnicode_GET_SIZE(v) != 1) { |
65 | 90 | PyErr_SetString(PyExc_TypeError, |
66 | 91 | "need a single Unicode character as parameter"); |
67 | | - goto onError; |
| 92 | + return NULL; |
68 | 93 | } |
69 | 94 | rc = Py_UNICODE_TODIGIT(*PyUnicode_AS_UNICODE(v)); |
70 | 95 | if (rc < 0) { |
71 | 96 | if (defobj == NULL) { |
72 | 97 | PyErr_SetString(PyExc_ValueError, |
73 | 98 | "not a digit"); |
74 | | - goto onError; |
| 99 | + return NULL; |
75 | 100 | } |
76 | 101 | else { |
77 | 102 | Py_INCREF(defobj); |
78 | 103 | return defobj; |
79 | 104 | } |
80 | 105 | } |
81 | 106 | return PyInt_FromLong(rc); |
82 | | - |
83 | | - onError: |
84 | | - return NULL; |
85 | 107 | } |
86 | 108 |
|
87 | 109 | static PyObject * |
88 | | -unicodedata_numeric(PyObject *self, |
89 | | - PyObject *args) |
| 110 | +unicodedata_numeric(PyObject *self, PyObject *args) |
90 | 111 | { |
91 | 112 | PyUnicodeObject *v; |
92 | 113 | PyObject *defobj = NULL; |
93 | 114 | double rc; |
94 | 115 |
|
95 | 116 | if (!PyArg_ParseTuple(args, "O!|O:numeric", |
96 | 117 | &PyUnicode_Type, &v, &defobj)) |
97 | | - goto onError; |
| 118 | + return NULL; |
98 | 119 | if (PyUnicode_GET_SIZE(v) != 1) { |
99 | 120 | PyErr_SetString(PyExc_TypeError, |
100 | 121 | "need a single Unicode character as parameter"); |
101 | | - goto onError; |
| 122 | + return NULL; |
102 | 123 | } |
103 | 124 | rc = Py_UNICODE_TONUMERIC(*PyUnicode_AS_UNICODE(v)); |
104 | 125 | if (rc < 0) { |
105 | 126 | if (defobj == NULL) { |
106 | 127 | PyErr_SetString(PyExc_ValueError, |
107 | 128 | "not a numeric character"); |
108 | | - goto onError; |
| 129 | + return NULL; |
109 | 130 | } |
110 | 131 | else { |
111 | 132 | Py_INCREF(defobj); |
112 | 133 | return defobj; |
113 | 134 | } |
114 | 135 | } |
115 | 136 | return PyFloat_FromDouble(rc); |
116 | | - |
117 | | - onError: |
118 | | - return NULL; |
119 | 137 | } |
120 | 138 |
|
121 | 139 | static PyObject * |
122 | | -unicodedata_category(PyObject *self, |
123 | | - PyObject *args) |
| 140 | +unicodedata_category(PyObject *self, PyObject *args) |
124 | 141 | { |
125 | 142 | PyUnicodeObject *v; |
126 | 143 | int index; |
127 | 144 |
|
128 | 145 | if (!PyArg_ParseTuple(args, "O!:category", |
129 | 146 | &PyUnicode_Type, &v)) |
130 | | - goto onError; |
| 147 | + return NULL; |
131 | 148 | if (PyUnicode_GET_SIZE(v) != 1) { |
132 | 149 | PyErr_SetString(PyExc_TypeError, |
133 | 150 | "need a single Unicode character as parameter"); |
134 | | - goto onError; |
| 151 | + return NULL; |
135 | 152 | } |
136 | | - index = (int) _PyUnicode_Database_GetRecord( |
137 | | - (int) *PyUnicode_AS_UNICODE(v) |
138 | | - )->category; |
| 153 | + index = (int) getrecord(v)->category; |
139 | 154 | return PyString_FromString(_PyUnicode_CategoryNames[index]); |
140 | | - |
141 | | - onError: |
142 | | - return NULL; |
143 | 155 | } |
144 | 156 |
|
145 | 157 | static PyObject * |
146 | | -unicodedata_bidirectional(PyObject *self, |
147 | | - PyObject *args) |
| 158 | +unicodedata_bidirectional(PyObject *self, PyObject *args) |
148 | 159 | { |
149 | 160 | PyUnicodeObject *v; |
150 | 161 | int index; |
151 | 162 |
|
152 | 163 | if (!PyArg_ParseTuple(args, "O!:bidirectional", |
153 | 164 | &PyUnicode_Type, &v)) |
154 | | - goto onError; |
| 165 | + return NULL; |
155 | 166 | if (PyUnicode_GET_SIZE(v) != 1) { |
156 | 167 | PyErr_SetString(PyExc_TypeError, |
157 | 168 | "need a single Unicode character as parameter"); |
158 | | - goto onError; |
| 169 | + return NULL; |
159 | 170 | } |
160 | | - index = (int) _PyUnicode_Database_GetRecord( |
161 | | - (int) *PyUnicode_AS_UNICODE(v) |
162 | | - )->bidirectional; |
| 171 | + index = (int) getrecord(v)->bidirectional; |
163 | 172 | return PyString_FromString(_PyUnicode_BidirectionalNames[index]); |
164 | | - |
165 | | - onError: |
166 | | - return NULL; |
167 | 173 | } |
168 | 174 |
|
169 | 175 | static PyObject * |
170 | | -unicodedata_combining(PyObject *self, |
171 | | - PyObject *args) |
| 176 | +unicodedata_combining(PyObject *self, PyObject *args) |
172 | 177 | { |
173 | 178 | PyUnicodeObject *v; |
174 | | - int value; |
175 | 179 |
|
176 | 180 | if (!PyArg_ParseTuple(args, "O!:combining", |
177 | 181 | &PyUnicode_Type, &v)) |
178 | | - goto onError; |
| 182 | + return NULL; |
179 | 183 | if (PyUnicode_GET_SIZE(v) != 1) { |
180 | 184 | PyErr_SetString(PyExc_TypeError, |
181 | 185 | "need a single Unicode character as parameter"); |
182 | | - goto onError; |
| 186 | + return NULL; |
183 | 187 | } |
184 | | - value = (int) _PyUnicode_Database_GetRecord( |
185 | | - (int) *PyUnicode_AS_UNICODE(v) |
186 | | - )->combining; |
187 | | - return PyInt_FromLong(value); |
188 | | - |
189 | | - onError: |
190 | | - return NULL; |
| 188 | + return PyInt_FromLong((int) getrecord(v)->combining); |
191 | 189 | } |
192 | 190 |
|
193 | 191 | static PyObject * |
194 | | -unicodedata_mirrored(PyObject *self, |
195 | | - PyObject *args) |
| 192 | +unicodedata_mirrored(PyObject *self, PyObject *args) |
196 | 193 | { |
197 | 194 | PyUnicodeObject *v; |
198 | | - int value; |
199 | 195 |
|
200 | 196 | if (!PyArg_ParseTuple(args, "O!:mirrored", |
201 | 197 | &PyUnicode_Type, &v)) |
202 | | - goto onError; |
| 198 | + return NULL; |
203 | 199 | if (PyUnicode_GET_SIZE(v) != 1) { |
204 | 200 | PyErr_SetString(PyExc_TypeError, |
205 | 201 | "need a single Unicode character as parameter"); |
206 | | - goto onError; |
| 202 | + return NULL; |
207 | 203 | } |
208 | | - value = (int) _PyUnicode_Database_GetRecord( |
209 | | - (int) *PyUnicode_AS_UNICODE(v) |
210 | | - )->mirrored; |
211 | | - return PyInt_FromLong(value); |
212 | | - |
213 | | - onError: |
214 | | - return NULL; |
| 204 | + return PyInt_FromLong((int) getrecord(v)->mirrored); |
215 | 205 | } |
216 | 206 |
|
217 | 207 | static PyObject * |
218 | | -unicodedata_decomposition(PyObject *self, |
219 | | - PyObject *args) |
| 208 | +unicodedata_decomposition(PyObject *self, PyObject *args) |
220 | 209 | { |
221 | 210 | PyUnicodeObject *v; |
222 | | - const char *value; |
| 211 | + char decomp[256]; |
| 212 | + int code, index, count, i; |
223 | 213 |
|
224 | 214 | if (!PyArg_ParseTuple(args, "O!:decomposition", |
225 | 215 | &PyUnicode_Type, &v)) |
226 | | - goto onError; |
| 216 | + return NULL; |
227 | 217 | if (PyUnicode_GET_SIZE(v) != 1) { |
228 | 218 | PyErr_SetString(PyExc_TypeError, |
229 | 219 | "need a single Unicode character as parameter"); |
230 | | - goto onError; |
| 220 | + return NULL; |
| 221 | + } |
| 222 | + |
| 223 | + code = (int) *PyUnicode_AS_UNICODE(v); |
| 224 | + |
| 225 | + if (code < 0 || code >= 65536) |
| 226 | + index = 0; |
| 227 | + else { |
| 228 | + index = decomp_index1[(code>>DECOMP_SHIFT)]; |
| 229 | + index = decomp_index2[(index<<DECOMP_SHIFT)+ |
| 230 | + (code&((1<<DECOMP_SHIFT)-1))]; |
| 231 | + } |
| 232 | + |
| 233 | + /* high byte is of hex bytes (usually one or two), low byte |
| 234 | + is prefix code (from*/ |
| 235 | + count = decomp_data[index] >> 8; |
| 236 | + |
| 237 | + /* XXX: could allocate the PyString up front instead |
| 238 | + (strlen(prefix) + 5 * count + 1 bytes) */ |
| 239 | + |
| 240 | + /* copy prefix */ |
| 241 | + i = strlen(decomp_prefix[decomp_data[index] & 255]); |
| 242 | + memcpy(decomp, decomp_prefix[decomp_data[index] & 255], i); |
| 243 | + |
| 244 | + while (count-- > 0) { |
| 245 | + if (i) |
| 246 | + decomp[i++] = ' '; |
| 247 | + sprintf(decomp + i, "%04X", decomp_data[++index]); |
| 248 | + i += strlen(decomp + i); |
231 | 249 | } |
232 | | - value = _PyUnicode_Database_GetDecomposition( |
233 | | - (int) *PyUnicode_AS_UNICODE(v) |
234 | | - ); |
235 | | - return PyString_FromString(value); |
236 | 250 |
|
237 | | - onError: |
238 | | - return NULL; |
| 251 | + decomp[i] = '\0'; |
| 252 | + |
| 253 | + return PyString_FromString(decomp); |
239 | 254 | } |
240 | 255 |
|
241 | 256 | /* XXX Add doc strings. */ |
|
0 commit comments