Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 0fdb90c

Browse files
author
Fredrik Lundh
committed
Refactored the unicodeobject/ucnhash interface to hide the
implementation details inside the ucnhash module. Also cleaned up the unicode copyright blurb a little; Secret Labs' internal revision history isn't that interesting...
1 parent a2bf270 commit 0fdb90c

4 files changed

Lines changed: 146 additions & 210 deletions

File tree

Include/ucnhash.h

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,29 @@
1+
/* Unicode name database interface */
12

2-
#include "Python.h"
3-
#include <stdlib.h>
4-
5-
/* --- C API ----------------------------------------------------*/
6-
/* C API for usage by other Python modules */
7-
typedef struct _Py_UCNHashAPI
8-
{
9-
unsigned long cKeys;
10-
unsigned long cchMax;
11-
unsigned long (*hash)(const char *key, unsigned int cch);
12-
const void *(*getValue)(unsigned long iKey);
13-
} _Py_UCNHashAPI;
14-
15-
typedef struct
16-
{
17-
const char *pszUCN;
18-
Py_UCS4 value;
19-
} _Py_UnicodeCharacterName;
3+
#ifndef Py_UCNHASH_H
4+
#define Py_UCNHASH_H
5+
#ifdef __cplusplus
6+
extern "C" {
7+
#endif
208

9+
/* revised ucnhash CAPI interface (exported through a PyCObject) */
10+
11+
/* Table of function pointers for looking up character names and codes.
   Modules/ucnhash.c fills in one instance of this struct and exports a
   pointer to it through a PyCObject. */
typedef struct {
12+
13+
/* Size of this struct in bytes; the provider sets it to
   sizeof(_PyUnicode_Name_CAPI) */
14+
int size;
15+
16+
/* Get the name for a given character code, writing it to buffer
17+
(of size buflen). Returns non-zero on success, zero on failure. Does not set Python exceptions. */
18+
int (*getname)(Py_UCS4 code, char* buffer, int buflen);
19+
20+
/* Get the character code for a given name of namelen characters,
21+
storing it in *code. Returns non-zero on success, zero on failure. Does not set Python exceptions (same as getname). */
22+
int (*getcode)(const char* name, int namelen, Py_UCS4* code);
23+
24+
} _PyUnicode_Name_CAPI;
25+
26+
#ifdef __cplusplus
27+
}
28+
#endif
29+
#endif /* !Py_UCNHASH_H */

Lib/test/test_ucn.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,22 +50,20 @@
5050

5151
# strict error testing:
5252
print "Testing unicode character name expansion strict error handling....",
53-
k_cchMaxUnicodeName = 83
54-
55-
s = "\N{" + "1" * (k_cchMaxUnicodeName + 2) + "}"
5653
try:
57-
unicode(s, 'unicode-escape', 'strict')
54+
unicode("\N{blah}", 'unicode-escape', 'strict')
5855
except UnicodeError:
5956
pass
6057
else:
61-
raise AssertionError, "failed to raise an exception when presented " \
62-
"with a UCN > k_cchMaxUnicodeName"
58+
raise AssertionError, "failed to raise an exception when given a bogus character name"
59+
6360
try:
64-
unicode("\N{blah}", 'unicode-escape', 'strict')
61+
unicode("\N{" + "x" * 100000 + "}", 'unicode-escape', 'strict')
6562
except UnicodeError:
6663
pass
6764
else:
68-
raise AssertionError, "failed to raise an exception when given a bogus character name"
65+
raise AssertionError, "failed to raise an exception when given a very " \
66+
"long bogus character name"
6967

7068
try:
7169
unicode("\N{SPACE", 'unicode-escape', 'strict')

Modules/ucnhash.c

Lines changed: 74 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
1+
#include "Python.h"
12
#include "ucnhash.h"
23

4+
/* Modified for Python 2.1 by Fredrik Lundh ([email protected]) */
5+
6+
/* One entry of the character name table (aucn): a character name
   together with the code it stands for. */
typedef struct {
7+
const char* pszUCN; /* the character name; ucnhash_getcode compares it case-insensitively */
8+
Py_UCS4 value; /* the code ucnhash_getcode stores in *code on a match */
9+
}_Py_UnicodeCharacterName;
10+
311
/*
412
* The hash is produced using the algorithm described in
513
* "Optimal algorithms for minimal perfect hashing",
@@ -14,11 +22,11 @@
1422
* Generated on: Fri Jul 14 08:00:58 2000
1523
*/
1624

25+
#define cKeys 10538
1726
#define k_cHashElements 18836
1827
#define k_cchMaxKey 83
1928
#define k_cKeys 10538
2029

21-
2230
staticforward const unsigned short G[k_cHashElements];
2331
staticforward const _Py_UnicodeCharacterName aucn[k_cKeys];
2432

@@ -34,8 +42,7 @@ static long f1(const char *key, unsigned int cch)
3442
while (--len >= 0)
3543
{
3644
/* (1000003 * x) ^ toupper(*(p++))
37-
* translated to handle > 32 bit longs
38-
*/
45+
* translated to handle > 32 bit longs */
3946
x = (0xf4243 * x);
4047
x = x & 0xFFFFFFFF;
4148
x = x ^ toupper(*(p++));
@@ -98,110 +105,96 @@ static long f2(const char *key, unsigned int cch)
98105
}
99106

100107

101-
static unsigned long hash(const char *key, unsigned int cch)
108+
static unsigned long
109+
hash(const char *key, unsigned int cch)
102110
{
103111
return ((unsigned long)(G[ f1(key, cch) ]) + (unsigned long)(G[ f2(key, cch) ]) ) % k_cHashElements;
104112
}
105113

106-
const void *getValue(unsigned long iKey)
114+
const _Py_UnicodeCharacterName *
115+
getValue(unsigned long iKey)
107116
{
108-
return &aucn[iKey];
117+
return (_Py_UnicodeCharacterName *) &aucn[iKey];
109118
}
110119

111-
/* Helper for adding objects to dictionaries. Check for errors with
112-
PyErr_Occurred() */
113-
static
114-
void insobj(PyObject *dict,
115-
char *name,
116-
PyObject *v)
120+
static int
121+
mystrnicmp(const char *s1, const char *s2, size_t count)
117122
{
118-
PyDict_SetItemString(dict, name, v);
119-
Py_XDECREF(v);
123+
char c1, c2;
124+
125+
if (count) {
126+
do {
127+
c1 = tolower(*(s1++));
128+
c2 = tolower(*(s2++));
129+
} while (--count && c1 == c2);
130+
return c1 - c2;
131+
}
132+
133+
return 0;
120134
}
121135

122-
static const _Py_UCNHashAPI hashAPI =
136+
/* bindings for the new API */
137+
138+
static int
139+
ucnhash_getname(Py_UCS4 code, char* buffer, int buflen)
123140
{
124-
k_cKeys,
125-
k_cchMaxKey,
126-
&hash,
127-
&getValue,
141+
return 0;
142+
}
143+
144+
static int
145+
ucnhash_getcode(const char* name, int namelen, Py_UCS4* code)
146+
{
147+
unsigned long j;
148+
149+
j = hash(name, namelen);
150+
151+
if (j > cKeys || mystrnicmp(name, getValue(j)->pszUCN, namelen) != 0)
152+
return 0;
153+
154+
*code = getValue(j)->value;
155+
156+
return 1;
157+
}
158+
159+
/* The C API table handed out by this module.  The initializers follow
   the member order of _PyUnicode_Name_CAPI: size, getname, getcode.
   initucnhash() wraps a pointer to this table in a PyCObject and stores
   it in the module dictionary under "Unicode_Names_CAPI". */
static const _PyUnicode_Name_CAPI hashAPI =
160+
{
161+
sizeof(_PyUnicode_Name_CAPI),
162+
ucnhash_getname,
163+
ucnhash_getcode
128164
};
129165

130166
static
131-
PyMethodDef Module_methods[] =
167+
PyMethodDef ucnhash_methods[] =
132168
{
133169
{NULL, NULL},
134170
};
135171

136-
static char *Module_docstring = "ucnhash hash function module";
137-
138-
/* Error reporting for module init functions */
139-
140-
#define Py_ReportModuleInitError(modname) { \
141-
PyObject *exc_type, *exc_value, *exc_tb; \
142-
PyObject *str_type, *str_value; \
143-
\
144-
/* Fetch error objects and convert them to strings */ \
145-
PyErr_Fetch(&exc_type, &exc_value, &exc_tb); \
146-
if (exc_type && exc_value) { \
147-
str_type = PyObject_Str(exc_type); \
148-
str_value = PyObject_Str(exc_value); \
149-
} \
150-
else { \
151-
str_type = NULL; \
152-
str_value = NULL; \
153-
} \
154-
/* Try to format a more informative error message using the \
155-
original error */ \
156-
if (str_type && str_value && \
157-
PyString_Check(str_type) && PyString_Check(str_value)) \
158-
PyErr_Format( \
159-
PyExc_ImportError, \
160-
"initialization of module "modname" failed " \
161-
"(%s:%s)", \
162-
PyString_AS_STRING(str_type), \
163-
PyString_AS_STRING(str_value)); \
164-
else \
165-
PyErr_SetString( \
166-
PyExc_ImportError, \
167-
"initialization of module "modname" failed"); \
168-
Py_XDECREF(str_type); \
169-
Py_XDECREF(str_value); \
170-
Py_XDECREF(exc_type); \
171-
Py_XDECREF(exc_value); \
172-
Py_XDECREF(exc_tb); \
173-
}
172+
static char *ucnhash_docstring = "ucnhash hash function module";
174173

175174

176175
/* Create PyMethodObjects and register them in the module's dict */
177176
DL_EXPORT(void)
178177
initucnhash(void)
179178
{
180-
PyObject *module, *moddict;
181-
/* Create module */
182-
module = Py_InitModule4("ucnhash", /* Module name */
183-
Module_methods, /* Method list */
184-
Module_docstring, /* Module doc-string */
185-
(PyObject *)NULL, /* always pass this as *self */
186-
PYTHON_API_VERSION); /* API Version */
187-
if (module == NULL)
188-
goto onError;
189-
/* Add some constants to the module's dict */
190-
moddict = PyModule_GetDict(module);
191-
if (moddict == NULL)
192-
goto onError;
179+
PyObject *m, *d, *v;
180+
181+
m = Py_InitModule4(
182+
"ucnhash", /* Module name */
183+
ucnhash_methods, /* Method list */
184+
ucnhash_docstring, /* Module doc-string */
185+
(PyObject *)NULL, /* always pass this as *self */
186+
PYTHON_API_VERSION); /* API Version */
187+
if (!m)
188+
return;
189+
190+
d = PyModule_GetDict(m);
191+
if (!d)
192+
return;
193193

194194
/* Export C API */
195-
insobj(
196-
moddict,
197-
"ucnhashAPI",
198-
PyCObject_FromVoidPtr((void *)&hashAPI, NULL));
199-
200-
onError:
201-
/* Check for errors and report them */
202-
if (PyErr_Occurred())
203-
Py_ReportModuleInitError("ucnhash");
204-
return;
195+
v = PyCObject_FromVoidPtr((void *) &hashAPI, NULL);
196+
PyDict_SetItemString(d, "Unicode_Names_CAPI", v);
197+
Py_XDECREF(v);
205198
}
206199

207200
static const unsigned short G[] =

0 commit comments

Comments
 (0)