Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit feee4b9

Browse files
committed
Python Codec Registry and support functions, written by Marc-Andre
Lemburg.
1 parent 86016cb commit feee4b9

1 file changed

Lines changed: 382 additions & 0 deletions

File tree

Python/codecs.c

Lines changed: 382 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,382 @@
1+
/* ------------------------------------------------------------------------
2+
3+
Python Codec Registry and support functions
4+
5+
Written by Marc-Andre Lemburg ([email protected]).
6+
7+
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
8+
9+
------------------------------------------------------------------------ */
10+
11+
#include "Python.h"
12+
#include <ctype.h>
13+
14+
/* --- Globals ------------------------------------------------------------ */
15+
16+
/* List of registered codec search functions, scanned in registration order */
static PyObject *_PyCodec_SearchPath;
17+
/* Dictionary caching successful lookups, keyed by the lower-cased encoding name */
static PyObject *_PyCodec_SearchCache;
18+
19+
/* Flag used for lazy import of the standard encodings package */
20+
static int import_encodings_called = 0;
21+
22+
/* --- Codec Registry ----------------------------------------------------- */
23+
24+
/* Import the standard encodings package which will register the first
25+
codec search function.
26+
27+
This is done in a lazy way so that the Unicode implementation does
28+
not slow down the startup of scripts that do not need it.
29+
30+
Errors are silently ignored by this function. Only one try is made.
31+
32+
*/
33+
34+
static
35+
void import_encodings()
36+
{
37+
PyObject *mod;
38+
39+
import_encodings_called = 1;
40+
mod = PyImport_ImportModule("encodings");
41+
if (mod == NULL) {
42+
PyErr_Clear();
43+
return;
44+
}
45+
Py_DECREF(mod);
46+
}
47+
48+
/* Register a new codec search function.
49+
50+
The search_function's refcount is incremented by this function. */
51+
52+
int PyCodec_Register(PyObject *search_function)
{
    int status = -1;

    /* Trigger the lazy import of the encodings package first (one
       attempt only), before the argument is even looked at. */
    if (import_encodings_called == 0) {
        import_encodings();
    }

    if (search_function == NULL) {
        PyErr_BadArgument();
    }
    else if (PyCallable_Check(search_function) == 0) {
        PyErr_SetString(PyExc_TypeError,
                        "argument must be callable");
    }
    else {
        /* Appending to the search path list is what registers the
           function; the result of the append is returned as-is. */
        status = PyList_Append(_PyCodec_SearchPath, search_function);
    }
    return status;
}
67+
68+
static
69+
PyObject *lowercasestring(const char *string)
70+
{
71+
register int i;
72+
int len = strlen(string);
73+
char *p;
74+
PyObject *v;
75+
76+
v = PyString_FromStringAndSize(NULL, len);
77+
if (v == NULL)
78+
return NULL;
79+
p = PyString_AS_STRING(v);
80+
for (i = 0; i < len; i++)
81+
p[i] = tolower(string[i]);
82+
return v;
83+
}
84+
85+
/* Lookup the given encoding and return a tuple providing the codec
86+
facilities.
87+
88+
The encoding string is looked up converted to all lower-case
89+
characters. This makes encodings looked up through this mechanism
90+
effectively case-insensitive.
91+
92+
If no codec is found, a LookupError is set and NULL returned. */
93+
94+
PyObject *_PyCodec_Lookup(const char *encoding)
95+
{
96+
PyObject *result, *args = NULL, *v;
97+
int i, len;
98+
99+
if (!import_encodings_called)
100+
import_encodings();
101+
102+
/* Convert the encoding to a lower-cased Python string */
103+
v = lowercasestring(encoding);
104+
if (v == NULL)
105+
goto onError;
106+
PyString_InternInPlace(&v);
107+
108+
/* First, try to lookup the name in the registry dictionary */
109+
result = PyDict_GetItem(_PyCodec_SearchCache, v);
110+
if (result != NULL) {
111+
Py_INCREF(result);
112+
return result;
113+
}
114+
115+
/* Next, scan the search functions in order of registration */
116+
len = PyList_Size(_PyCodec_SearchPath);
117+
if (len < 0)
118+
goto onError;
119+
120+
args = PyTuple_New(1);
121+
if (args == NULL)
122+
goto onError;
123+
PyTuple_SET_ITEM(args,0,v);
124+
125+
for (i = 0; i < len; i++) {
126+
PyObject *func;
127+
128+
func = PyList_GetItem(_PyCodec_SearchPath, i);
129+
if (func == NULL)
130+
goto onError;
131+
result = PyEval_CallObject(func,args);
132+
if (result == NULL)
133+
goto onError;
134+
if (result == Py_None) {
135+
Py_DECREF(result);
136+
continue;
137+
}
138+
if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
139+
PyErr_SetString(PyExc_TypeError,
140+
"codec search functions must return 4-tuples");
141+
Py_DECREF(result);
142+
goto onError;
143+
}
144+
break;
145+
}
146+
if (i == len) {
147+
/* XXX Perhaps we should cache misses too ? */
148+
PyErr_SetString(PyExc_LookupError,
149+
"unkown encoding");
150+
goto onError;
151+
}
152+
153+
/* Cache and return the result */
154+
PyDict_SetItem(_PyCodec_SearchCache, v, result);
155+
Py_DECREF(args);
156+
return result;
157+
158+
onError:
159+
Py_XDECREF(args);
160+
return NULL;
161+
}
162+
163+
static
164+
PyObject *args_tuple(PyObject *object,
165+
const char *errors)
166+
{
167+
PyObject *args;
168+
169+
args = PyTuple_New(1 + (errors != NULL));
170+
if (args == NULL)
171+
return NULL;
172+
Py_INCREF(object);
173+
PyTuple_SET_ITEM(args,0,object);
174+
if (errors) {
175+
PyObject *v;
176+
177+
v = PyString_FromString(errors);
178+
if (v == NULL) {
179+
Py_DECREF(args);
180+
return NULL;
181+
}
182+
PyTuple_SET_ITEM(args, 1, v);
183+
}
184+
return args;
185+
}
186+
187+
/* Build a codec by calling factory(stream[,errors]); errors is only
   passed along when it is non-NULL.  stream itself must not be NULL. */
190+
191+
static
192+
PyObject *build_stream_codec(PyObject *factory,
193+
PyObject *stream,
194+
const char *errors)
195+
{
196+
PyObject *args, *codec;
197+
198+
args = args_tuple(stream, errors);
199+
if (args == NULL)
200+
return NULL;
201+
202+
codec = PyEval_CallObject(factory, args);
203+
Py_DECREF(args);
204+
return codec;
205+
}
206+
207+
/* Convenience APIs to query the Codec registry.
208+
209+
All APIs return a codec object with incremented refcount.
210+
211+
*/
212+
213+
PyObject *PyCodec_Encoder(const char *encoding)
214+
{
215+
PyObject *codecs;
216+
PyObject *v;
217+
218+
codecs = _PyCodec_Lookup(encoding);
219+
if (codecs == NULL)
220+
goto onError;
221+
v = PyTuple_GET_ITEM(codecs,0);
222+
Py_INCREF(v);
223+
return v;
224+
225+
onError:
226+
return NULL;
227+
}
228+
229+
PyObject *PyCodec_Decoder(const char *encoding)
230+
{
231+
PyObject *codecs;
232+
PyObject *v;
233+
234+
codecs = _PyCodec_Lookup(encoding);
235+
if (codecs == NULL)
236+
goto onError;
237+
v = PyTuple_GET_ITEM(codecs,1);
238+
Py_INCREF(v);
239+
return v;
240+
241+
onError:
242+
return NULL;
243+
}
244+
245+
/* Create a stream reader for encoding by calling the factory stored as
   entry 2 of the codec tuple with (stream[, errors]).

   Returns the factory's result, or NULL with an exception set if the
   lookup or the factory call failed. */

PyObject *PyCodec_StreamReader(const char *encoding,
                               PyObject *stream,
                               const char *errors)
{
    PyObject *codecs;
    PyObject *reader;

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
        return NULL;
    reader = build_stream_codec(PyTuple_GET_ITEM(codecs, 2), stream, errors);
    /* The lookup returned a new reference to the tuple; release it now
       that the factory has been called. */
    Py_DECREF(codecs);
    return reader;
}
259+
260+
/* Create a stream writer for encoding by calling the factory stored as
   entry 3 of the codec tuple with (stream[, errors]).

   Returns the factory's result, or NULL with an exception set if the
   lookup or the factory call failed. */

PyObject *PyCodec_StreamWriter(const char *encoding,
                               PyObject *stream,
                               const char *errors)
{
    PyObject *codecs;
    PyObject *writer;

    codecs = _PyCodec_Lookup(encoding);
    if (codecs == NULL)
        return NULL;
    writer = build_stream_codec(PyTuple_GET_ITEM(codecs, 3), stream, errors);
    /* The lookup returned a new reference to the tuple; release it now
       that the factory has been called. */
    Py_DECREF(codecs);
    return writer;
}
274+
275+
/* Encode an object (e.g. an Unicode object) using the given encoding
276+
and return the resulting encoded object (usually a Python string).
277+
278+
errors is passed to the encoder factory as argument if non-NULL. */
279+
280+
/* Calls the encoder registered for encoding with (object[, errors]) and
   returns a new reference to the first entry of the 2-tuple it yields.

   Returns NULL with an exception set if the encoder cannot be found,
   the call fails, or the encoder does not return a 2-tuple (TypeError). */

PyObject *PyCodec_Encode(PyObject *object,
                         const char *encoding,
                         const char *errors)
{
    PyObject *encoder = NULL;
    PyObject *args = NULL, *result = NULL;
    PyObject *v;

    encoder = PyCodec_Encoder(encoding);
    if (encoder == NULL)
        goto onError;

    args = args_tuple(object, errors);
    if (args == NULL)
        goto onError;

    result = PyEval_CallObject(encoder, args);
    if (result == NULL)
        goto onError;

    if (!PyTuple_Check(result) ||
        PyTuple_GET_SIZE(result) != 2) {
        PyErr_SetString(PyExc_TypeError,
                        "encoder must return a tuple (object,integer)");
        goto onError;
    }
    v = PyTuple_GET_ITEM(result, 0);
    Py_INCREF(v);
    /* We don't check or use the second (integer) entry. */

    Py_DECREF(args);
    Py_DECREF(encoder);
    Py_DECREF(result);
    return v;

 onError:
    Py_XDECREF(args);
    Py_XDECREF(encoder);
    /* result is non-NULL here when the encoder returned a malformed
       value; it must be released as well. */
    Py_XDECREF(result);
    return NULL;
}
320+
321+
/* Decode an object (usually a Python string) using the given encoding
322+
and return an equivalent object (e.g. an Unicode object).
323+
324+
errors is passed to the decoder factory as argument if non-NULL. */
325+
326+
/* Calls the decoder registered for encoding with (object[, errors]) and
   returns a new reference to the first entry of the 2-tuple it yields.

   Returns NULL with an exception set if the decoder cannot be found,
   the call fails, or the decoder does not return a 2-tuple (TypeError). */

PyObject *PyCodec_Decode(PyObject *object,
                         const char *encoding,
                         const char *errors)
{
    PyObject *decode_func;
    PyObject *call_args = NULL;
    PyObject *outcome = NULL;
    PyObject *decoded = NULL;

    decode_func = PyCodec_Decoder(encoding);
    if (decode_func == NULL)
        return NULL;

    call_args = args_tuple(object, errors);
    if (call_args != NULL)
        outcome = PyEval_CallObject(decode_func, call_args);

    if (outcome != NULL) {
        if (PyTuple_Check(outcome) && PyTuple_GET_SIZE(outcome) == 2) {
            /* We don't check or use the second (integer) entry. */
            decoded = PyTuple_GET_ITEM(outcome, 0);
            Py_INCREF(decoded);
        }
        else {
            PyErr_SetString(PyExc_TypeError,
                            "decoder must return a tuple (object,integer)");
        }
    }

    /* Single exit: decoded is still NULL on every failure path. */
    Py_XDECREF(call_args);
    Py_DECREF(decode_func);
    Py_XDECREF(outcome);
    return decoded;
}
366+
367+
void _PyCodecRegistry_Init()
368+
{
369+
if (_PyCodec_SearchPath == NULL)
370+
_PyCodec_SearchPath = PyList_New(0);
371+
if (_PyCodec_SearchCache == NULL)
372+
_PyCodec_SearchCache = PyDict_New();
373+
if (_PyCodec_SearchPath == NULL ||
374+
_PyCodec_SearchCache == NULL)
375+
Py_FatalError("can't intialize codec registry");
376+
}
377+
378+
void _PyCodecRegistry_Fini()
379+
{
380+
Py_XDECREF(_PyCodec_SearchPath);
381+
Py_XDECREF(_PyCodec_SearchCache);
382+
}

0 commit comments

Comments
 (0)