Python Codec Registry and support functions, written by Marc-Andre

gvanrossum · gvanrossum · commit feee4b994fa4 · 2000-03-10T22:57:27.000Z
Lemburg.
diff --git a/Python/codecs.c b/Python/codecs.c
@@ -0,0 +1,382 @@
+/* ------------------------------------------------------------------------
+
+   Python Codec Registry and support functions
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
+
+   ------------------------------------------------------------------------ */
+
+#include "Python.h"
+#include <ctype.h>
+
+/* --- Globals ------------------------------------------------------------ */
+
+/* List of registered codec search functions, in order of registration.
+   Created by _PyCodecRegistry_Init(), appended to by PyCodec_Register(). */
+static PyObject *_PyCodec_SearchPath;
+/* Dictionary caching successful lookups: maps the interned, lower-cased
+   encoding name to the 4-tuple returned by a search function.
+   Created by _PyCodecRegistry_Init(), filled in by _PyCodec_Lookup(). */
+static PyObject *_PyCodec_SearchCache;
+
+/* Flag used for lazy import of the standard encodings package; set to 1
+   by import_encodings() before the import is attempted, so the import
+   is tried only once whether or not it succeeds. */
+static int import_encodings_called = 0;
+
+/* --- Codec Registry ----------------------------------------------------- */
+
+/* Lazily pull in the standard "encodings" package; importing it is
+   what registers the first codec search function.
+
+   The import is deferred until the registry is first used so that
+   scripts which never need the Unicode machinery do not pay for it
+   at startup.
+
+   The attempt is made at most once: the flag is raised before the
+   import is tried, and a failed import is silently discarded. */
+
+static
+void import_encodings()
+{
+    PyObject *encodings_module;
+
+    /* Record the attempt first so that it is never repeated. */
+    import_encodings_called = 1;
+
+    encodings_module = PyImport_ImportModule("encodings");
+    if (encodings_module != NULL) {
+        /* Only the import itself matters here; drop the module reference. */
+        Py_DECREF(encodings_module);
+    }
+    else {
+        /* Errors are deliberately ignored. */
+        PyErr_Clear();
+    }
+}
+
+/* Register a new codec search function.
+
+   search_function must be a callable object. It is appended to the
+   end of the search path, which takes its own reference to it; the
+   caller's reference is left untouched.
+
+   Returns the result of PyList_Append() on success, or -1 with an
+   exception set if search_function is NULL or not callable. */
+
+int PyCodec_Register(PyObject *search_function)
+{
+    /* Make sure the standard encodings package had its chance to
+       register before anything else is appended. */
+    if (!import_encodings_called)
+        import_encodings();
+
+    if (search_function != NULL && PyCallable_Check(search_function))
+        return PyList_Append(_PyCodec_SearchPath, search_function);
+
+    /* Rejected: report why. */
+    if (search_function == NULL)
+        PyErr_BadArgument();
+    else
+        PyErr_SetString(PyExc_TypeError,
+                        "argument must be callable");
+    return -1;
+}
+
+/* Return a new Python string object holding a lower-cased copy of the
+   NUL-terminated C string `string`.
+
+   Returns NULL if the string object cannot be created. */
+
+static
+PyObject *lowercasestring(const char *string)
+{
+    int i;
+    int len = (int)strlen(string);
+    char *p;
+    PyObject *v;
+
+    /* Allocate an uninitialized string of the right size and fill it
+       in place. */
+    v = PyString_FromStringAndSize(NULL, len);
+    if (v == NULL)
+        return NULL;
+    p = PyString_AS_STRING(v);
+    for (i = 0; i < len; i++) {
+        /* The <ctype.h> functions are only defined for EOF and values
+           representable as unsigned char; a plain char may be negative
+           for bytes >= 0x80, so convert before calling tolower(). */
+        p[i] = (char)tolower((unsigned char)string[i]);
+    }
+    return v;
+}
+
+/* Lookup the given encoding and return a tuple providing the codec
+   facilities.
+
+   The encoding string is looked up converted to all lower-case
+   characters. This makes encodings looked up through this mechanism
+   effectively case-insensitive.
+
+   The cache dictionary is consulted first. On a miss, each registered
+   search function is called in order of registration with the
+   lower-cased name as its only argument; a search function returns
+   None to decline, or a 4-tuple which is then cached and returned.
+
+   Returns a new reference to the 4-tuple.
+
+   If no codec is found, a LookupError is set and NULL returned. NULL
+   is also returned (with an exception set) if encoding is NULL, if a
+   search function fails or returns something other than None or a
+   4-tuple, or if the result cannot be stored in the cache. */
+
+PyObject *_PyCodec_Lookup(const char *encoding)
+{
+    PyObject *result, *args = NULL, *v = NULL;
+    int i, len;
+
+    if (encoding == NULL) {
+        PyErr_BadArgument();
+        goto onError;
+    }
+
+    if (!import_encodings_called)
+        import_encodings();
+
+    /* Convert the encoding to a lower-cased Python string */
+    v = lowercasestring(encoding);
+    if (v == NULL)
+        goto onError;
+    PyString_InternInPlace(&v);
+
+    /* First, try to lookup the name in the registry dictionary */
+    result = PyDict_GetItem(_PyCodec_SearchCache, v);
+    if (result != NULL) {
+        /* PyDict_GetItem() hands back a borrowed reference. */
+        Py_INCREF(result);
+        Py_DECREF(v);
+        return result;
+    }
+
+    /* Next, scan the search functions in order of registration */
+    len = PyList_Size(_PyCodec_SearchPath);
+    if (len < 0)
+        goto onError;
+
+    args = PyTuple_New(1);
+    if (args == NULL)
+        goto onError;
+    /* PyTuple_SET_ITEM steals a reference; give the tuple its own so
+       that v stays owned by this function on every exit path. */
+    Py_INCREF(v);
+    PyTuple_SET_ITEM(args,0,v);
+
+    for (i = 0; i < len; i++) {
+        PyObject *func;
+
+        func = PyList_GetItem(_PyCodec_SearchPath, i);
+        if (func == NULL)
+            goto onError;
+        result = PyEval_CallObject(func,args);
+        if (result == NULL)
+            goto onError;
+        if (result == Py_None) {
+            /* This search function does not know the encoding. */
+            Py_DECREF(result);
+            continue;
+        }
+        if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
+            PyErr_SetString(PyExc_TypeError,
+                            "codec search functions must return 4-tuples");
+            Py_DECREF(result);
+            goto onError;
+        }
+        break;
+    }
+    if (i == len) {
+        /* XXX Perhaps we should cache misses too ? */
+        PyErr_SetString(PyExc_LookupError,
+                        "unknown encoding");
+        goto onError;
+    }
+
+    /* Cache and return the result; do not return a value while the
+       exception raised by a failed insert is still pending. */
+    if (PyDict_SetItem(_PyCodec_SearchCache, v, result) < 0) {
+        Py_DECREF(result);
+        goto onError;
+    }
+    Py_DECREF(args);
+    Py_DECREF(v);
+    return result;
+
+ onError:
+    Py_XDECREF(args);
+    Py_XDECREF(v);
+    return NULL;
+}
+
+/* Pack the call arguments for a codec callable: (object,) when errors
+   is NULL, otherwise (object, errors) with errors converted to a
+   Python string. The tuple holds its own reference to object.
+
+   Returns a new tuple, or NULL if an object could not be created. */
+
+static
+PyObject *args_tuple(PyObject *object,
+                     const char *errors)
+{
+    PyObject *tuple;
+    PyObject *errors_string;
+    int size = (errors != NULL) ? 2 : 1;
+
+    tuple = PyTuple_New(size);
+    if (tuple == NULL)
+        return NULL;
+
+    /* PyTuple_SET_ITEM steals a reference, so take one for the tuple. */
+    Py_INCREF(object);
+    PyTuple_SET_ITEM(tuple, 0, object);
+
+    if (errors == NULL)
+        return tuple;
+
+    errors_string = PyString_FromString(errors);
+    if (errors_string == NULL) {
+        Py_DECREF(tuple);
+        return NULL;
+    }
+    PyTuple_SET_ITEM(tuple, 1, errors_string);
+    return tuple;
+}
+
+/* Build a stream codec by calling factory(stream) or, when errors is
+   non-NULL, factory(stream, errors).
+
+   stream is always passed as the first argument; args_tuple() takes a
+   reference to it unconditionally, so it must not be NULL.
+
+   Returns whatever the factory returns, or NULL if the argument tuple
+   could not be built or the call failed. */
+
+static
+PyObject *build_stream_codec(PyObject *factory,
+                             PyObject *stream,
+                             const char *errors)
+{
+    PyObject *call_args = args_tuple(stream, errors);
+    PyObject *stream_codec = NULL;
+
+    if (call_args != NULL) {
+        stream_codec = PyEval_CallObject(factory, call_args);
+        Py_DECREF(call_args);
+    }
+    return stream_codec;
+}
+
+/* Convenience APIs to query the Codec registry. 
+   
+   All APIs return a codec object with incremented refcount.
+   
+ */
+
+/* Return a new reference to the encoder callable (item 0 of the codec
+   tuple) registered for the given encoding, or NULL with an exception
+   set if the lookup fails. */
+PyObject *PyCodec_Encoder(const char *encoding)
+{
+    PyObject *codecs;
+    PyObject *v;
+
+    codecs = _PyCodec_Lookup(encoding);
+    if (codecs == NULL)
+        return NULL;
+    v = PyTuple_GET_ITEM(codecs,0);
+    Py_INCREF(v);
+    /* _PyCodec_Lookup() returned a new reference to the tuple; release
+       it now that we hold our own reference to the item. */
+    Py_DECREF(codecs);
+    return v;
+}
+
+/* Return a new reference to the decoder callable (item 1 of the codec
+   tuple) registered for the given encoding, or NULL with an exception
+   set if the lookup fails. */
+PyObject *PyCodec_Decoder(const char *encoding)
+{
+    PyObject *codecs;
+    PyObject *v;
+
+    codecs = _PyCodec_Lookup(encoding);
+    if (codecs == NULL)
+        return NULL;
+    v = PyTuple_GET_ITEM(codecs,1);
+    Py_INCREF(v);
+    /* _PyCodec_Lookup() returned a new reference to the tuple; release
+       it now that we hold our own reference to the item. */
+    Py_DECREF(codecs);
+    return v;
+}
+
+/* Look up the given encoding and call its stream reader factory
+   (item 2 of the codec tuple) with stream and, if non-NULL, errors.
+
+   Returns the object produced by the factory, or NULL with an
+   exception set if the lookup or the factory call fails. */
+PyObject *PyCodec_StreamReader(const char *encoding,
+                               PyObject *stream,
+                               const char *errors)
+{
+    PyObject *codecs;
+    PyObject *reader;
+
+    codecs = _PyCodec_Lookup(encoding);
+    if (codecs == NULL)
+        return NULL;
+    reader = build_stream_codec(PyTuple_GET_ITEM(codecs,2),stream,errors);
+    /* _PyCodec_Lookup() returned a new reference to the tuple; release
+       it whether or not the factory call succeeded. */
+    Py_DECREF(codecs);
+    return reader;
+}
+
+/* Look up the given encoding and call its stream writer factory
+   (item 3 of the codec tuple) with stream and, if non-NULL, errors.
+
+   Returns the object produced by the factory, or NULL with an
+   exception set if the lookup or the factory call fails. */
+PyObject *PyCodec_StreamWriter(const char *encoding,
+                               PyObject *stream,
+                               const char *errors)
+{
+    PyObject *codecs;
+    PyObject *writer;
+
+    codecs = _PyCodec_Lookup(encoding);
+    if (codecs == NULL)
+        return NULL;
+    writer = build_stream_codec(PyTuple_GET_ITEM(codecs,3),stream,errors);
+    /* _PyCodec_Lookup() returned a new reference to the tuple; release
+       it whether or not the factory call succeeded. */
+    Py_DECREF(codecs);
+    return writer;
+}
+
+/* Encode an object (e.g. an Unicode object) using the given encoding
+   and return the resulting encoded object (usually a Python string).
+
+   The encoder is called as encoder(object) or, if errors is non-NULL,
+   as encoder(object, errors). It must return a 2-tuple; a new
+   reference to the first item is returned.
+
+   Returns NULL with an exception set if the encoding cannot be looked
+   up, the call fails, or the encoder does not return a 2-tuple. */
+
+PyObject *PyCodec_Encode(PyObject *object,
+                         const char *encoding,
+                         const char *errors)
+{
+    PyObject *encoder = NULL;
+    PyObject *args = NULL, *result = NULL;
+    PyObject *v;
+
+    encoder = PyCodec_Encoder(encoding);
+    if (encoder == NULL)
+        goto onError;
+
+    args = args_tuple(object, errors);
+    if (args == NULL)
+        goto onError;
+
+    result = PyEval_CallObject(encoder,args);
+    if (result == NULL)
+        goto onError;
+
+    if (!PyTuple_Check(result) ||
+        PyTuple_GET_SIZE(result) != 2) {
+        PyErr_SetString(PyExc_TypeError,
+                        "encoder must return a tuple (object,integer)");
+        goto onError;
+    }
+    v = PyTuple_GET_ITEM(result,0);
+    Py_INCREF(v);
+    /* We don't check or use the second (integer) entry. */
+
+    Py_DECREF(args);
+    Py_DECREF(encoder);
+    Py_DECREF(result);
+    return v;
+
+ onError:
+    Py_XDECREF(args);
+    Py_XDECREF(encoder);
+    /* result is non-NULL here when the encoder returned a malformed
+       value; it must be released as well. */
+    Py_XDECREF(result);
+    return NULL;
+}
+
+/* Decode an object (usually a Python string) using the given encoding
+   and return an equivalent object (e.g. an Unicode object).
+
+   The decoder is called as decoder(object) or, if errors is non-NULL,
+   as decoder(object, errors). It must return a 2-tuple; a new
+   reference to the first item is returned.
+
+   Returns NULL with an exception set if the encoding cannot be looked
+   up, the call fails, or the decoder does not return a 2-tuple. */
+
+PyObject *PyCodec_Decode(PyObject *object,
+                         const char *encoding,
+                         const char *errors)
+{
+    PyObject *decoder;
+    PyObject *call_args = NULL;
+    PyObject *outcome = NULL;
+    PyObject *decoded = NULL;
+
+    decoder = PyCodec_Decoder(encoding);
+    if (decoder == NULL)
+        return NULL;
+
+    call_args = args_tuple(object, errors);
+    if (call_args == NULL)
+        goto done;
+
+    outcome = PyEval_CallObject(decoder, call_args);
+    if (outcome == NULL)
+        goto done;
+
+    if (PyTuple_Check(outcome) && PyTuple_GET_SIZE(outcome) == 2) {
+        /* Only the first entry is used; the second (integer) entry is
+           neither checked nor returned. */
+        decoded = PyTuple_GET_ITEM(outcome, 0);
+        Py_INCREF(decoded);
+    }
+    else {
+        PyErr_SetString(PyExc_TypeError,
+                        "decoder must return a tuple (object,integer)");
+    }
+
+ done:
+    /* Shared exit: decoded is still NULL on every failure path. */
+    Py_XDECREF(call_args);
+    Py_DECREF(decoder);
+    Py_XDECREF(outcome);
+    return decoded;
+}
+
+/* Create the codec registry's search path list and lookup cache
+   dictionary if they do not exist yet.
+
+   Aborts through Py_FatalError() if either object cannot be
+   created. */
+void _PyCodecRegistry_Init(void)
+{
+    if (_PyCodec_SearchPath == NULL)
+        _PyCodec_SearchPath = PyList_New(0);
+    if (_PyCodec_SearchCache == NULL)
+        _PyCodec_SearchCache = PyDict_New();
+    if (_PyCodec_SearchPath == NULL ||
+        _PyCodec_SearchCache == NULL)
+        Py_FatalError("can't initialize codec registry");
+}
+
+/* Release the codec registry's search path and lookup cache.
+
+   The globals are reset to NULL so that a later call to
+   _PyCodecRegistry_Init() creates fresh objects instead of reusing
+   pointers to released ones, and the lazy-import flag is cleared so
+   that the encodings package gets imported again for the new
+   registry. */
+void _PyCodecRegistry_Fini(void)
+{
+    Py_XDECREF(_PyCodec_SearchPath);
+    _PyCodec_SearchPath = NULL;
+    Py_XDECREF(_PyCodec_SearchCache);
+    _PyCodec_SearchCache = NULL;
+    import_encodings_called = 0;
+}