Added new parser markers 'et' and 'et#' which do not recode string

malemburg · malemburg · commit 6f15e5796e89 · 2001-05-02T17:16:16.000Z
objects but instead assume that they use the requested encoding.

This is needed on Windows to enable opening files by passing in
Unicode file names.
diff --git a/Doc/ext/ext.tex b/Doc/ext/ext.tex
@@ -736,6 +736,12 @@ \section{Extracting Parameters in Extension Functions
 storage. The caller is responsible for calling
 \cfunction{PyMem_Free()} to free the allocated buffer after usage.
 
+\item[\samp{et} (string, Unicode object or character buffer compatible
+object) {[const char *encoding, char **buffer]}]
+Same as \samp{es} except that string objects are passed through without
+recoding them. Instead, the implementation assumes that the string
+object uses the encoding passed in as parameter.
+
 \item[\samp{es\#} (string, Unicode object or character buffer compatible
 object) {[const char *encoding, char **buffer, int *buffer_length]}]
 This variant on \samp{s\#} is used for encoding Unicode and objects
@@ -767,6 +773,12 @@ \section{Extracting Parameters in Extension Functions
 In both cases, \var{*buffer_length} is set to the length of the
 encoded data without the trailing 0-byte.
 
+\item[\samp{et\#} (string, Unicode object or character buffer compatible
+object) {[const char *encoding, char **buffer, int *buffer_length]}]
+Same as \samp{es\#} except that string objects are passed through without
+recoding them. Instead, the implementation assumes that the string
+object uses the encoding passed in as parameter.
+
 \item[\samp{b} (integer) {[char]}]
 Convert a Python integer to a tiny int, stored in a C \ctype{char}.
 
diff --git a/Python/getargs.c b/Python/getargs.c
@@ -687,25 +687,39 @@ convertsimple1(PyObject *arg, char **p_format, va_list *p_va)
 			char **buffer;
 			const char *encoding;
 			PyObject *u, *s;
-			int size;
+			int size, recode_strings;
 
 			/* Get 'e' parameter: the encoding name */
 			encoding = (const char *)va_arg(*p_va, const char *);
 			if (encoding == NULL)
 			    	encoding = PyUnicode_GetDefaultEncoding();
 			
-			/* Get 's' parameter: the output buffer to use */
+			/* Get output buffer parameter:
+			     's' (recode all objects via Unicode) or
+			     't' (only recode non-string objects) 
+			*/
-			if (*format != 's')
+			if (*format == 's')
+			    	recode_strings = 1;
+			else if (*format == 't')
+			    	recode_strings = 0;
+			else
 				return "(unknown parser marker combination)";
 			buffer = (char **)va_arg(*p_va, char **);
 			format++;
 			if (buffer == NULL)
 				return "(buffer is NULL)";
 			
+			/* Encode object */
+			if (!recode_strings && PyString_Check(arg)) {
+			    	s = arg;
+				Py_INCREF(s);
+			}
+			else {
 			/* Convert object to Unicode */
 			u = PyUnicode_FromObject(arg);
 			if (u == NULL)
-				return "string or unicode or text buffer";
+					return \
+				     "string or unicode or text buffer";
 			
 			/* Encode object; use default error handling */
 			s = PyUnicode_AsEncodedString(u,
@@ -716,7 +730,9 @@ convertsimple1(PyObject *arg, char **p_format, va_list *p_va)
 				return "(encoding failed)";
 			if (!PyString_Check(s)) {
 				Py_DECREF(s);
-				return "(encoder failed to return a string)";
+					return \
+				     "(encoder failed to return a string)";
+				}
 			}
 			size = PyString_GET_SIZE(s);