Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 3aeb632

Browse files
committed
PEP 293 implementation (from SF patch http://www.python.org/sf/432401)
1 parent 94fab76 commit 3aeb632

12 files changed

Lines changed: 2929 additions & 556 deletions

File tree

Doc/lib/libcodecs.tex

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ \section{\module{codecs} ---
1717

1818
This module defines base classes for standard Python codecs (encoders
1919
and decoders) and provides access to the internal Python codec
20-
registry which manages the codec lookup process.
20+
registry which manages the codec and error handling lookup process.
2121

2222
It defines the following functions:
2323

@@ -98,6 +98,43 @@ \section{\module{codecs} ---
9898
To simplify working with encoded files or stream, the module
9999
also defines these utility functions:
100100

101+
\begin{funcdesc}{register_error}{name, error_handler}
102+
Register the error handling function \var{error_handler} under the
103+
name \var{name}. \var{error_handler} will be called during encoding
104+
and decoding in case of an error, when \var{name} is specified as the
105+
errors parameter. \var{error_handler} will be called with an
106+
\exception{UnicodeEncodeError}, \exception{UnicodeDecodeError} or
107+
\exception{UnicodeTranslateError} instance and must return a tuple
108+
with a replacement for the unencodable/undecodable part of the input
109+
and a position where encoding/decoding should continue.
110+
\end{funcdesc}
111+
112+
\begin{funcdesc}{lookup_error}{name}
113+
Return the error handler previously registered under the name \var{name}.
114+
115+
Raises a \exception{LookupError} in case the handler cannot be found.
116+
\end{funcdesc}
117+
118+
\begin{funcdesc}{strict_errors}{exception}
119+
Implements the \code{strict} error handling.
120+
\end{funcdesc}
121+
122+
\begin{funcdesc}{replace_errors}{exception}
123+
Implements the \code{replace} error handling.
124+
\end{funcdesc}
125+
126+
\begin{funcdesc}{ignore_errors}{exception}
127+
Implements the \code{ignore} error handling.
128+
\end{funcdesc}
129+
130+
\begin{funcdesc}{xmlcharrefreplace_errors}{exception}
131+
Implements the \code{xmlcharrefreplace} error handling.
132+
\end{funcdesc}
133+
134+
\begin{funcdesc}{backslashreplace_errors}{exception}
135+
Implements the \code{backslashreplace} error handling.
136+
\end{funcdesc}
137+
101138
\begin{funcdesc}{open}{filename, mode\optional{, encoding\optional{,
102139
errors\optional{, buffering}}}}
103140
Open an encoded file using the given \var{mode} and return

Doc/lib/libexcs.tex

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,24 @@ \section{Built-in Exceptions}
335335
\versionadded{2.0}
336336
\end{excdesc}
337337

338+
\begin{excdesc}{UnicodeEncodeError}
339+
Raised when a Unicode-related error occurs during encoding. It
340+
is a subclass of \exception{UnicodeError}.
341+
\versionadded{2.3}
342+
\end{excdesc}
343+
344+
\begin{excdesc}{UnicodeDecodeError}
345+
Raised when a Unicode-related error occurs during decoding. It
346+
is a subclass of \exception{UnicodeError}.
347+
\versionadded{2.3}
348+
\end{excdesc}
349+
350+
\begin{excdesc}{UnicodeTranslateError}
351+
Raised when a Unicode-related error occurs during translating. It
352+
is a subclass of \exception{UnicodeError}.
353+
\versionadded{2.3}
354+
\end{excdesc}
355+
338356
\begin{excdesc}{ValueError}
339357
Raised when a built-in operation or function receives an argument
340358
that has the right type but an inappropriate value, and the
@@ -426,6 +444,9 @@ \section{Built-in Exceptions}
426444
| | +-- FloatingPointError
427445
| +-- ValueError
428446
| | +-- UnicodeError
447+
| | +-- UnicodeEncodeError
448+
| | +-- UnicodeDecodeError
449+
| | +-- UnicodeTranslateError
429450
| +-- ReferenceError
430451
| +-- SystemError
431452
| +-- MemoryError

Include/codecs.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,36 @@ PyAPI_FUNC(PyObject *) PyCodec_StreamWriter(
117117
const char *errors
118118
);
119119

120+
/* Unicode encoding error handling callback registry API */
121+
122+
/* Register the error handling callback function error under the name
123+
name. This function will be called by the codec when it encounters
124+
unencodable characters/undecodable bytes and doesn't know the
125+
callback name, when name is specified as the error parameter
126+
in the call to the encode/decode function.
127+
Return 0 on success, -1 on error */
128+
PyAPI_FUNC(int) PyCodec_RegisterError(const char *name, PyObject *error);
129+
130+
/* Lookup the error handling callback function registered under the
131+
given name. As a special case NULL can be passed, in which case
132+
the error handling callback for "strict" will be returned. */
133+
PyAPI_FUNC(PyObject *) PyCodec_LookupError(const char *name);
134+
135+
/* raise exc as an exception */
136+
PyAPI_FUNC(PyObject *) PyCodec_StrictErrors(PyObject *exc);
137+
138+
/* ignore the unicode error, skipping the faulty input */
139+
PyAPI_FUNC(PyObject *) PyCodec_IgnoreErrors(PyObject *exc);
140+
141+
/* replace the unicode error with ? or U+FFFD */
142+
PyAPI_FUNC(PyObject *) PyCodec_ReplaceErrors(PyObject *exc);
143+
144+
/* replace the unicode encode error with XML character references */
145+
PyAPI_FUNC(PyObject *) PyCodec_XMLCharRefReplaceErrors(PyObject *exc);
146+
147+
/* replace the unicode encode error with backslash escapes (\x, \u and \U) */
148+
PyAPI_FUNC(PyObject *) PyCodec_BackslashReplaceErrors(PyObject *exc);
149+
120150
#ifdef __cplusplus
121151
}
122152
#endif

Include/pyerrors.h

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ PyAPI_DATA(PyObject *) PyExc_SystemExit;
5454
PyAPI_DATA(PyObject *) PyExc_TypeError;
5555
PyAPI_DATA(PyObject *) PyExc_UnboundLocalError;
5656
PyAPI_DATA(PyObject *) PyExc_UnicodeError;
57+
PyAPI_DATA(PyObject *) PyExc_UnicodeEncodeError;
58+
PyAPI_DATA(PyObject *) PyExc_UnicodeDecodeError;
59+
PyAPI_DATA(PyObject *) PyExc_UnicodeTranslateError;
5760
PyAPI_DATA(PyObject *) PyExc_ValueError;
5861
PyAPI_DATA(PyObject *) PyExc_ZeroDivisionError;
5962
#ifdef MS_WINDOWS
@@ -114,6 +117,69 @@ PyAPI_FUNC(void) PyErr_SetInterrupt(void);
114117
PyAPI_FUNC(void) PyErr_SyntaxLocation(char *, int);
115118
PyAPI_FUNC(PyObject *) PyErr_ProgramText(char *, int);
116119

120+
/* The following functions are used to create and modify unicode
121+
exceptions from C */
122+
/* create a UnicodeDecodeError object */
123+
PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_Create(
124+
const char *, const char *, int, int, int, const char *);
125+
126+
/* create a UnicodeEncodeError object */
127+
PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_Create(
128+
const char *, const Py_UNICODE *, int, int, int, const char *);
129+
130+
/* create a UnicodeTranslateError object */
131+
PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_Create(
132+
const Py_UNICODE *, int, int, int, const char *);
133+
134+
/* get the encoding attribute */
135+
PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetEncoding(PyObject *);
136+
PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetEncoding(PyObject *);
137+
PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetEncoding(PyObject *);
138+
139+
/* get the object attribute */
140+
PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetObject(PyObject *);
141+
PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetObject(PyObject *);
142+
PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetObject(PyObject *);
143+
144+
/* get the value of the start attribute (the int * may not be NULL)
145+
return 0 on success, -1 on failure */
146+
PyAPI_FUNC(int) PyUnicodeEncodeError_GetStart(PyObject *, int *);
147+
PyAPI_FUNC(int) PyUnicodeDecodeError_GetStart(PyObject *, int *);
148+
PyAPI_FUNC(int) PyUnicodeTranslateError_GetStart(PyObject *, int *);
149+
150+
/* assign a new value to the start attribute
151+
return 0 on success, -1 on failure */
152+
PyAPI_FUNC(int) PyUnicodeEncodeError_SetStart(PyObject *, int);
153+
PyAPI_FUNC(int) PyUnicodeDecodeError_SetStart(PyObject *, int);
154+
PyAPI_FUNC(int) PyUnicodeTranslateError_SetStart(PyObject *, int);
155+
156+
/* get the value of the end attribute (the int * may not be NULL)
157+
return 0 on success, -1 on failure */
158+
PyAPI_FUNC(int) PyUnicodeEncodeError_GetEnd(PyObject *, int *);
159+
PyAPI_FUNC(int) PyUnicodeDecodeError_GetEnd(PyObject *, int *);
160+
PyAPI_FUNC(int) PyUnicodeTranslateError_GetEnd(PyObject *, int *);
161+
162+
/* assign a new value to the end attribute
163+
return 0 on success, -1 on failure */
164+
PyAPI_FUNC(int) PyUnicodeEncodeError_SetEnd(PyObject *, int);
165+
PyAPI_FUNC(int) PyUnicodeDecodeError_SetEnd(PyObject *, int);
166+
PyAPI_FUNC(int) PyUnicodeTranslateError_SetEnd(PyObject *, int);
167+
168+
/* get the value of the reason attribute */
169+
PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetReason(PyObject *);
170+
PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetReason(PyObject *);
171+
PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetReason(PyObject *);
172+
173+
/* assign a new value to the reason attribute
174+
return 0 on success, -1 on failure */
175+
PyAPI_FUNC(int) PyUnicodeEncodeError_SetReason(
176+
PyObject *, const char *);
177+
PyAPI_FUNC(int) PyUnicodeDecodeError_SetReason(
178+
PyObject *, const char *);
179+
PyAPI_FUNC(int) PyUnicodeTranslateError_SetReason(
180+
PyObject *, const char *);
181+
182+
117183
/* These APIs aren't really part of the error implementation, but
118184
often needed to format error messages; the native C lib APIs are
119185
not available on all platforms, which is why we provide emulations

Lib/codecs.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,10 @@
2020
__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
2121
"BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
2222
"BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
23-
"BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE"]
23+
"BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
24+
"strict_errors", "ignore_errors", "replace_errors",
25+
"xmlcharrefreplace_errors",
26+
"register_error", "lookup_error"]
2427

2528
### Constants
2629

@@ -632,6 +635,14 @@ def make_encoding_map(decoding_map):
632635
m[v] = None
633636
return m
634637

638+
### error handlers
639+
640+
strict_errors = lookup_error("strict")
641+
ignore_errors = lookup_error("ignore")
642+
replace_errors = lookup_error("replace")
643+
xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace")
644+
backslashreplace_errors = lookup_error("backslashreplace")
645+
635646
# Tell modulefinder that using codecs probably needs the encodings
636647
# package
637648
_false = 0

0 commit comments

Comments
 (0)