Thanks to visit codestin.com
Credit goes to github.com

Skip to content

gh-102406: replace exception chaining by PEP-678 notes in codecs #102407

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 0 additions & 18 deletions Include/cpython/pyerrors.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,24 +116,6 @@ PyAPI_FUNC(int) _PyException_AddNote(
PyObject *exc,
PyObject *note);

/* Helper that attempts to replace the current exception with one of the
* same type but with a prefix added to the exception text. The resulting
* exception description looks like:
*
* prefix (exc_type: original_exc_str)
*
* Only some exceptions can be safely replaced. If the function determines
* it isn't safe to perform the replacement, it will leave the original
* unmodified exception in place.
*
* Returns a borrowed reference to the new exception (if any), NULL if the
* existing exception was left in place.
*/
PyAPI_FUNC(PyObject *) _PyErr_TrySetFromCause(
const char *prefix_format, /* ASCII-encoded string */
...
);

/* In signalmodule.c */

int PySignal_SetWakeupFd(int fd);
Expand Down
106 changes: 42 additions & 64 deletions Lib/test/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2819,24 +2819,19 @@ def test_binary_to_text_denylists_text_transforms(self):
self.assertIsNone(failure.exception.__cause__)

@unittest.skipUnless(zlib, "Requires zlib support")
def test_custom_zlib_error_is_wrapped(self):
def test_custom_zlib_error_is_noted(self):
# Check zlib codec gives a good error for malformed input
msg = "^decoding with 'zlib_codec' codec failed"
with self.assertRaisesRegex(Exception, msg) as failure:
msg = "decoding with 'zlib_codec' codec failed"
with self.assertRaises(Exception) as failure:
codecs.decode(b"hello", "zlib_codec")
self.assertIsInstance(failure.exception.__cause__,
type(failure.exception))
self.assertEqual(msg, failure.exception.__notes__[0])

def test_custom_hex_error_is_wrapped(self):
def test_custom_hex_error_is_noted(self):
# Check hex codec gives a good error for malformed input
msg = "^decoding with 'hex_codec' codec failed"
with self.assertRaisesRegex(Exception, msg) as failure:
msg = "decoding with 'hex_codec' codec failed"
with self.assertRaises(Exception) as failure:
codecs.decode(b"hello", "hex_codec")
self.assertIsInstance(failure.exception.__cause__,
type(failure.exception))

# Unfortunately, the bz2 module throws OSError, which the codec
# machinery currently can't wrap :(
self.assertEqual(msg, failure.exception.__notes__[0])

# Ensure codec aliases from http://bugs.python.org/issue7475 work
def test_aliases(self):
Expand All @@ -2860,11 +2855,8 @@ def test_uu_invalid(self):
self.assertRaises(ValueError, codecs.decode, b"", "uu-codec")


# The codec system tries to wrap exceptions in order to ensure the error
# mentions the operation being performed and the codec involved. We
# currently *only* want this to happen for relatively stateless
# exceptions, where the only significant information they contain is their
# type and a single str argument.
# The codec system tries to add notes to exceptions in order to ensure
# the error mentions the operation being performed and the codec involved.

# Use a local codec registry to avoid appearing to leak objects when
# registering multiple search functions
Expand All @@ -2874,10 +2866,10 @@ def _get_test_codec(codec_name):
return _TEST_CODECS.get(codec_name)


class ExceptionChainingTest(unittest.TestCase):
class ExceptionNotesTest(unittest.TestCase):

def setUp(self):
self.codec_name = 'exception_chaining_test'
self.codec_name = 'exception_notes_test'
codecs.register(_get_test_codec)
self.addCleanup(codecs.unregister, _get_test_codec)

Expand All @@ -2901,91 +2893,77 @@ def set_codec(self, encode, decode):
_TEST_CODECS[self.codec_name] = codec_info

@contextlib.contextmanager
def assertWrapped(self, operation, exc_type, msg):
full_msg = r"{} with {!r} codec failed \({}: {}\)".format(
operation, self.codec_name, exc_type.__name__, msg)
with self.assertRaisesRegex(exc_type, full_msg) as caught:
def assertNoted(self, operation, exc_type, msg):
full_msg = r"{} with {!r} codec failed".format(
operation, self.codec_name)
with self.assertRaises(exc_type) as caught:
yield caught
self.assertIsInstance(caught.exception.__cause__, exc_type)
self.assertIsNotNone(caught.exception.__cause__.__traceback__)
self.assertIn(full_msg, caught.exception.__notes__[0])
caught.exception.__notes__.clear()

def raise_obj(self, *args, **kwds):
# Helper to dynamically change the object raised by a test codec
raise self.obj_to_raise

def check_wrapped(self, obj_to_raise, msg, exc_type=RuntimeError):
def check_note(self, obj_to_raise, msg, exc_type=RuntimeError):
self.obj_to_raise = obj_to_raise
self.set_codec(self.raise_obj, self.raise_obj)
with self.assertWrapped("encoding", exc_type, msg):
with self.assertNoted("encoding", exc_type, msg):
"str_input".encode(self.codec_name)
with self.assertWrapped("encoding", exc_type, msg):
with self.assertNoted("encoding", exc_type, msg):
codecs.encode("str_input", self.codec_name)
with self.assertWrapped("decoding", exc_type, msg):
with self.assertNoted("decoding", exc_type, msg):
b"bytes input".decode(self.codec_name)
with self.assertWrapped("decoding", exc_type, msg):
with self.assertNoted("decoding", exc_type, msg):
codecs.decode(b"bytes input", self.codec_name)

def test_raise_by_type(self):
self.check_wrapped(RuntimeError, "")
self.check_note(RuntimeError, "")

def test_raise_by_value(self):
msg = "This should be wrapped"
self.check_wrapped(RuntimeError(msg), msg)
msg = "This should be noted"
self.check_note(RuntimeError(msg), msg)

def test_raise_grandchild_subclass_exact_size(self):
msg = "This should be wrapped"
msg = "This should be noted"
class MyRuntimeError(RuntimeError):
__slots__ = ()
self.check_wrapped(MyRuntimeError(msg), msg, MyRuntimeError)
self.check_note(MyRuntimeError(msg), msg, MyRuntimeError)

def test_raise_subclass_with_weakref_support(self):
msg = "This should be wrapped"
msg = "This should be noted"
class MyRuntimeError(RuntimeError):
pass
self.check_wrapped(MyRuntimeError(msg), msg, MyRuntimeError)

def check_not_wrapped(self, obj_to_raise, msg):
def raise_obj(*args, **kwds):
raise obj_to_raise
self.set_codec(raise_obj, raise_obj)
with self.assertRaisesRegex(RuntimeError, msg):
"str input".encode(self.codec_name)
with self.assertRaisesRegex(RuntimeError, msg):
codecs.encode("str input", self.codec_name)
with self.assertRaisesRegex(RuntimeError, msg):
b"bytes input".decode(self.codec_name)
with self.assertRaisesRegex(RuntimeError, msg):
codecs.decode(b"bytes input", self.codec_name)
self.check_note(MyRuntimeError(msg), msg, MyRuntimeError)

def test_init_override_is_not_wrapped(self):
def test_init_override(self):
class CustomInit(RuntimeError):
def __init__(self):
pass
self.check_not_wrapped(CustomInit, "")
self.check_note(CustomInit, "")

def test_new_override_is_not_wrapped(self):
def test_new_override(self):
class CustomNew(RuntimeError):
def __new__(cls):
return super().__new__(cls)
self.check_not_wrapped(CustomNew, "")
self.check_note(CustomNew, "")

def test_instance_attribute_is_not_wrapped(self):
msg = "This should NOT be wrapped"
def test_instance_attribute(self):
msg = "This should be noted"
exc = RuntimeError(msg)
exc.attr = 1
self.check_not_wrapped(exc, "^{}$".format(msg))
self.check_note(exc, "^{}$".format(msg))

def test_non_str_arg_is_not_wrapped(self):
self.check_not_wrapped(RuntimeError(1), "1")
def test_non_str_arg(self):
self.check_note(RuntimeError(1), "1")

def test_multiple_args_is_not_wrapped(self):
def test_multiple_args(self):
msg_re = r"^\('a', 'b', 'c'\)$"
self.check_not_wrapped(RuntimeError('a', 'b', 'c'), msg_re)
self.check_note(RuntimeError('a', 'b', 'c'), msg_re)

# http://bugs.python.org/issue19609
def test_codec_lookup_failure_not_wrapped(self):
def test_codec_lookup_failure(self):
msg = "^unknown encoding: {}$".format(self.codec_name)
# The initial codec lookup should not be wrapped
with self.assertRaisesRegex(LookupError, msg):
"str input".encode(self.codec_name)
with self.assertRaisesRegex(LookupError, msg):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
:mod:`codecs` encoding/decoding errors now get the context information (which operation and which codecs) attached as :pep:`678` notes instead of through chaining a new instance of the exception.
110 changes: 0 additions & 110 deletions Objects/exceptions.c
Original file line number Diff line number Diff line change
Expand Up @@ -3764,113 +3764,3 @@ _PyException_AddNote(PyObject *exc, PyObject *note)
return res;
}

/* Helper to do the equivalent of "raise X from Y" in C, but always using
* the current exception rather than passing one in.
*
* We currently limit this to *only* exceptions that use the BaseException
* tp_init and tp_new methods, since we can be reasonably sure we can wrap
* those correctly without losing data and without losing backwards
* compatibility.
*
* We also aim to rule out *all* exceptions that might be storing additional
* state, whether by having a size difference relative to BaseException,
* additional arguments passed in during construction or by having a
* non-empty instance dict.
*
* We need to be very careful with what we wrap, since changing types to
* a broader exception type would be backwards incompatible for
* existing codecs, and with different init or new method implementations
* may either not support instantiation with PyErr_Format or lose
* information when instantiated that way.
*
* XXX (ncoghlan): This could be made more comprehensive by exploiting the
* fact that exceptions are expected to support pickling. If more builtin
* exceptions (e.g. AttributeError) start to be converted to rich
* exceptions with additional attributes, that's probably a better approach
* to pursue over adding special cases for particular stateful subclasses.
*
* Returns a borrowed reference to the new exception (if any), NULL if the
* existing exception was left in place.
*/
PyObject *
_PyErr_TrySetFromCause(const char *format, ...)
{
PyObject* msg_prefix;
PyObject *instance_args;
Py_ssize_t num_args, caught_type_size, base_exc_size;
va_list vargs;
int same_basic_size;

PyObject *exc = PyErr_GetRaisedException();
PyTypeObject *caught_type = Py_TYPE(exc);
/* Ensure type info indicates no extra state is stored at the C level
* and that the type can be reinstantiated using PyErr_Format
*/
caught_type_size = caught_type->tp_basicsize;
base_exc_size = _PyExc_BaseException.tp_basicsize;
same_basic_size = (
caught_type_size == base_exc_size ||
(_PyType_SUPPORTS_WEAKREFS(caught_type) &&
(caught_type_size == base_exc_size + (Py_ssize_t)sizeof(PyObject *))
)
);
if (caught_type->tp_init != (initproc)BaseException_init ||
caught_type->tp_new != BaseException_new ||
!same_basic_size ||
caught_type->tp_itemsize != _PyExc_BaseException.tp_itemsize) {
/* We can't be sure we can wrap this safely, since it may contain
* more state than just the exception type. Accordingly, we just
* leave it alone.
*/
PyErr_SetRaisedException(exc);
return NULL;
}

/* Check the args are empty or contain a single string */
instance_args = ((PyBaseExceptionObject *)exc)->args;
num_args = PyTuple_GET_SIZE(instance_args);
if (num_args > 1 ||
(num_args == 1 &&
!PyUnicode_CheckExact(PyTuple_GET_ITEM(instance_args, 0)))) {
/* More than 1 arg, or the one arg we do have isn't a string
*/
PyErr_SetRaisedException(exc);
return NULL;
}

/* Ensure the instance dict is also empty */
if (!_PyObject_IsInstanceDictEmpty(exc)) {
/* While we could potentially copy a non-empty instance dictionary
* to the replacement exception, for now we take the more
* conservative path of leaving exceptions with attributes set
* alone.
*/
PyErr_SetRaisedException(exc);
return NULL;
}

/* For exceptions that we can wrap safely, we chain the original
* exception to a new one of the exact same type with an
* error message that mentions the additional details and the
* original exception.
*
* It would be nice to wrap OSError and various other exception
* types as well, but that's quite a bit trickier due to the extra
* state potentially stored on OSError instances.
*/
va_start(vargs, format);
msg_prefix = PyUnicode_FromFormatV(format, vargs);
va_end(vargs);
if (msg_prefix == NULL) {
Py_DECREF(exc);
return NULL;
}

PyErr_Format((PyObject*)Py_TYPE(exc), "%U (%s: %S)",
msg_prefix, Py_TYPE(exc)->tp_name, exc);
Py_DECREF(msg_prefix);
PyObject *new_exc = PyErr_GetRaisedException();
PyException_SetCause(new_exc, exc);
PyErr_SetRaisedException(new_exc);
return new_exc;
}
35 changes: 21 additions & 14 deletions Python/codecs.c
Original file line number Diff line number Diff line change
Expand Up @@ -382,20 +382,27 @@ PyObject *PyCodec_StreamWriter(const char *encoding,
return codec_getstreamcodec(encoding, stream, errors, 3);
}

/* Helper that tries to ensure the reported exception chain indicates the
* codec that was invoked to trigger the failure without changing the type
* of the exception raised.
*/
static void
wrap_codec_error(const char *operation,
const char *encoding)
add_note_to_codec_error(const char *operation,
const char *encoding)
{
/* TrySetFromCause will replace the active exception with a suitably
* updated clone if it can, otherwise it will leave the original
* exception alone.
*/
_PyErr_TrySetFromCause("%s with '%s' codec failed",
operation, encoding);
PyObject *exc = PyErr_GetRaisedException();
if (exc == NULL) {
return;
}
PyObject *note = PyUnicode_FromFormat("%s with '%s' codec failed",
operation, encoding);
if (note == NULL) {
_PyErr_ChainExceptions1(exc);
return;
}
int res = _PyException_AddNote(exc, note);
Py_DECREF(note);
if (res < 0) {
_PyErr_ChainExceptions1(exc);
return;
}
PyErr_SetRaisedException(exc);
}

/* Encode an object (e.g. a Unicode object) using the given encoding
Expand All @@ -418,7 +425,7 @@ _PyCodec_EncodeInternal(PyObject *object,

result = PyObject_Call(encoder, args, NULL);
if (result == NULL) {
wrap_codec_error("encoding", encoding);
add_note_to_codec_error("encoding", encoding);
goto onError;
}

Expand Down Expand Up @@ -463,7 +470,7 @@ _PyCodec_DecodeInternal(PyObject *object,

result = PyObject_Call(decoder, args, NULL);
if (result == NULL) {
wrap_codec_error("decoding", encoding);
add_note_to_codec_error("decoding", encoding);
goto onError;
}
if (!PyTuple_Check(result) ||
Expand Down