From 49a1eeefd83b74969c156cc8da962b5f7516dd26 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Sun, 24 Dec 2023 11:57:41 +0300 Subject: [PATCH] gh-111784: Fix two segfaults in the elementtree module (GH-113405) First fix resolve situation when pyexpat module (which contains expat_CAPI capsule) deallocates before _elementtree, so we need to hold a strong reference to pyexpat module to. Second fix resolve situation when module state is deallocated before deallocation of XMLParser instances, which uses module state to clear some stuff. (cherry picked from commit 894f0e573d9eb49cd5864c44328f10a731852dab) Co-authored-by: Kirill Podoprigora --- ...023-12-23-13-10-42.gh-issue-111784.Nb4L1j.rst | 5 +++++ Modules/_elementtree.c | 16 ++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-12-23-13-10-42.gh-issue-111784.Nb4L1j.rst diff --git a/Misc/NEWS.d/next/Library/2023-12-23-13-10-42.gh-issue-111784.Nb4L1j.rst b/Misc/NEWS.d/next/Library/2023-12-23-13-10-42.gh-issue-111784.Nb4L1j.rst new file mode 100644 index 00000000000000..51ac0752cfae84 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-12-23-13-10-42.gh-issue-111784.Nb4L1j.rst @@ -0,0 +1,5 @@ +Fix segfaults in the ``_elementtree`` module. +Fix first segfault during deallocation of ``_elementtree.XMLParser`` instances by keeping strong reference +to ``pyexpat`` module in module state for capsule lifetime. +Fix second segfault which happens in the same deallocation process by keeping strong reference +to ``_elementtree`` module in ``XMLParser`` structure for ``_elementtree`` module lifetime. diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 6244fcc2064c33..620de8bb4c69a5 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -93,6 +93,7 @@ typedef struct { PyTypeObject *TreeBuilder_Type; PyTypeObject *XMLParser_Type; + PyObject *expat_capsule; struct PyExpat_CAPI *expat_capi; } elementtreestate; @@ -150,6 +151,7 @@ elementtree_clear(PyObject *m) Py_CLEAR(st->ElementIter_Type); Py_CLEAR(st->TreeBuilder_Type); Py_CLEAR(st->XMLParser_Type); + Py_CLEAR(st->expat_capsule); st->expat_capi = NULL; return 0; @@ -170,6 +172,7 @@ elementtree_traverse(PyObject *m, visitproc visit, void *arg) Py_VISIT(st->ElementIter_Type); Py_VISIT(st->TreeBuilder_Type); Py_VISIT(st->XMLParser_Type); + Py_VISIT(st->expat_capsule); return 0; } @@ -3065,6 +3068,7 @@ typedef struct { PyObject *handle_close; elementtreestate *state; + PyObject *elementtree_module; } XMLParserObject; /* helpers */ @@ -3611,7 +3615,11 @@ xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds) self->handle_start = self->handle_data = self->handle_end = NULL; self->handle_comment = self->handle_pi = self->handle_close = NULL; self->handle_doctype = NULL; - self->state = get_elementtree_state_by_type(type); + self->elementtree_module = PyType_GetModuleByDef(type, &elementtreemodule); + assert(self->elementtree_module != NULL); + Py_INCREF(self->elementtree_module); + // See gh-111784 for explanation why is reference to module needed here. + self->state = get_elementtree_state(self->elementtree_module); } return (PyObject *)self; } @@ -3788,6 +3796,7 @@ xmlparser_gc_clear(XMLParserObject *self) EXPAT(st, ParserFree)(parser); } + Py_CLEAR(self->elementtree_module); Py_CLEAR(self->handle_close); Py_CLEAR(self->handle_pi); Py_CLEAR(self->handle_comment); @@ -4347,7 +4356,10 @@ module_exec(PyObject *m) goto error; /* link against pyexpat */ - st->expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0); + if (!(st->expat_capsule = _PyImport_GetModuleAttrString("pyexpat", "expat_CAPI"))) + goto error; + if (!(st->expat_capi = PyCapsule_GetPointer(st->expat_capsule, PyExpat_CAPSULE_NAME))) + goto error; if (st->expat_capi) { /* check that it's usable */ if (strcmp(st->expat_capi->magic, PyExpat_CAPI_MAGIC) != 0 ||