From 762cb259a0f581d350a90c5d1771a509969b0657 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 9 Oct 2024 11:32:16 -0600 Subject: [PATCH 1/2] gh-116510: Fix a Crash Due to Shared Immortal Interned Strings (gh-124865) Fix a crash caused by immortal interned strings being shared between sub-interpreters that use basic single-phase init. In that case, the string can be used by an interpreter that outlives the interpreter that created and interned it. For interpreters that share obmalloc state, also share the interned dict with the main interpreter. This is an un-revert of gh-124646 that then addresses the Py_TRACE_REFS failures identified by gh-124785. (cherry picked from commit f2cb39947093feda3ff85b8dc820922cc5e5f954) Co-authored-by: Eric Snow --- ...-09-26-18-21-06.gh-issue-116510.FacUWO.rst | 5 ++ Objects/object.c | 36 ++++++++++++ Objects/unicodeobject.c | 56 +++++++++++++++++-- 3 files changed, 91 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst new file mode 100644 index 00000000000000..e3741321006548 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-26-18-21-06.gh-issue-116510.FacUWO.rst @@ -0,0 +1,5 @@ +Fix a crash caused by immortal interned strings being shared between +sub-interpreters that use basic single-phase init. In that case, the string +can be used by an interpreter that outlives the interpreter that created and +interned it. For interpreters that share obmalloc state, also share the +interned dict with the main interpreter. diff --git a/Objects/object.c b/Objects/object.c index 6b2e0aeaab9903..648ea1f75d08fe 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2218,6 +2218,42 @@ _Py_NewReferenceNoTotal(PyObject *op) #ifdef Py_TRACE_REFS +/* Make sure the ref is associated with the right interpreter. + * This only needs special attention for heap-allocated objects + * that have been immortalized, and only when the object might + * outlive the interpreter where it was created. That means the + * object was necessarily created using a global allocator + * (i.e. from the main interpreter). Thus in that specific case + * we move the object over to the main interpreter's refchain. + * + * This was added for the sake of the immortal interned strings, + * where legacy subinterpreters share the main interpreter's + * interned dict (and allocator), and therefore the strings can + * outlive the subinterpreter. + * + * It may make sense to fold this into _Py_SetImmortalUntracked(), + * but that requires further investigation. In the meantime, it is + * up to the caller to know if this is needed. There should be + * very few cases. + */ +void +_Py_NormalizeImmortalReference(PyObject *op) +{ + assert(_Py_IsImmortal(op)); + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (!_PyRefchain_IsTraced(interp, op)) { + return; + } + PyInterpreterState *main_interp = _PyInterpreterState_Main(); + if (interp != main_interp + && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) + { + assert(!_PyRefchain_IsTraced(main_interp, op)); + _PyRefchain_Remove(interp, op); + _PyRefchain_Trace(main_interp, op); + } +} + void _Py_ForgetReference(PyObject *op) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 7bd4b221c83cf7..642ffd78d311df 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -287,13 +287,37 @@ hashtable_unicode_compare(const void *key1, const void *key2) } } +/* Return true if this interpreter should share the main interpreter's + intern_dict. That's important for interpreters which load basic + single-phase init extension modules (m_size == -1). There could be interned + immortal strings that are shared between interpreters, due to the + PyDict_Update(mdict, m_copy) call in import_find_extension(). + + It's not safe to deallocate those strings until all interpreters that + potentially use them are freed. By storing them in the main interpreter, we + ensure they get freed after all other interpreters are freed. +*/ +static bool +has_shared_intern_dict(PyInterpreterState *interp) +{ + PyInterpreterState *main_interp = _PyInterpreterState_Main(); + return interp != main_interp && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC; +} + static int init_interned_dict(PyInterpreterState *interp) { assert(get_interned_dict(interp) == NULL); - PyObject *interned = interned = PyDict_New(); - if (interned == NULL) { - return -1; + PyObject *interned; + if (has_shared_intern_dict(interp)) { + interned = get_interned_dict(_PyInterpreterState_Main()); + Py_INCREF(interned); + } + else { + interned = PyDict_New(); + if (interned == NULL) { + return -1; + } } _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned; return 0; @@ -304,7 +328,10 @@ clear_interned_dict(PyInterpreterState *interp) { PyObject *interned = get_interned_dict(interp); if (interned != NULL) { - PyDict_Clear(interned); + if (!has_shared_intern_dict(interp)) { + // only clear if the dict belongs to this interpreter + PyDict_Clear(interned); + } Py_DECREF(interned); _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL; } @@ -14939,6 +14966,10 @@ _PyUnicode_InternStatic(PyInterpreterState *interp, PyObject **p) assert(*p); } +#ifdef Py_TRACE_REFS +extern void _Py_NormalizeImmortalReference(PyObject *); +#endif + static void immortalize_interned(PyObject *s) { @@ -14954,6 +14985,10 @@ immortalize_interned(PyObject *s) #endif _PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL; _Py_SetImmortal(s); +#ifdef Py_TRACE_REFS + /* Make sure the ref is associated with the right interpreter. */ + _Py_NormalizeImmortalReference(s); +#endif } static /* non-null */ PyObject* @@ -15140,6 +15175,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) } assert(PyDict_CheckExact(interned)); + if (has_shared_intern_dict(interp)) { + // the dict doesn't belong to this interpreter, skip the debug + // checks on it and just clear the pointer to it + clear_interned_dict(interp); + return; + } + #ifdef INTERNED_STATS fprintf(stderr, "releasing %zd interned strings\n", PyDict_GET_SIZE(interned)); @@ -15658,8 +15700,10 @@ _PyUnicode_Fini(PyInterpreterState *interp) { struct _Py_unicode_state *state = &interp->unicode; - // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() - assert(get_interned_dict(interp) == NULL); + if (!has_shared_intern_dict(interp)) { + // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() + assert(get_interned_dict(interp) == NULL); + } _PyUnicode_FiniEncodings(&state->fs_codec); From cdb58a74b70348f4facb3f0877998d49b16f3fc0 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 23 Oct 2024 10:10:06 -0600 Subject: [PATCH 2/2] [3.12] gh-125286: Share the Main Refchain With Legacy Interpreters (gh-125709) They used to be shared, before 3.12. Returning to sharing them resolves a failure on Py_TRACE_REFS builds. --- Doc/library/sys.rst | 29 +++++++++++++ Doc/using/configure.rst | 2 +- Doc/whatsnew/3.12.rst | 11 +++++ Include/internal/pycore_object_state.h | 8 +++- Include/internal/pycore_runtime_init.h | 7 ---- Objects/object.c | 58 ++++++++------------------ Objects/unicodeobject.c | 8 ---- Python/pylifecycle.c | 8 ++++ Python/pystate.c | 4 +- 9 files changed, 77 insertions(+), 58 deletions(-) diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 40d0ef80a3fe43..222cf0e5792ff6 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -907,6 +907,35 @@ always available. It is not guaranteed to exist in all implementations of Python. +.. function:: getobjects(limit[, type]) + + This function only exists if CPython was built using the + specialized configure option :option:`--with-trace-refs`. + It is intended only for debugging garbage-collection issues. + + Return a list of up to *limit* dynamically allocated Python objects. + If *type* is given, only objects of that exact type (not subtypes) + are included. + + Objects from the list are not safe to use. + Specifically, the result will include objects from all interpreters that + share their object allocator state (that is, ones created with + :c:member:`PyInterpreterConfig.use_main_obmalloc` set to 1 + or using :c:func:`Py_NewInterpreter`, and the + :ref:`main interpreter `). + Mixing objects from different interpreters may lead to crashes + or other unexpected behavior. + + .. impl-detail:: + + This function should be used for specialized purposes only. + It is not guaranteed to exist in all implementations of Python. + + .. versionchanged:: next + + The result may include objects from other interpreters. + + .. function:: getprofile() .. index:: diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 51af4e3b7ad7a8..fed1d1e2c75940 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -459,7 +459,7 @@ Debug options Effects: * Define the ``Py_TRACE_REFS`` macro. - * Add :func:`!sys.getobjects` function. + * Add :func:`sys.getobjects` function. * Add :envvar:`PYTHONDUMPREFS` environment variable. This build is not ABI compatible with release build (default build) or debug diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 5c36e2d4bcfb49..0ba1f55f731a2a 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -2294,3 +2294,14 @@ email check if the *strict* paramater is available. (Contributed by Thomas Dwyer and Victor Stinner for :gh:`102988` to improve the CVE-2023-27043 fix.) + + +Notable changes in 3.12.8 +========================= + +sys +--- + +* The previously undocumented special function :func:`sys.getobjects`, + which only exists in specialized builds of Python, may now return objects + from other interpreters than the one it's called in. diff --git a/Include/internal/pycore_object_state.h b/Include/internal/pycore_object_state.h index 65feb5af969f8b..6e07b1a01b0e34 100644 --- a/Include/internal/pycore_object_state.h +++ b/Include/internal/pycore_object_state.h @@ -24,7 +24,13 @@ struct _py_object_state { * together via the _ob_prev and _ob_next members of a PyObject, which * exist only in a Py_TRACE_REFS build. */ - PyObject refchain; + PyObject *refchain; + /* In most cases, refchain points to _refchain_obj. + * In sub-interpreters that share objmalloc state with the main interp, + * refchain points to the main interpreter's _refchain_obj, and their own + * _refchain_obj is unused. + */ + PyObject _refchain_obj; #endif int _not_used; }; diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index e5f9e17efff24b..ad90ea680a52c3 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -132,15 +132,8 @@ extern PyTypeObject _PyExc_MemoryError; .context_ver = 1, \ } -#ifdef Py_TRACE_REFS -# define _py_object_state_INIT(INTERP) \ - { \ - .refchain = {&INTERP.object_state.refchain, &INTERP.object_state.refchain}, \ - } -#else # define _py_object_state_INIT(INTERP) \ { 0 } -#endif // global objects diff --git a/Objects/object.c b/Objects/object.c index 648ea1f75d08fe..706719116da3e2 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -159,11 +159,27 @@ _PyDebug_PrintTotalRefs(void) { #ifdef Py_TRACE_REFS -#define REFCHAIN(interp) &interp->object_state.refchain +#define REFCHAIN(interp) interp->object_state.refchain + +static inline int +has_own_refchain(PyInterpreterState *interp) +{ + if (interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) { + return (_Py_IsMainInterpreter(interp) + || _PyInterpreterState_Main() == NULL); + } + return 1; +} static inline void init_refchain(PyInterpreterState *interp) { + if (!has_own_refchain(interp)) { + // Legacy subinterpreters share a refchain with the main interpreter. + REFCHAIN(interp) = REFCHAIN(_PyInterpreterState_Main()); + return; + } + REFCHAIN(interp) = &interp->object_state._refchain_obj; PyObject *refchain = REFCHAIN(interp); refchain->_ob_prev = refchain; refchain->_ob_next = refchain; @@ -2010,9 +2026,7 @@ void _PyObject_InitState(PyInterpreterState *interp) { #ifdef Py_TRACE_REFS - if (!_Py_IsMainInterpreter(interp)) { - init_refchain(interp); - } + init_refchain(interp); #endif } @@ -2218,42 +2232,6 @@ _Py_NewReferenceNoTotal(PyObject *op) #ifdef Py_TRACE_REFS -/* Make sure the ref is associated with the right interpreter. - * This only needs special attention for heap-allocated objects - * that have been immortalized, and only when the object might - * outlive the interpreter where it was created. That means the - * object was necessarily created using a global allocator - * (i.e. from the main interpreter). Thus in that specific case - * we move the object over to the main interpreter's refchain. - * - * This was added for the sake of the immortal interned strings, - * where legacy subinterpreters share the main interpreter's - * interned dict (and allocator), and therefore the strings can - * outlive the subinterpreter. - * - * It may make sense to fold this into _Py_SetImmortalUntracked(), - * but that requires further investigation. In the meantime, it is - * up to the caller to know if this is needed. There should be - * very few cases. - */ -void -_Py_NormalizeImmortalReference(PyObject *op) -{ - assert(_Py_IsImmortal(op)); - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!_PyRefchain_IsTraced(interp, op)) { - return; - } - PyInterpreterState *main_interp = _PyInterpreterState_Main(); - if (interp != main_interp - && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) - { - assert(!_PyRefchain_IsTraced(main_interp, op)); - _PyRefchain_Remove(interp, op); - _PyRefchain_Trace(main_interp, op); - } -} - void _Py_ForgetReference(PyObject *op) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 642ffd78d311df..8112c740c3c4ca 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -14966,10 +14966,6 @@ _PyUnicode_InternStatic(PyInterpreterState *interp, PyObject **p) assert(*p); } -#ifdef Py_TRACE_REFS -extern void _Py_NormalizeImmortalReference(PyObject *); -#endif - static void immortalize_interned(PyObject *s) { @@ -14985,10 +14981,6 @@ immortalize_interned(PyObject *s) #endif _PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL; _Py_SetImmortal(s); -#ifdef Py_TRACE_REFS - /* Make sure the ref is associated with the right interpreter. */ - _Py_NormalizeImmortalReference(s); -#endif } static /* non-null */ PyObject* diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 6d580c6d4892fd..e9c1a0d72d6d4d 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -650,6 +650,10 @@ pycore_create_interpreter(_PyRuntimeState *runtime, return status; } + // This could be done in init_interpreter() (in pystate.c) if it + // didn't depend on interp->feature_flags being set already. + _PyObject_InitState(interp); + PyThreadState *tstate = _PyThreadState_New(interp); if (tstate == NULL) { return _PyStatus_ERR("can't make first thread"); @@ -2103,6 +2107,10 @@ new_interpreter(PyThreadState **tstate_p, const PyInterpreterConfig *config) goto error; } + // This could be done in init_interpreter() (in pystate.c) if it + // didn't depend on interp->feature_flags being set already. + _PyObject_InitState(interp); + status = init_interp_create_gil(tstate, config->gil); if (_PyStatus_EXCEPTION(status)) { goto error; diff --git a/Python/pystate.c b/Python/pystate.c index d0651fbd592f43..ae86fd1c0fcb3d 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -686,7 +686,9 @@ init_interpreter(PyInterpreterState *interp, _obmalloc_pools_INIT(interp->obmalloc.pools); memcpy(&interp->obmalloc.pools.used, temp, sizeof(temp)); } - _PyObject_InitState(interp); + + // We would call _PyObject_InitState() at this point + // if interp->feature_flags were alredy set. _PyEval_InitState(interp, pending_lock); _PyGC_InitState(&interp->gc);