From 15bd961fffbbe61f8f9545b5a004831a35703632 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 3 Mar 2025 20:34:16 +0000 Subject: [PATCH 1/2] gh-130794: Process interpreter QSBR queue in _PyMem_AbandonDelayed. This avoids a case where the interpreter's queue of memory to be freed could grow rapidly if there are many short lived threads. --- ...-03-03-20-33-44.gh-issue-130794.LwtGQc.rst | 2 ++ Objects/obmalloc.c | 31 +++++++++++++------ Python/qsbr.c | 1 + 3 files changed, 25 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-03-03-20-33-44.gh-issue-130794.LwtGQc.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-03-20-33-44.gh-issue-130794.LwtGQc.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-03-20-33-44.gh-issue-130794.LwtGQc.rst new file mode 100644 index 00000000000000..2dfb53f92d232f --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-03-20-33-44.gh-issue-130794.LwtGQc.rst @@ -0,0 +1,2 @@ +Fix memory leak in the :term:`free threaded <free threading>` build when +resizing a shared list or dictionary from multiple short-lived threads. diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index dd6be1490af016..a02fac3f8f8f85 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1303,6 +1303,18 @@ static void process_interp_queue(struct _Py_mem_interp_free_queue *queue, struct _qsbr_thread_state *qsbr, delayed_dealloc_cb cb, void *state) +{ + assert(PyMutex_IsLocked(&queue->mutex)); + process_queue(&queue->head, qsbr, false, cb, state); + + int more_work = !llist_empty(&queue->head); + _Py_atomic_store_int_relaxed(&queue->has_work, more_work); +} + +static void +maybe_process_interp_queue(struct _Py_mem_interp_free_queue *queue, + struct _qsbr_thread_state *qsbr, delayed_dealloc_cb cb, + void *state) { if (!_Py_atomic_load_int_relaxed(&queue->has_work)) { return; @@ -1310,11 +1322,7 @@ process_interp_queue(struct _Py_mem_interp_free_queue *queue, // Try to acquire the lock, but don't block if it's already held. 
if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) { - process_queue(&queue->head, qsbr, false, cb, state); - - int more_work = !llist_empty(&queue->head); - _Py_atomic_store_int_relaxed(&queue->has_work, more_work); - + process_interp_queue(queue, qsbr, cb, state); PyMutex_Unlock(&queue->mutex); } } @@ -1329,7 +1337,7 @@ _PyMem_ProcessDelayed(PyThreadState *tstate) process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, NULL, NULL); // Process shared interpreter work - process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, NULL, NULL); + maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, NULL, NULL); } void @@ -1342,7 +1350,7 @@ _PyMem_ProcessDelayedNoDealloc(PyThreadState *tstate, delayed_dealloc_cb cb, voi process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true, cb, state); // Process shared interpreter work - process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, cb, state); + maybe_process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr, cb, state); } void @@ -1364,10 +1372,15 @@ _PyMem_AbandonDelayed(PyThreadState *tstate) return; } - // Merge the thread's work queue into the interpreter's work queue. PyMutex_Lock(&interp->mem_free_queue.mutex); + + // Merge the thread's work queue into the interpreter's work queue. llist_concat(&interp->mem_free_queue.head, queue); - _Py_atomic_store_int_relaxed(&interp->mem_free_queue.has_work, 1); + + // Process the merged queue now (see gh-130794). 
+ _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)tstate; + process_interp_queue(&interp->mem_free_queue, this_tstate->qsbr, NULL, NULL); + PyMutex_Unlock(&interp->mem_free_queue.mutex); assert(llist_empty(queue)); // the thread's queue is now empty diff --git a/Python/qsbr.c b/Python/qsbr.c index 0df1285cc8e063..386a8451dc40c0 100644 --- a/Python/qsbr.c +++ b/Python/qsbr.c @@ -161,6 +161,7 @@ bool _Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal) { assert(_Py_atomic_load_int_relaxed(&_PyThreadState_GET()->state) == _Py_THREAD_ATTACHED); + assert(((_PyThreadStateImpl *)_PyThreadState_GET())->qsbr == qsbr); if (_Py_qbsr_goal_reached(qsbr, goal)) { return true; From 4fa53c733bcbe1dc2f57476d336624f34b0f28b6 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 3 Mar 2025 20:45:24 +0000 Subject: [PATCH 2/2] Use the active thread's QSBR state --- Objects/obmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index a02fac3f8f8f85..33fca3abfb3509 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1378,7 +1378,7 @@ _PyMem_AbandonDelayed(PyThreadState *tstate) llist_concat(&interp->mem_free_queue.head, queue); // Process the merged queue now (see gh-130794). - _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)tstate; + _PyThreadStateImpl *this_tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); process_interp_queue(&interp->mem_free_queue, this_tstate->qsbr, NULL, NULL); PyMutex_Unlock(&interp->mem_free_queue.mutex);