From 7ef2e30c9d0d45472c3942ff9733f1d7dfc184ec Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 3 Jun 2025 20:47:13 +0000 Subject: [PATCH 1/2] gh-133136: Limit excess memory held by QSBR The free threading build uses QSBR to delay the freeing of dictionary keys and list arrays when the objects are accessed by multiple threads in order to allow concurrent reads to proceed without holding the object lock. The requests are processed in batches to reduce execution overhead, but for large memory blocks this can lead to excess memory usage. Take into account the size of the memory block when deciding when to process QSBR requests. --- Include/internal/pycore_pymem.h | 2 +- Include/internal/pycore_qsbr.h | 4 ++ ...-06-03-21-06-22.gh-issue-133136.Usnvri.rst | 2 + Objects/codeobject.c | 2 +- Objects/dictobject.c | 4 +- Objects/listobject.c | 3 +- Objects/obmalloc.c | 39 +++++++++++++++---- 7 files changed, 44 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index 02537bdfef8598..3e12084b82ab26 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -88,7 +88,7 @@ extern wchar_t *_PyMem_DefaultRawWcsdup(const wchar_t *str); extern int _PyMem_DebugEnabled(void); // Enqueue a pointer to be freed possibly after some delay. 
-extern void _PyMem_FreeDelayed(void *ptr); +extern void _PyMem_FreeDelayed(void *ptr, size_t size); // Enqueue an object to be freed possibly after some delay #ifdef Py_GIL_DISABLED diff --git a/Include/internal/pycore_qsbr.h b/Include/internal/pycore_qsbr.h index b835c3abaf5d0b..e839427ddf45db 100644 --- a/Include/internal/pycore_qsbr.h +++ b/Include/internal/pycore_qsbr.h @@ -51,6 +51,10 @@ struct _qsbr_thread_state { // Used to defer advancing write sequence a fixed number of times int deferrals; + // Estimate for the amount of memory that is held by this thread since + // the last non-deferred advance. + size_t memory_deferred; + // Is this thread state allocated? bool allocated; struct _qsbr_thread_state *freelist_next; diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst new file mode 100644 index 00000000000000..a9501c13c95b3a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst @@ -0,0 +1,2 @@ +Limit excess memory usage in the :term:`free threading` build when a +large dictionary or list is resized and accessed by multiple threads. 
diff --git a/Objects/codeobject.c b/Objects/codeobject.c index ee869d991d93cd..d0ce49d9d14ef6 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -3350,7 +3350,7 @@ create_tlbc_lock_held(PyCodeObject *co, Py_ssize_t idx) } memcpy(new_tlbc->entries, tlbc->entries, tlbc->size * sizeof(void *)); _Py_atomic_store_ptr_release(&co->co_tlbc, new_tlbc); - _PyMem_FreeDelayed(tlbc); + _PyMem_FreeDelayed(tlbc, tlbc->size * sizeof(void *)); tlbc = new_tlbc; } char *bc = PyMem_Calloc(1, _PyCode_NBYTES(co)); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index fd8ccf56324207..6ff97cd67111a4 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -813,7 +813,7 @@ free_keys_object(PyDictKeysObject *keys, bool use_qsbr) { #ifdef Py_GIL_DISABLED if (use_qsbr) { - _PyMem_FreeDelayed(keys); + _PyMem_FreeDelayed(keys, _PyDict_KeysSize(keys)); return; } #endif @@ -858,7 +858,7 @@ free_values(PyDictValues *values, bool use_qsbr) assert(values->embedded == 0); #ifdef Py_GIL_DISABLED if (use_qsbr) { - _PyMem_FreeDelayed(values); + _PyMem_FreeDelayed(values, values_size_from_count(values->capacity)); return; } #endif diff --git a/Objects/listobject.c b/Objects/listobject.c index c5895645a2dd12..23d3472b6d4153 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -61,7 +61,8 @@ free_list_items(PyObject** items, bool use_qsbr) #ifdef Py_GIL_DISABLED _PyListArray *array = _Py_CONTAINER_OF(items, _PyListArray, ob_item); if (use_qsbr) { - _PyMem_FreeDelayed(array); + size_t size = sizeof(_PyListArray) + array->allocated * sizeof(PyObject *); + _PyMem_FreeDelayed(array, size); } else { PyMem_Free(array); diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index d3931aab623b70..5d6e65b1fffde3 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1141,8 +1141,27 @@ free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state) } } +static int +should_advance_qsbr(_PyThreadStateImpl *tstate, size_t size) +{ + // If the deferred memory exceeds 1 MiB, 
we force an advance in the + // shared QSBR sequence number to limit excess memory usage. + static const size_t QSBR_DEFERRED_LIMIT = 1024 * 1024; + if (size > QSBR_DEFERRED_LIMIT) { + tstate->qsbr->memory_deferred = 0; + return 1; + } + + tstate->qsbr->memory_deferred += size; + if (tstate->qsbr->memory_deferred > QSBR_DEFERRED_LIMIT) { + tstate->qsbr->memory_deferred = 0; + return 1; + } + return 0; +} + static void -free_delayed(uintptr_t ptr) +free_delayed(uintptr_t ptr, size_t size) { #ifndef Py_GIL_DISABLED free_work_item(ptr, NULL, NULL); @@ -1200,23 +1219,29 @@ free_delayed(uintptr_t ptr) } assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK); - uint64_t seq = _Py_qsbr_deferred_advance(tstate->qsbr); + uint64_t seq; + int force_advance = should_advance_qsbr(tstate, size); + if (force_advance) { + seq = _Py_qsbr_advance(tstate->qsbr->shared); + } + else { + seq = _Py_qsbr_deferred_advance(tstate->qsbr); + } buf->array[buf->wr_idx].ptr = ptr; buf->array[buf->wr_idx].qsbr_goal = seq; buf->wr_idx++; - - if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) { + if (buf->wr_idx == WORK_ITEMS_PER_CHUNK || force_advance) { _PyMem_ProcessDelayed((PyThreadState *)tstate); } #endif } void -_PyMem_FreeDelayed(void *ptr) +_PyMem_FreeDelayed(void *ptr, size_t size) { assert(!((uintptr_t)ptr & 0x01)); if (ptr != NULL) { - free_delayed((uintptr_t)ptr); + free_delayed((uintptr_t)ptr, size); } } @@ -1226,7 +1251,7 @@ _PyObject_XDecRefDelayed(PyObject *ptr) { assert(!((uintptr_t)ptr & 0x01)); if (ptr != NULL) { - free_delayed(((uintptr_t)ptr)|0x01); + free_delayed(((uintptr_t)ptr)|0x01, 64); } } #endif From ce9232b8ab26869b1d33ab851f82708ec79ef555 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 3 Jun 2025 21:36:02 +0000 Subject: [PATCH 2/2] Fix unused function warning --- Objects/obmalloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 5d6e65b1fffde3..5af2ed5a21afa9 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ 
-1141,6 +1141,7 @@ free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state) } } +#ifdef Py_GIL_DISABLED static int should_advance_qsbr(_PyThreadStateImpl *tstate, size_t size) { @@ -1159,6 +1160,7 @@ should_advance_qsbr(_PyThreadStateImpl *tstate, size_t size) } return 0; } +#endif static void free_delayed(uintptr_t ptr, size_t size)