diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h index 02537bdfef8598..3e12084b82ab26 100644 --- a/Include/internal/pycore_pymem.h +++ b/Include/internal/pycore_pymem.h @@ -88,7 +88,7 @@ extern wchar_t *_PyMem_DefaultRawWcsdup(const wchar_t *str); extern int _PyMem_DebugEnabled(void); // Enqueue a pointer to be freed possibly after some delay. -extern void _PyMem_FreeDelayed(void *ptr); +extern void _PyMem_FreeDelayed(void *ptr, size_t size); // Enqueue an object to be freed possibly after some delay #ifdef Py_GIL_DISABLED diff --git a/Include/internal/pycore_qsbr.h b/Include/internal/pycore_qsbr.h index b835c3abaf5d0b..e839427ddf45db 100644 --- a/Include/internal/pycore_qsbr.h +++ b/Include/internal/pycore_qsbr.h @@ -51,6 +51,10 @@ struct _qsbr_thread_state { // Used to defer advancing write sequence a fixed number of times int deferrals; + // Estimate for the amount of memory that is held by this thread since + // the last non-deferred advance. + size_t memory_deferred; + // Is this thread state allocated? bool allocated; struct _qsbr_thread_state *freelist_next; diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst new file mode 100644 index 00000000000000..a9501c13c95b3a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-03-21-06-22.gh-issue-133136.Usnvri.rst @@ -0,0 +1,2 @@ +Limit excess memory usage in the :term:`free threading` build when a +large dictionary or list is resized and accessed by multiple threads. diff --git a/Objects/codeobject.c b/Objects/codeobject.c index ee869d991d93cd..d0ce49d9d14ef6 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -3350,7 +3350,7 @@ create_tlbc_lock_held(PyCodeObject *co, Py_ssize_t idx) } memcpy(new_tlbc->entries, tlbc->entries, tlbc->size * sizeof(void *)); _Py_atomic_store_ptr_release(&co->co_tlbc, new_tlbc); - _PyMem_FreeDelayed(tlbc); + _PyMem_FreeDelayed(tlbc, tlbc->size * sizeof(void *)); tlbc = new_tlbc; } char *bc = PyMem_Calloc(1, _PyCode_NBYTES(co)); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index fd8ccf56324207..6ff97cd67111a4 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -813,7 +813,7 @@ free_keys_object(PyDictKeysObject *keys, bool use_qsbr) { #ifdef Py_GIL_DISABLED if (use_qsbr) { - _PyMem_FreeDelayed(keys); + _PyMem_FreeDelayed(keys, _PyDict_KeysSize(keys)); return; } #endif @@ -858,7 +858,7 @@ free_values(PyDictValues *values, bool use_qsbr) assert(values->embedded == 0); #ifdef Py_GIL_DISABLED if (use_qsbr) { - _PyMem_FreeDelayed(values); + _PyMem_FreeDelayed(values, values_size_from_count(values->capacity)); return; } #endif diff --git a/Objects/listobject.c b/Objects/listobject.c index c5895645a2dd12..23d3472b6d4153 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -61,7 +61,8 @@ free_list_items(PyObject** items, bool use_qsbr) #ifdef Py_GIL_DISABLED _PyListArray *array = _Py_CONTAINER_OF(items, _PyListArray, ob_item); if (use_qsbr) { - _PyMem_FreeDelayed(array); + size_t size = sizeof(_PyListArray) + array->allocated * sizeof(PyObject *); + _PyMem_FreeDelayed(array, size); } else { PyMem_Free(array); diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index d3931aab623b70..5af2ed5a21afa9 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1141,8 +1141,29 @@ free_work_item(uintptr_t ptr, delayed_dealloc_cb cb, void *state) } } +#ifdef Py_GIL_DISABLED +static int +should_advance_qsbr(_PyThreadStateImpl *tstate, size_t size) +{ + // If the deferred memory exceeds 1 MiB, we force an advance in the + // shared QSBR sequence number to limit excess memory usage. + static const size_t QSBR_DEFERRED_LIMIT = 1024 * 1024; + if (size > QSBR_DEFERRED_LIMIT) { + tstate->qsbr->memory_deferred = 0; + return 1; + } + + tstate->qsbr->memory_deferred += size; + if (tstate->qsbr->memory_deferred > QSBR_DEFERRED_LIMIT) { + tstate->qsbr->memory_deferred = 0; + return 1; + } + return 0; +} +#endif + static void -free_delayed(uintptr_t ptr) +free_delayed(uintptr_t ptr, size_t size) { #ifndef Py_GIL_DISABLED free_work_item(ptr, NULL, NULL); @@ -1200,23 +1221,29 @@ free_delayed(uintptr_t ptr) } assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK); - uint64_t seq = _Py_qsbr_deferred_advance(tstate->qsbr); + uint64_t seq; + int force_advance = should_advance_qsbr(tstate, size); + if (force_advance) { + seq = _Py_qsbr_advance(tstate->qsbr->shared); + } + else { + seq = _Py_qsbr_deferred_advance(tstate->qsbr); + } buf->array[buf->wr_idx].ptr = ptr; buf->array[buf->wr_idx].qsbr_goal = seq; buf->wr_idx++; - - if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) { + if (buf->wr_idx == WORK_ITEMS_PER_CHUNK || force_advance) { _PyMem_ProcessDelayed((PyThreadState *)tstate); } #endif } void -_PyMem_FreeDelayed(void *ptr) +_PyMem_FreeDelayed(void *ptr, size_t size) { assert(!((uintptr_t)ptr & 0x01)); if (ptr != NULL) { - free_delayed((uintptr_t)ptr); + free_delayed((uintptr_t)ptr, size); } } @@ -1226,7 +1253,7 @@ _PyObject_XDecRefDelayed(PyObject *ptr) { assert(!((uintptr_t)ptr & 0x01)); if (ptr != NULL) { - free_delayed(((uintptr_t)ptr)|0x01); + free_delayed(((uintptr_t)ptr)|0x01, 64); } } #endif