diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h
index 6b5113714dbeb2..56832960af6a14 100644
--- a/Include/internal/pycore_pymem.h
+++ b/Include/internal/pycore_pymem.h
@@ -42,6 +42,11 @@ struct _pymem_allocators {
         debug_alloc_api_t obj;
     } debug;
     PyObjectArenaAllocator obj_arena;
+    unsigned int num_gils;
+    struct {
+        PyMemAllocatorEx mem;
+        PyMemAllocatorEx obj;
+    } wrapped_with_lock;
 };
 
 
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 7d552ff57c8f1e..432c804559c9a1 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -1,6 +1,7 @@
 /* Python's malloc wrappers (see pymem.h) */
 
 #include "Python.h"
+#include "pycore_ceval.h"         // _PyEval_AcquireLock()
 #include "pycore_code.h"          // stats
 #include "pycore_object.h"        // _PyDebugAllocatorStats() definition
 #include "pycore_obmalloc.h"
@@ -210,6 +211,8 @@ _PyMem_ArenaFree(void *Py_UNUSED(ctx), void *ptr,
 #define _PyObject (_PyRuntime.allocators.standard.obj)
 #define _PyMem_Debug (_PyRuntime.allocators.debug)
 #define _PyObject_Arena (_PyRuntime.allocators.obj_arena)
+#define _PyMem_Wrapped (_PyRuntime.allocators.wrapped_with_lock.mem)
+#define _PyObject_Wrapped (_PyRuntime.allocators.wrapped_with_lock.obj)
 
 
 /***************************/
@@ -531,14 +534,32 @@ PyMem_SetupDebugHooks(void)
     PyThread_release_lock(ALLOCATORS_MUTEX);
 }
 
+static int has_locking_wrapper(PyMemAllocatorEx *allocator);
+
 static void
 get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
 {
     switch(domain)
     {
-    case PYMEM_DOMAIN_RAW: *allocator = _PyMem_Raw; break;
-    case PYMEM_DOMAIN_MEM: *allocator = _PyMem; break;
-    case PYMEM_DOMAIN_OBJ: *allocator = _PyObject; break;
+    case PYMEM_DOMAIN_RAW:
+        *allocator = _PyMem_Raw;
+        break;
+    case PYMEM_DOMAIN_MEM:
+        if (has_locking_wrapper(&_PyMem)) {
+            *allocator = *(PyMemAllocatorEx *)_PyMem.ctx;
+        }
+        else {
+            *allocator = _PyMem;
+        }
+        break;
+    case PYMEM_DOMAIN_OBJ:
+        if (has_locking_wrapper(&_PyObject)) {
+            *allocator = *(PyMemAllocatorEx *)_PyObject.ctx;
+        }
+        else {
+            *allocator = _PyObject;
+        }
+        break;
     default:
         /* unknown domain: set all attributes to NULL */
         allocator->ctx = NULL;
@@ -549,15 +570,28 @@ get_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
     }
 }
 
+static void maybe_add_locking_wrapper(
+        PyMemAllocatorDomain, PyMemAllocatorEx *, PyMemAllocatorEx *);
+
 static void
 set_allocator_unlocked(PyMemAllocatorDomain domain, PyMemAllocatorEx *allocator)
 {
     switch(domain)
     {
-    case PYMEM_DOMAIN_RAW: _PyMem_Raw = *allocator; break;
-    case PYMEM_DOMAIN_MEM: _PyMem = *allocator; break;
-    case PYMEM_DOMAIN_OBJ: _PyObject = *allocator; break;
-    /* ignore unknown domain */
+    case PYMEM_DOMAIN_RAW:
+        _PyMem_Raw = *allocator;
+        break;
+    case PYMEM_DOMAIN_MEM:
+        _PyMem = *allocator;
+        maybe_add_locking_wrapper(domain, &_PyMem, &_PyMem_Wrapped);
+        break;
+    case PYMEM_DOMAIN_OBJ:
+        _PyObject = *allocator;
+        maybe_add_locking_wrapper(domain, &_PyObject, &_PyObject_Wrapped);
+        break;
+    default:
+        /* ignore unknown domain */
+        return;
     }
 }
 
@@ -628,6 +662,192 @@ PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator)
  * around these uses of the runtime-global allocators state.
  */
 
+/************************************/
+/* locking around custom allocators */
+/************************************/
+
+static inline int
+should_lock(PyInterpreterState *interp)
+{
+    return interp->ceval.gil != _PyInterpreterState_Main()->ceval.gil;
+}
+
+static PyThreadState *
+get_tstate_for_main_gil(void)
+{
+    PyThreadState *tstate = _PyRuntimeState_GetFinalizing(&_PyRuntime);
+    if (tstate == NULL) {
+        /* To use its GIL, we only need the pointer and one field. */
+        static const PyThreadState _main_tstate = {
+            .interp = &_PyRuntime._main_interpreter,
+        };
+        tstate = (PyThreadState *)&_main_tstate;
+    }
+    return tstate;
+}
+
+static inline void
+acquire_custom_allocator_lock(PyThreadState *tstate)
+{
+    _PyEval_AcquireLock(tstate);
+}
+
+static inline void
+release_custom_allocator_lock(PyThreadState *tstate)
+{
+    _PyEval_ReleaseLock(tstate->interp, tstate);
+}
+
+static void *
+_PyMem_MallocLocked(void *ctx, size_t size)
+{
+    PyMemAllocatorEx *wrapped = (PyMemAllocatorEx *)ctx;
+    if (_PyRuntime.allocators.num_gils > 1) {
+        PyThreadState *tstate = _PyThreadState_GET();
+        if (should_lock(tstate->interp)) {
+            tstate = get_tstate_for_main_gil();
+            acquire_custom_allocator_lock(tstate);
+            void *ptr = wrapped->malloc(wrapped->ctx, size);
+            release_custom_allocator_lock(tstate);
+            return ptr;
+        }
+    }
+    return wrapped->malloc(wrapped->ctx, size);
+}
+
+static void *
+_PyMem_CallocLocked(void *ctx, size_t nelem, size_t elsize)
+{
+    PyMemAllocatorEx *wrapped = (PyMemAllocatorEx *)ctx;
+    if (_PyRuntime.allocators.num_gils > 1) {
+        PyThreadState *tstate = _PyThreadState_GET();
+        if (should_lock(tstate->interp)) {
+            tstate = get_tstate_for_main_gil();
+            acquire_custom_allocator_lock(tstate);
+            void *ptr = wrapped->calloc(wrapped->ctx, nelem, elsize);
+            release_custom_allocator_lock(tstate);
+            return ptr;
+        }
+    }
+    return wrapped->calloc(wrapped->ctx, nelem, elsize);
+}
+
+static void *
+_PyMem_ReallocLocked(void *ctx, void *ptr, size_t new_size)
+{
+    PyMemAllocatorEx *wrapped = (PyMemAllocatorEx *)ctx;
+    if (_PyRuntime.allocators.num_gils > 1) {
+        PyThreadState *tstate = _PyThreadState_GET();
+        if (should_lock(tstate->interp)) {
+            tstate = get_tstate_for_main_gil();
+            acquire_custom_allocator_lock(tstate);
+            ptr = wrapped->realloc(wrapped->ctx, ptr, new_size);
+            release_custom_allocator_lock(tstate);
+            return ptr;
+        }
+    }
+    return wrapped->realloc(wrapped->ctx, ptr, new_size);
+}
+
+static void
+_PyMem_FreeLocked(void *ctx, void *ptr)
+{
+    PyMemAllocatorEx *wrapped = (PyMemAllocatorEx *)ctx;
+    if (_PyRuntime.allocators.num_gils > 1) {
+        PyThreadState *tstate = _PyThreadState_GET();
+        if (should_lock(tstate->interp)) {
+            tstate = get_tstate_for_main_gil();
+            acquire_custom_allocator_lock(tstate);
+            wrapped->free(wrapped->ctx, ptr);
+            release_custom_allocator_lock(tstate);
+            return;
+        }
+    }
+    wrapped->free(wrapped->ctx, ptr);
+}
+
+static int
+has_locking_wrapper(PyMemAllocatorEx *allocator)
+{
+    if (allocator->ctx == NULL) {
+        return 0;
+    }
+    return (allocator->malloc == _PyMem_MallocLocked
+            || allocator->calloc == _PyMem_CallocLocked
+            || allocator->realloc == _PyMem_ReallocLocked
+            || allocator->free == _PyMem_FreeLocked);
+}
+
+static void
+maybe_add_locking_wrapper(PyMemAllocatorDomain domain,
+                          PyMemAllocatorEx *allocator,
+                          PyMemAllocatorEx *wrapped)
+{
+    assert(domain == PYMEM_DOMAIN_MEM || domain == PYMEM_DOMAIN_OBJ);
+
+    *wrapped = (PyMemAllocatorEx){0};
+
+    if (allocator->malloc == _PyMem_DebugMalloc) {
+        /* The debug allocator only wraps an already set allocator,
+         * which would have gone through this function already. */
+        return;
+    }
+
+    void *ctx = allocator->ctx;
+
+    /* What is the likelihood of reentrancy with the wrapper funcs?
+     * For now we assume it is effectively zero. */
+
+    if (allocator->malloc != _PyMem_RawMalloc
+#ifdef WITH_PYMALLOC
+        && allocator->malloc != _PyObject_Malloc
+#endif
+        && allocator->malloc != _PyMem_MallocLocked)
+    {
+        wrapped->ctx = ctx;
+        wrapped->malloc = allocator->malloc;
+        allocator->ctx = wrapped;
+        allocator->malloc = _PyMem_MallocLocked;
+    }
+
+    if (allocator->calloc != _PyMem_RawCalloc
+#ifdef WITH_PYMALLOC
+        && allocator->calloc != _PyObject_Calloc
+#endif
+        && allocator->calloc != _PyMem_CallocLocked)
+    {
+        wrapped->ctx = ctx;
+        wrapped->calloc = allocator->calloc;
+        allocator->ctx = wrapped;
+        allocator->calloc = _PyMem_CallocLocked;
+    }
+
+    if (allocator->realloc != _PyMem_RawRealloc
+#ifdef WITH_PYMALLOC
+        && allocator->realloc != _PyObject_Realloc
+#endif
+        && allocator->realloc != _PyMem_ReallocLocked)
+    {
+        wrapped->ctx = ctx;
+        wrapped->realloc = allocator->realloc;
+        allocator->ctx = wrapped;
+        allocator->realloc = _PyMem_ReallocLocked;
+    }
+
+    if (allocator->free != _PyMem_RawFree
+#ifdef WITH_PYMALLOC
+        && allocator->free != _PyObject_Free
+#endif
+        && allocator->free != _PyMem_FreeLocked)
+    {
+        wrapped->ctx = ctx;
+        wrapped->free = allocator->free;
+        allocator->ctx = wrapped;
+        allocator->free = _PyMem_FreeLocked;
+    }
+}
+
+
 /*************************/
 /* the "arena" allocator */
 /*************************/
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c
index ceca7776a276ed..bca533dd44fc86 100644
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -592,6 +592,7 @@ static PyStatus
 init_interp_create_gil(PyThreadState *tstate, int gil)
 {
     PyStatus status;
+    _PyRuntimeState *runtime = tstate->interp->runtime;
 
     /* finalize_interp_delete() comment explains why _PyEval_FiniGIL() is
        only called here. */
@@ -619,7 +620,19 @@ init_interp_create_gil(PyThreadState *tstate, int gil)
         return status;
     }
 
-    return _PyStatus_OK();
+    if (own_gil) {
+        HEAD_LOCK(runtime);
+        if (runtime->allocators.num_gils == INT_MAX) {
+            status = _PyStatus_ERR("allocators GIL overflow");
+        }
+        else {
+            status = _PyStatus_OK();
+            runtime->allocators.num_gils++;
+        }
+        HEAD_UNLOCK(runtime);
+    }
+
+    return status;
 }
 
 
@@ -1742,6 +1755,14 @@ finalize_interp_delete(PyInterpreterState *interp)
     /* Cleanup auto-thread-state */
     _PyGILState_Fini(interp);
 
+    _PyRuntimeState *runtime = interp->runtime;
+    if (interp->ceval.own_gil) {
+        HEAD_LOCK(runtime);
+        assert(runtime->allocators.num_gils > 0);
+        runtime->allocators.num_gils--;
+        HEAD_UNLOCK(runtime);
+    }
+
     /* We can't call _PyEval_FiniGIL() here because destroying the GIL lock
        can fail when it is being awaited by another running daemon thread (see
        bpo-9901).  Instead pycore_create_interpreter() destroys the previously
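
For reviewers, here is a minimal sketch (not part of the patch) of the kind of
embedder-side allocator hook that maybe_add_locking_wrapper() targets. Only
PyMem_GetAllocator(), PyMem_SetAllocator(), PyMemAllocatorEx, and
PYMEM_DOMAIN_MEM are the documented C API; alloc_count and the counting_*
functions are hypothetical embedder code whose shared state is not
thread-safe on its own.

#include <Python.h>

/* Hypothetical embedder state: a counter with no locking of its own.
 * Under this patch it stays consistent because, once num_gils > 1, any
 * call into these hooks from an interpreter that owns its own GIL goes
 * through the _PyMem_*Locked wrappers, which first take the main
 * interpreter's GIL. */
static size_t alloc_count = 0;
static PyMemAllocatorEx prev_mem;   /* the allocator we delegate to */

static void *
counting_malloc(void *ctx, size_t size)
{
    alloc_count++;
    return prev_mem.malloc(prev_mem.ctx, size);
}

static void *
counting_calloc(void *ctx, size_t nelem, size_t elsize)
{
    alloc_count++;
    return prev_mem.calloc(prev_mem.ctx, nelem, elsize);
}

static void *
counting_realloc(void *ctx, void *ptr, size_t new_size)
{
    alloc_count++;
    return prev_mem.realloc(prev_mem.ctx, ptr, new_size);
}

static void
counting_free(void *ctx, void *ptr)
{
    prev_mem.free(prev_mem.ctx, ptr);
}

/* Typically called before Py_Initialize().  PyMem_SetAllocator() lands
 * in set_allocator_unlocked(), which (with this patch) sees that these
 * functions are not the builtin ones and interposes the locking
 * wrappers around them. */
void
install_counting_allocator(void)
{
    PyMemAllocatorEx hook = {
        .ctx = NULL,
        .malloc = counting_malloc,
        .calloc = counting_calloc,
        .realloc = counting_realloc,
        .free = counting_free,
    };
    PyMem_GetAllocator(PYMEM_DOMAIN_MEM, &prev_mem);
    PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &hook);
}

Note that get_allocator_unlocked() returns the unwrapped hook (it unwraps via
ctx when has_locking_wrapper() matches), so a later
PyMem_GetAllocator()/PyMem_SetAllocator() round-trip does not stack wrappers.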