From 361c4ed876b76f417bb560ec26be1d7ddec626d1 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 18 Nov 2024 11:24:39 -0700 Subject: [PATCH 1/4] Add THREADS_HEAD_LOCK(). --- Include/internal/pycore_interp.h | 1 + Include/internal/pycore_pystate.h | 5 +++++ Objects/codeobject.c | 4 ++++ Objects/object.c | 2 ++ Objects/obmalloc.c | 2 ++ Python/ceval.c | 2 ++ Python/ceval_gil.c | 12 ++++-------- Python/gc_free_threading.c | 17 ++++++++-------- Python/instrumentation.c | 5 ++--- Python/pystate.c | 32 +++++++++++++++---------------- 10 files changed, 47 insertions(+), 35 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 5e4bcbf835a4d0..4c893baf3a0292 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -127,6 +127,7 @@ struct _is { uintptr_t last_restart_version; struct pythreads { + PyMutex mutex; uint64_t next_unique_id; /* The linked list of threads, newest first. */ PyThreadState *head; diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index f4fbf3734e2d44..98e8c23de7e92b 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -269,6 +269,11 @@ extern int _PyOS_InterruptOccurred(PyThreadState *tstate); #define HEAD_UNLOCK(runtime) \ PyMutex_Unlock(&(runtime)->interpreters.mutex) +#define THREADS_HEAD_LOCK(interp) \ + PyMutex_LockFlags(&(interp)->threads.mutex, _Py_LOCK_DONT_DETACH) +#define THREADS_HEAD_UNLOCK(interp) \ + PyMutex_Unlock(&(interp)->threads.mutex) + // Get the configuration of the current interpreter. // The caller must hold the GIL. // Export for test_peg_generator. 
diff --git a/Objects/codeobject.c b/Objects/codeobject.c index dba43d5911da95..bbb3ea60e21bec 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -2871,20 +2871,24 @@ get_indices_in_use(PyInterpreterState *interp, struct flag_set *in_use) assert(interp->stoptheworld.world_stopped); assert(in_use->flags == NULL); int32_t max_index = 0; + THREADS_HEAD_LOCK(interp); for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { int32_t idx = ((_PyThreadStateImpl *) p)->tlbc_index; if (idx > max_index) { max_index = idx; } } + THREADS_HEAD_UNLOCK(interp); in_use->size = (size_t) max_index + 1; in_use->flags = PyMem_Calloc(in_use->size, sizeof(*in_use->flags)); if (in_use->flags == NULL) { return -1; } + THREADS_HEAD_LOCK(interp); for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { in_use->flags[((_PyThreadStateImpl *) p)->tlbc_index] = 1; } + THREADS_HEAD_UNLOCK(interp); return 0; } diff --git a/Objects/object.c b/Objects/object.c index b115bc756d90b3..15f1cfbed8be0e 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -119,11 +119,13 @@ get_reftotal(PyInterpreterState *interp) since we can't determine which interpreter updated it. 
*/ Py_ssize_t total = REFTOTAL(interp); #ifdef Py_GIL_DISABLED + THREADS_HEAD_LOCK(interp); for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { /* This may race with other threads modifications to their reftotal */ _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)p; total += _Py_atomic_load_ssize_relaxed(&tstate_impl->reftotal); } + THREADS_HEAD_UNLOCK(interp); #endif return total; } diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index dfeccfa4dd7658..ef075830e8e52e 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1405,6 +1405,7 @@ get_mimalloc_allocated_blocks(PyInterpreterState *interp) { size_t allocated_blocks = 0; #ifdef Py_GIL_DISABLED + THREADS_HEAD_LOCK(interp); for (PyThreadState *t = interp->threads.head; t != NULL; t = t->next) { _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)t; for (int i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) { @@ -1412,6 +1413,7 @@ get_mimalloc_allocated_blocks(PyInterpreterState *interp) mi_heap_visit_blocks(heap, false, &count_blocks, &allocated_blocks); } } + THREADS_HEAD_UNLOCK(interp); mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool; for (uint8_t tag = 0; tag < _Py_MIMALLOC_HEAP_COUNT; tag++) { diff --git a/Python/ceval.c b/Python/ceval.c index 892dc5f7b58ff8..a2aed1b274c86a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -296,11 +296,13 @@ Py_SetRecursionLimit(int new_limit) { PyInterpreterState *interp = _PyInterpreterState_GET(); interp->ceval.recursion_limit = new_limit; + THREADS_HEAD_LOCK(interp); for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { int depth = p->py_recursion_limit - p->py_recursion_remaining; p->py_recursion_limit = new_limit; p->py_recursion_remaining = new_limit - depth; } + THREADS_HEAD_UNLOCK(interp); } /* The function _Py_EnterRecursiveCallTstate() only calls _Py_CheckRecursiveCall() diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 4c9f59f837e11b..875670a1a52317 100644 --- a/Python/ceval_gil.c +++ 
b/Python/ceval_gil.c @@ -977,25 +977,21 @@ make_pending_calls(PyThreadState *tstate) void _Py_set_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit) { - _PyRuntimeState *runtime = &_PyRuntime; - - HEAD_LOCK(runtime); + THREADS_HEAD_LOCK(interp); for (PyThreadState *tstate = interp->threads.head; tstate != NULL; tstate = tstate->next) { _Py_set_eval_breaker_bit(tstate, bit); } - HEAD_UNLOCK(runtime); + THREADS_HEAD_UNLOCK(interp); } void _Py_unset_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit) { - _PyRuntimeState *runtime = &_PyRuntime; - - HEAD_LOCK(runtime); + THREADS_HEAD_LOCK(interp); for (PyThreadState *tstate = interp->threads.head; tstate != NULL; tstate = tstate->next) { _Py_unset_eval_breaker_bit(tstate, bit); } - HEAD_UNLOCK(runtime); + THREADS_HEAD_UNLOCK(interp); } void diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 499ee51fdb2cd4..e6877d6fae3ca3 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -304,6 +304,7 @@ gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor Py_ssize_t offset_pre = offset_base + 2 * sizeof(PyObject*); // visit each thread's heaps for GC objects + // XXX THREADS_HEAD_LOCK()? 
for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { struct _mimalloc_thread_state *m = &((_PyThreadStateImpl *)p)->mimalloc; if (!_Py_atomic_load_int(&m->initialized)) { @@ -374,7 +375,7 @@ gc_visit_stackref(_PyStackRef stackref) static void gc_visit_thread_stacks(PyInterpreterState *interp) { - HEAD_LOCK(&_PyRuntime); + THREADS_HEAD_LOCK(interp); for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { for (_PyInterpreterFrame *f = p->current_frame; f != NULL; f = f->previous) { PyObject *executable = PyStackRef_AsPyObjectBorrow(f->f_executable); @@ -390,7 +391,7 @@ gc_visit_thread_stacks(PyInterpreterState *interp) } } } - HEAD_UNLOCK(&_PyRuntime); + THREADS_HEAD_UNLOCK(interp); } static void @@ -429,14 +430,14 @@ process_delayed_frees(PyInterpreterState *interp) // Merge the queues from other threads into our own queue so that we can // process all of the pending delayed free requests at once. - HEAD_LOCK(&_PyRuntime); + THREADS_HEAD_LOCK(interp); for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { _PyThreadStateImpl *other = (_PyThreadStateImpl *)p; if (other != current_tstate) { llist_concat(&current_tstate->mem_free_queue, &other->mem_free_queue); } } - HEAD_UNLOCK(&_PyRuntime); + THREADS_HEAD_UNLOCK(interp); _PyMem_ProcessDelayed((PyThreadState *)current_tstate); } @@ -1226,7 +1227,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, state->gcstate->old[i-1].count = 0; } - HEAD_LOCK(&_PyRuntime); + THREADS_HEAD_LOCK(interp); for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)p; @@ -1236,7 +1237,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, // merge refcounts for all queued objects merge_queued_objects(tstate, state); } - HEAD_UNLOCK(&_PyRuntime); + THREADS_HEAD_UNLOCK(interp); process_delayed_frees(interp); @@ -1991,13 +1992,13 @@ 
PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) void _PyGC_ClearAllFreeLists(PyInterpreterState *interp) { - HEAD_LOCK(&_PyRuntime); + THREADS_HEAD_LOCK(interp); _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)interp->threads.head; while (tstate != NULL) { _PyObject_ClearFreeLists(&tstate->freelists, 0); tstate = (_PyThreadStateImpl *)tstate->base.next; } - HEAD_UNLOCK(&_PyRuntime); + THREADS_HEAD_UNLOCK(interp); } #endif // Py_GIL_DISABLED diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 87c2addaf809eb..e1577618e70196 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -1006,13 +1006,12 @@ set_global_version(PyThreadState *tstate, uint32_t version) #ifdef Py_GIL_DISABLED // Set the version on all threads in free-threaded builds. - _PyRuntimeState *runtime = &_PyRuntime; - HEAD_LOCK(runtime); + THREADS_HEAD_LOCK(interp); for (tstate = interp->threads.head; tstate; tstate = PyThreadState_Next(tstate)) { set_version_raw(&tstate->eval_breaker, version); }; - HEAD_UNLOCK(runtime); + THREADS_HEAD_UNLOCK(interp); #else // Normal builds take the current version from instrumentation_version when // attaching a thread, so we only have to set the current thread's version. diff --git a/Python/pystate.c b/Python/pystate.c index 01e54fc745de1f..72088298f867fb 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -773,7 +773,6 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) { assert(interp != NULL); assert(tstate != NULL); - _PyRuntimeState *runtime = interp->runtime; /* XXX Conditions we need to enforce: @@ -790,18 +789,16 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) } // Clear the current/main thread state last. 
- HEAD_LOCK(runtime); + THREADS_HEAD_LOCK(interp); PyThreadState *p = interp->threads.head; - HEAD_UNLOCK(runtime); while (p != NULL) { // See https://github.com/python/cpython/issues/102126 // Must be called without HEAD_LOCK held as it can deadlock // if any finalizer tries to acquire that lock. PyThreadState_Clear(p); - HEAD_LOCK(runtime); p = p->next; - HEAD_UNLOCK(runtime); } + THREADS_HEAD_UNLOCK(interp); if (tstate->interp == interp) { /* We fix tstate->_status below when we for sure aren't using it (e.g. no longer need the GIL). */ @@ -1539,7 +1536,7 @@ new_threadstate(PyInterpreterState *interp, int whence) #endif /* We serialize concurrent creation to protect global state. */ - HEAD_LOCK(interp->runtime); + THREADS_HEAD_LOCK(interp); // Initialize the new thread state. interp->threads.next_unique_id += 1; @@ -1550,7 +1547,7 @@ new_threadstate(PyInterpreterState *interp, int whence) PyThreadState *old_head = interp->threads.head; add_threadstate(interp, (PyThreadState *)tstate, old_head); - HEAD_UNLOCK(interp->runtime); + THREADS_HEAD_UNLOCK(interp); #ifdef Py_GIL_DISABLED // Must be called with lock unlocked to avoid lock ordering deadlocks. 
@@ -1741,7 +1738,7 @@ tstate_delete_common(PyThreadState *tstate, int release_gil) } _PyRuntimeState *runtime = interp->runtime; - HEAD_LOCK(runtime); + THREADS_HEAD_LOCK(interp); if (tstate->prev) { tstate->prev->next = tstate->next; } @@ -1757,9 +1754,11 @@ tstate_delete_common(PyThreadState *tstate, int release_gil) if (interp->stoptheworld.requested) { decrement_stoptheworld_countdown(&interp->stoptheworld); } + HEAD_LOCK(runtime); if (runtime->stoptheworld.requested) { decrement_stoptheworld_countdown(&runtime->stoptheworld); } + HEAD_UNLOCK(runtime); } #if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED) @@ -1770,7 +1769,7 @@ tstate_delete_common(PyThreadState *tstate, int release_gil) assert(tstate_impl->refcounts.values == NULL); #endif - HEAD_UNLOCK(runtime); + THREADS_HEAD_UNLOCK(interp); // XXX Unbind in PyThreadState_Clear(), or earlier // (and assert not-equal here)? @@ -1851,13 +1850,15 @@ _PyThreadState_RemoveExcept(PyThreadState *tstate) { assert(tstate != NULL); PyInterpreterState *interp = tstate->interp; - _PyRuntimeState *runtime = interp->runtime; #ifdef Py_GIL_DISABLED +#ifndef NDEBUG + _PyRuntimeState *runtime = interp->runtime; +#endif assert(runtime->stoptheworld.world_stopped); #endif - HEAD_LOCK(runtime); + THREADS_HEAD_LOCK(interp); /* Remove all thread states, except tstate, from the linked list of thread states. 
*/ PyThreadState *list = interp->threads.head; @@ -1872,7 +1873,7 @@ _PyThreadState_RemoveExcept(PyThreadState *tstate) } tstate->prev = tstate->next = NULL; interp->threads.head = tstate; - HEAD_UNLOCK(runtime); + THREADS_HEAD_UNLOCK(interp); return list; } @@ -2339,7 +2340,6 @@ _PyEval_StartTheWorld(PyInterpreterState *interp) int PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc) { - _PyRuntimeState *runtime = &_PyRuntime; PyInterpreterState *interp = _PyInterpreterState_GET(); /* Although the GIL is held, a few C API functions can be called @@ -2348,7 +2348,7 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc) * list of thread states we're traversing, so to prevent that we lock * head_mutex for the duration. */ - HEAD_LOCK(runtime); + THREADS_HEAD_LOCK(interp); for (PyThreadState *tstate = interp->threads.head; tstate != NULL; tstate = tstate->next) { if (tstate->thread_id != id) { continue; @@ -2363,13 +2363,13 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc) */ Py_XINCREF(exc); PyObject *old_exc = _Py_atomic_exchange_ptr(&tstate->async_exc, exc); - HEAD_UNLOCK(runtime); + THREADS_HEAD_UNLOCK(interp); Py_XDECREF(old_exc); _Py_set_eval_breaker_bit(tstate, _PY_ASYNC_EXCEPTION_BIT); return 1; } - HEAD_UNLOCK(runtime); + THREADS_HEAD_LOCK(interp); return 0; } From 96a572c8bbe86c9d1f2581540102662f751700d0 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 19 Nov 2024 14:11:04 -0700 Subject: [PATCH 2/4] Fix the lock around gc_visit_heaps_lock_held(). 
--- Python/gc_free_threading.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index e6877d6fae3ca3..59a6d0ab92b7ed 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -304,7 +304,6 @@ gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor Py_ssize_t offset_pre = offset_base + 2 * sizeof(PyObject*); // visit each thread's heaps for GC objects - // XXX THREADS_HEAD_LOCK()? for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { struct _mimalloc_thread_state *m = &((_PyThreadStateImpl *)p)->mimalloc; if (!_Py_atomic_load_int(&m->initialized)) { @@ -351,9 +350,9 @@ gc_visit_heaps(PyInterpreterState *interp, mi_block_visit_fun *visitor, assert(interp->stoptheworld.world_stopped); int err; - HEAD_LOCK(&_PyRuntime); + THREADS_HEAD_LOCK(interp); err = gc_visit_heaps_lock_held(interp, visitor, arg); - HEAD_UNLOCK(&_PyRuntime); + THREADS_HEAD_UNLOCK(interp); return err; } From c1eb46338e92114d992579980896cceeede6928d Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 19 Nov 2024 15:29:04 -0700 Subject: [PATCH 3/4] Fix a typo. --- Python/pystate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/pystate.c b/Python/pystate.c index 72088298f867fb..bac1e7d33620eb 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2369,7 +2369,7 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc) _Py_set_eval_breaker_bit(tstate, _PY_ASYNC_EXCEPTION_BIT); return 1; } - THREADS_HEAD_LOCK(interp); + THREADS_HEAD_UNLOCK(interp); return 0; } From 62e1ac3009bb4e75547c4e235c2836efe4deaa23 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 19 Nov 2024 15:36:10 -0700 Subject: [PATCH 4/4] Revert the locking logic in interpreter_clear(). 
--- Python/pystate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Python/pystate.c b/Python/pystate.c index bac1e7d33620eb..df1e3d91275706 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -791,14 +791,16 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) // Clear the current/main thread state last. THREADS_HEAD_LOCK(interp); PyThreadState *p = interp->threads.head; + THREADS_HEAD_UNLOCK(interp); while (p != NULL) { // See https://github.com/python/cpython/issues/102126 // Must be called without HEAD_LOCK held as it can deadlock // if any finalizer tries to acquire that lock. PyThreadState_Clear(p); + THREADS_HEAD_LOCK(interp); p = p->next; + THREADS_HEAD_UNLOCK(interp); } - THREADS_HEAD_UNLOCK(interp); if (tstate->interp == interp) { /* We fix tstate->_status below when we for sure aren't using it (e.g. no longer need the GIL). */