From dba7b7e59604e0eef23eb537920f89b3a90a7f77 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 13 Sep 2023 11:11:01 +0100 Subject: [PATCH 01/18] Remove nonsense thread checks --- Python/ceval_gil.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 3b7e6cb1bda3ff..190bfae716751a 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -67,11 +67,9 @@ COMPUTE_EVAL_BREAKER(PyInterpreterState *interp, { _Py_atomic_store_relaxed(&ceval2->eval_breaker, _Py_atomic_load_relaxed_int32(&ceval2->gil_drop_request) - | (_Py_atomic_load_relaxed_int32(&ceval->signals_pending) - && _Py_ThreadCanHandleSignals(interp)) + | _Py_atomic_load_relaxed_int32(&ceval->signals_pending) | (_Py_atomic_load_relaxed_int32(&ceval2->pending.calls_to_do)) - | (_Py_IsMainThread() && _Py_IsMainInterpreter(interp) - &&_Py_atomic_load_relaxed_int32(&ceval->pending_mainthread.calls_to_do)) + | _Py_atomic_load_relaxed_int32(&ceval->pending_mainthread.calls_to_do) | ceval2->pending.async_exc | _Py_atomic_load_relaxed_int32(&ceval2->gc_scheduled)); } From 2b8766e808782b742e47970b0b3c4601d5cf7dce Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 13 Sep 2023 14:08:11 +0100 Subject: [PATCH 02/18] Use one bit per check in eval_breaker --- Include/internal/pycore_ceval.h | 31 +++++++ Include/internal/pycore_ceval_state.h | 15 +--- Modules/gcmodule.c | 7 +- Modules/signalmodule.c | 5 +- Python/bytecodes.c | 2 +- Python/ceval_gil.c | 124 ++++++-------------------- Python/ceval_macros.h | 2 +- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 2 +- 9 files changed, 71 insertions(+), 119 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index e9535023cec46b..4d3489d1f4e8a9 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -10,6 +10,7 @@ extern "C" { #include "pycore_interp.h" // PyInterpreterState.eval_frame #include "pycore_pystate.h" // _PyThreadState_GET() +#include "cpython/pyatomic.h" /* Forward declarations */ struct pyruntimestate; @@ -193,6 +194,36 @@ int _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int a void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame); +#define _PY_GIL_DROP_REQUEST_BIT 0 +#define _PY_SIGNALS_PENDING_BIT 1 +#define _PY_CALLS_TO_DO_BIT 2 +#define _PY_ASYNC_EXCEPTION_BIT 3 +#define _PY_GC_SCHEDULED_BIT 4 + + +static inline void +_Py_set_eval_breaker_bit(PyInterpreterState *interp, int32_t bit, int32_t set) +{ + assert(set == 0 || set == 1); + int32_t to_set = set << bit; + int32_t mask = 1 << bit; + int32_t old = _Py_atomic_load_int32(&interp->ceval.eval_breaker2); + if ((old & mask) == to_set) { + return; + } + int32_t new; + do { + new = (old & ~mask) | (set << bit); + } while (!_Py_atomic_compare_exchange_int32(&interp->ceval.eval_breaker2, &old, new)); +} + +static inline bool +_Py_eval_breaker_bit_is_set(PyInterpreterState *interp, int32_t bit) +{ + return _Py_atomic_load_int32(&interp->ceval.eval_breaker2) & (1 << bit); +} + + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index 6e3d669dc646af..c885239e783061 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -15,7 +15,7 @@ struct _pending_calls { int busy; PyThread_type_lock lock; /* Request for running pending calls. */ - _Py_atomic_int calls_to_do; + int32_t calls_to_do; /* Request for looking at the `async_exc` field of the current thread state. Guarded by the GIL. */ @@ -60,11 +60,6 @@ struct _ceval_runtime_state { int _not_used; #endif } perf; - /* Request for checking signals. It is shared by all interpreters (see - bpo-40513). Any thread of any interpreter can receive a signal, but only - the main thread of the main interpreter can handle signals: see - _Py_ThreadCanHandleSignals(). */ - _Py_atomic_int signals_pending; /* Pending calls to be made only on the main thread. */ struct _pending_calls pending_mainthread; }; @@ -85,14 +80,12 @@ struct _ceval_state { * the fast path in the eval loop. * It is by far the hottest field in this struct and * should be placed at the beginning. */ - _Py_atomic_int eval_breaker; - /* Request for dropping the GIL */ - _Py_atomic_int gil_drop_request; + int32_t eval_breaker2; + /* Avoid false sharing */ + int64_t padding[7]; int recursion_limit; struct _gil_runtime_state *gil; int own_gil; - /* The GC is ready to be executed */ - _Py_atomic_int gc_scheduled; struct _pending_calls pending; }; diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 632cabdf4bcfbd..96771c2f7a0c45 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -24,6 +24,7 @@ */ #include "Python.h" +#include "pycore_ceval.h" // _PyEval_SignalReceived() #include "pycore_context.h" #include "pycore_dict.h" // _PyDict_MaybeUntrack() #include "pycore_initconfig.h" @@ -2274,11 +2275,7 @@ _Py_ScheduleGC(PyInterpreterState *interp) if (gcstate->collecting == 1) { return; } - struct _ceval_state *ceval = &interp->ceval; - if (!_Py_atomic_load_relaxed(&ceval->gc_scheduled)) { - _Py_atomic_store_relaxed(&ceval->gc_scheduled, 1); - _Py_atomic_store_relaxed(&ceval->eval_breaker, 1); - } + _Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 1); } void diff --git a/Modules/signalmodule.c b/Modules/signalmodule.c index 8d6556727b3a5a..ac3457003b0cb6 100644 --- a/Modules/signalmodule.c +++ b/Modules/signalmodule.c @@ -1767,9 +1767,8 @@ PyErr_CheckSignals(void) Python code to ensure signals are handled. Checking for the GC here allows long running native code to clean cycles created using the C-API even if it doesn't run the evaluation loop */ - struct _ceval_state *interp_ceval_state = &tstate->interp->ceval; - if (_Py_atomic_load_relaxed(&interp_ceval_state->gc_scheduled)) { - _Py_atomic_store_relaxed(&interp_ceval_state->gc_scheduled, 0); + if (_Py_eval_breaker_bit_is_set(tstate->interp, _PY_GC_SCHEDULED_BIT)) { + _Py_set_eval_breaker_bit(tstate->interp, _PY_GC_SCHEDULED_BIT, 0); _Py_RunGC(tstate); } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 08d91b5efe51be..8939fb189c61f1 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -160,7 +160,7 @@ dummy_func( /* Possibly combine these two checks */ DEOPT_IF(_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version, RESUME); - DEOPT_IF(_Py_atomic_load_relaxed_int32(&tstate->interp->ceval.eval_breaker), RESUME); + DEOPT_IF(_Py_atomic_load_int32_relaxed(&tstate->interp->ceval.eval_breaker2), RESUME); } inst(INSTRUMENTED_RESUME, (--)) { diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 190bfae716751a..5c33a8e2d38ed1 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1,6 +1,7 @@ #include "Python.h" #include "pycore_atomic.h" // _Py_atomic_int +#include "cpython/pyatomic.h" // _Py_atomic_load_int32 #include "pycore_ceval.h" // _PyEval_SignalReceived() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // _Py_RunGC() @@ -57,89 +58,45 @@ #define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) _Py_atomic_load_relaxed(ATOMIC_VAL) #endif -/* This can set eval_breaker to 0 even though gil_drop_request became - 1. We believe this is all right because the eval loop will release - the GIL eventually anyway. */ -static inline void -COMPUTE_EVAL_BREAKER(PyInterpreterState *interp, - struct _ceval_runtime_state *ceval, - struct _ceval_state *ceval2) -{ - _Py_atomic_store_relaxed(&ceval2->eval_breaker, - _Py_atomic_load_relaxed_int32(&ceval2->gil_drop_request) - | _Py_atomic_load_relaxed_int32(&ceval->signals_pending) - | (_Py_atomic_load_relaxed_int32(&ceval2->pending.calls_to_do)) - | _Py_atomic_load_relaxed_int32(&ceval->pending_mainthread.calls_to_do) - | ceval2->pending.async_exc - | _Py_atomic_load_relaxed_int32(&ceval2->gc_scheduled)); -} - - static inline void SET_GIL_DROP_REQUEST(PyInterpreterState *interp) { - struct _ceval_state *ceval2 = &interp->ceval; - _Py_atomic_store_relaxed(&ceval2->gil_drop_request, 1); - _Py_atomic_store_relaxed(&ceval2->eval_breaker, 1); + _Py_set_eval_breaker_bit(interp, _PY_GIL_DROP_REQUEST_BIT, 1); } static inline void RESET_GIL_DROP_REQUEST(PyInterpreterState *interp) { - struct _ceval_runtime_state *ceval = &interp->runtime->ceval; - struct _ceval_state *ceval2 = &interp->ceval; - _Py_atomic_store_relaxed(&ceval2->gil_drop_request, 0); - COMPUTE_EVAL_BREAKER(interp, ceval, ceval2); + _Py_set_eval_breaker_bit(interp, _PY_GIL_DROP_REQUEST_BIT, 0); } static inline void -SIGNAL_PENDING_CALLS(struct _pending_calls *pending, PyInterpreterState *interp) +SIGNAL_PENDING_CALLS(PyInterpreterState *interp) { - struct _ceval_runtime_state *ceval = &interp->runtime->ceval; - struct _ceval_state *ceval2 = &interp->ceval; - _Py_atomic_store_relaxed(&pending->calls_to_do, 1); - COMPUTE_EVAL_BREAKER(interp, ceval, ceval2); + _Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 1); } static inline void UNSIGNAL_PENDING_CALLS(PyInterpreterState *interp) { - struct _ceval_runtime_state *ceval = &interp->runtime->ceval; - struct _ceval_state *ceval2 = &interp->ceval; - if (_Py_IsMainThread() && _Py_IsMainInterpreter(interp)) { - _Py_atomic_store_relaxed(&ceval->pending_mainthread.calls_to_do, 0); - } - _Py_atomic_store_relaxed(&ceval2->pending.calls_to_do, 0); - COMPUTE_EVAL_BREAKER(interp, ceval, ceval2); + _Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 0); } static inline void SIGNAL_PENDING_SIGNALS(PyInterpreterState *interp, int force) { - struct _ceval_runtime_state *ceval = &interp->runtime->ceval; - struct _ceval_state *ceval2 = &interp->ceval; - _Py_atomic_store_relaxed(&ceval->signals_pending, 1); - if (force) { - _Py_atomic_store_relaxed(&ceval2->eval_breaker, 1); - } - else { - /* eval_breaker is not set to 1 if thread_can_handle_signals() is false */ - COMPUTE_EVAL_BREAKER(interp, ceval, ceval2); - } + _Py_set_eval_breaker_bit(interp, _PY_SIGNALS_PENDING_BIT, 1); } static inline void UNSIGNAL_PENDING_SIGNALS(PyInterpreterState *interp) { - struct _ceval_runtime_state *ceval = &interp->runtime->ceval; - struct _ceval_state *ceval2 = &interp->ceval; - _Py_atomic_store_relaxed(&ceval->signals_pending, 0); - COMPUTE_EVAL_BREAKER(interp, ceval, ceval2); + _Py_set_eval_breaker_bit(interp, _PY_SIGNALS_PENDING_BIT, 0); } @@ -148,17 +105,16 @@ SIGNAL_ASYNC_EXC(PyInterpreterState *interp) { struct _ceval_state *ceval2 = &interp->ceval; ceval2->pending.async_exc = 1; - _Py_atomic_store_relaxed(&ceval2->eval_breaker, 1); + _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 1); } static inline void UNSIGNAL_ASYNC_EXC(PyInterpreterState *interp) { - struct _ceval_runtime_state *ceval = &interp->runtime->ceval; struct _ceval_state *ceval2 = &interp->ceval; ceval2->pending.async_exc = 0; - COMPUTE_EVAL_BREAKER(interp, ceval, ceval2); + _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 0); } @@ -308,7 +264,7 @@ drop_gil(struct _ceval_state *ceval, PyThreadState *tstate) the GIL, and that's the only time we might delete the interpreter, so checking tstate first prevents the crash. See https://github.com/python/cpython/issues/104341. */ - if (tstate != NULL && _Py_atomic_load_relaxed(&ceval->gil_drop_request)) { + if (tstate != NULL && _Py_eval_breaker_bit_is_set(tstate->interp, _PY_GIL_DROP_REQUEST_BIT)) { MUTEX_LOCK(gil->switch_mutex); /* Not switched yet => wait */ if (((PyThreadState*)_Py_atomic_load_relaxed(&gil->last_holder)) == tstate) @@ -434,17 +390,7 @@ take_gil(PyThreadState *tstate) } assert(_PyThreadState_CheckConsistency(tstate)); - if (_Py_atomic_load_relaxed(&ceval->gil_drop_request)) { - RESET_GIL_DROP_REQUEST(interp); - } - else { - /* bpo-40010: eval_breaker should be recomputed to be set to 1 if there - is a pending signal: signal received by another thread which cannot - handle signals. - - Note: RESET_GIL_DROP_REQUEST() calls COMPUTE_EVAL_BREAKER(). */ - COMPUTE_EVAL_BREAKER(interp, &_PyRuntime.ceval, ceval); - } + RESET_GIL_DROP_REQUEST(interp); /* Don't access tstate if the thread must exit */ if (tstate->async_exc != NULL) { @@ -771,6 +717,8 @@ _push_pending_call(struct _pending_calls *pending, pending->calls[i].func = func; pending->calls[i].arg = arg; pending->last = j; + assert(pending->calls_to_do < NPENDINGCALLS); + pending->calls_to_do++; return 0; } @@ -798,6 +746,8 @@ _pop_pending_call(struct _pending_calls *pending, if (i >= 0) { pending->calls[i] = (struct _pending_call){0}; pending->first = (i + 1) % NPENDINGCALLS; + assert(pending->calls_to_do > 0); + pending->calls_to_do--; } } @@ -827,7 +777,7 @@ _PyEval_AddPendingCall(PyInterpreterState *interp, PyThread_release_lock(pending->lock); /* signal main loop */ - SIGNAL_PENDING_CALLS(pending, interp); + SIGNAL_PENDING_CALLS(interp); return result; } @@ -860,15 +810,7 @@ handle_signals(PyThreadState *tstate) static inline int maybe_has_pending_calls(PyInterpreterState *interp) { - struct _pending_calls *pending = &interp->ceval.pending; - if (_Py_atomic_load_relaxed_int32(&pending->calls_to_do)) { - return 1; - } - if (!_Py_IsMainThread() || !_Py_IsMainInterpreter(interp)) { - return 0; - } - pending = &_PyRuntime.ceval.pending_mainthread; - return _Py_atomic_load_relaxed_int32(&pending->calls_to_do); + return _Py_eval_breaker_bit_is_set(interp, _PY_CALLS_TO_DO_BIT) ? 1 : 0; } static int @@ -928,7 +870,7 @@ make_pending_calls(PyInterpreterState *interp) if (_make_pending_calls(pending) != 0) { pending->busy = 0; /* There might not be more calls to make, but we play it safe. */ - SIGNAL_PENDING_CALLS(pending, interp); + SIGNAL_PENDING_CALLS(interp); return -1; } @@ -936,10 +878,13 @@ make_pending_calls(PyInterpreterState *interp) if (_make_pending_calls(pending_main) != 0) { pending->busy = 0; /* There might not be more calls to make, but we play it safe. */ - SIGNAL_PENDING_CALLS(pending_main, interp); + SIGNAL_PENDING_CALLS(interp); return -1; } } + else if (pending_main->calls_to_do) { + SIGNAL_PENDING_CALLS(interp); + } pending->busy = 0; return 0; @@ -1084,9 +1029,10 @@ _Py_HandlePending(PyThreadState *tstate) _PyRuntimeState * const runtime = &_PyRuntime; struct _ceval_runtime_state *ceval = &runtime->ceval; struct _ceval_state *interp_ceval_state = &tstate->interp->ceval; + PyInterpreterState *interp = tstate->interp; /* Pending signals */ - if (_Py_atomic_load_relaxed_int32(&ceval->signals_pending)) { + if (_Py_eval_breaker_bit_is_set(interp, _PY_SIGNALS_PENDING_BIT)) { if (handle_signals(tstate) != 0) { return -1; } @@ -1100,14 +1046,13 @@ _Py_HandlePending(PyThreadState *tstate) } /* GC scheduled to run */ - if (_Py_atomic_load_relaxed_int32(&interp_ceval_state->gc_scheduled)) { - _Py_atomic_store_relaxed(&interp_ceval_state->gc_scheduled, 0); - COMPUTE_EVAL_BREAKER(tstate->interp, ceval, interp_ceval_state); + if (_Py_eval_breaker_bit_is_set(tstate->interp, _PY_GC_SCHEDULED_BIT)) { + _Py_set_eval_breaker_bit(tstate->interp, _PY_GC_SCHEDULED_BIT, 0); _Py_RunGC(tstate); } /* GIL drop request */ - if (_Py_atomic_load_relaxed_int32(&interp_ceval_state->gil_drop_request)) { + if (_Py_eval_breaker_bit_is_set(tstate->interp, _PY_GIL_DROP_REQUEST_BIT)) { /* Give another thread a chance */ if (_PyThreadState_SwapNoGIL(NULL) != tstate) { Py_FatalError("tstate mix-up"); @@ -1132,19 +1077,6 @@ _Py_HandlePending(PyThreadState *tstate) Py_DECREF(exc); return -1; } - - - // It is possible that some of the conditions that trigger the eval breaker - // are called in a different thread than the Python thread. An example of - // this is bpo-42296: On Windows, _PyEval_SignalReceived() can be called in - // a different thread than the Python thread, in which case - // _Py_ThreadCanHandleSignals() is wrong. Recompute eval_breaker in the - // current Python thread with the correct _Py_ThreadCanHandleSignals() - // value. It prevents to interrupt the eval loop at every instruction if - // the current Python thread cannot handle signals (if - // _Py_ThreadCanHandleSignals() is false). - COMPUTE_EVAL_BREAKER(tstate->interp, ceval, interp_ceval_state); - return 0; } diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index f5d915554d56e7..efdb731484ed34 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -116,7 +116,7 @@ #define CHECK_EVAL_BREAKER() \ _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \ - if (_Py_atomic_load_relaxed_int32(&tstate->interp->ceval.eval_breaker)) { \ + if (_Py_atomic_load_int32_relaxed(&tstate->interp->ceval.eval_breaker2)) { \ if (_Py_HandlePending(tstate) != 0) { \ goto error; \ } \ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8f3febe7d1ab95..cd5f0b5cae065e 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -15,7 +15,7 @@ /* Possibly combine these two checks */ DEOPT_IF(_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version, RESUME); - DEOPT_IF(_Py_atomic_load_relaxed_int32(&tstate->interp->ceval.eval_breaker), RESUME); + DEOPT_IF(_Py_atomic_load_int32_relaxed(&tstate->interp->ceval.eval_breaker2), RESUME); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index b86e35f84fda09..efb4c04b67fcae 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -34,7 +34,7 @@ /* Possibly combine these two checks */ DEOPT_IF(_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version, RESUME); - DEOPT_IF(_Py_atomic_load_relaxed_int32(&tstate->interp->ceval.eval_breaker), RESUME); + DEOPT_IF(_Py_atomic_load_int32_relaxed(&tstate->interp->ceval.eval_breaker2), RESUME); DISPATCH(); } From f7205ff07fc70a8e5f0b9c19f5a559637f954bef Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 22 Sep 2023 10:02:25 +0100 Subject: [PATCH 03/18] Add main callback enum --- Include/internal/pycore_ceval.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 4d3489d1f4e8a9..4cca9a4974df2f 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -197,8 +197,9 @@ void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame) #define _PY_GIL_DROP_REQUEST_BIT 0 #define _PY_SIGNALS_PENDING_BIT 1 #define _PY_CALLS_TO_DO_BIT 2 -#define _PY_ASYNC_EXCEPTION_BIT 3 -#define _PY_GC_SCHEDULED_BIT 4 +#define _PY_CALLS_ON_MAIN_TO_DO_BIT 3 +#define _PY_ASYNC_EXCEPTION_BIT 4 +#define _PY_GC_SCHEDULED_BIT 5 static inline void From 4da67aaf42bc98a6985410d29647c6459e931db8 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 25 Sep 2023 13:05:55 +0100 Subject: [PATCH 04/18] Put instrumentation version and eval-breaker flags into same word --- Include/cpython/code.h | 2 +- Include/internal/pycore_ceval.h | 19 +++++----- Include/internal/pycore_ceval_state.h | 2 +- Include/internal/pycore_interp.h | 3 +- Python/bytecodes.c | 21 +++++------ Python/ceval_gil.c | 2 -- Python/ceval_macros.h | 2 +- Python/executor_cases.c.h | 7 ++-- Python/generated_cases.c.h | 19 +++++----- Python/instrumentation.c | 50 +++++++++++++++++++++++---- 10 files changed, 79 insertions(+), 48 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 45b09a1265df80..0ce075189359e5 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -167,7 +167,7 @@ typedef struct { PyObject *co_weakreflist; /* to support weakrefs to code objects */ \ _PyExecutorArray *co_executors; /* executors from optimizer */ \ _PyCoCached *_co_cached; /* cached co_* attributes */ \ - uint64_t _co_instrumentation_version; /* current instrumentation version */ \ + uint32_t _co_instrumentation_version; /* current instrumentation version */ \ _PyCoMonitoringData *_co_monitoring; /* Monitoring data */ \ int _co_firsttraceable; /* index of first traceable instruction */ \ /* Scratch space for extra data relating to the code object. \ diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 4cca9a4974df2f..9f1ed21ab50ea1 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -197,31 +197,30 @@ void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame) #define _PY_GIL_DROP_REQUEST_BIT 0 #define _PY_SIGNALS_PENDING_BIT 1 #define _PY_CALLS_TO_DO_BIT 2 -#define _PY_CALLS_ON_MAIN_TO_DO_BIT 3 -#define _PY_ASYNC_EXCEPTION_BIT 4 -#define _PY_GC_SCHEDULED_BIT 5 +#define _PY_ASYNC_EXCEPTION_BIT 3 +#define _PY_GC_SCHEDULED_BIT 4 static inline void -_Py_set_eval_breaker_bit(PyInterpreterState *interp, int32_t bit, int32_t set) +_Py_set_eval_breaker_bit(PyInterpreterState *interp, uint32_t bit, uint32_t set) { assert(set == 0 || set == 1); - int32_t to_set = set << bit; - int32_t mask = 1 << bit; - int32_t old = _Py_atomic_load_int32(&interp->ceval.eval_breaker2); + uint32_t to_set = set << bit; + uint32_t mask = ((uint32_t)1) << bit; + uint32_t old = _Py_atomic_load_uint32(&interp->ceval.eval_breaker2); if ((old & mask) == to_set) { return; } - int32_t new; + uint32_t new; do { new = (old & ~mask) | (set << bit); - } while (!_Py_atomic_compare_exchange_int32(&interp->ceval.eval_breaker2, &old, new)); + } while (!_Py_atomic_compare_exchange_uint32(&interp->ceval.eval_breaker2, &old, new)); } static inline bool _Py_eval_breaker_bit_is_set(PyInterpreterState *interp, int32_t bit) { - return _Py_atomic_load_int32(&interp->ceval.eval_breaker2) & (1 << bit); + return _Py_atomic_load_uint32(&interp->ceval.eval_breaker2) & (((uint32_t)1) << bit); } diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index c885239e783061..aa4117acf880dc 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -80,7 +80,7 @@ struct _ceval_state { * the fast path in the eval loop. * It is by far the hottest field in this struct and * should be placed at the beginning. */ - int32_t eval_breaker2; + uint32_t eval_breaker2; /* Avoid false sharing */ int64_t padding[7]; int recursion_limit; diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index ba5764e943e676..a0bdb968ad513f 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -67,8 +67,7 @@ struct _is { int _initialized; int finalizing; - uint64_t monitoring_version; - uint64_t last_restart_version; + uint32_t last_restart_version; struct pythreads { uint64_t next_unique_id; /* The linked list of threads, newest first. */ diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8939fb189c61f1..50b44e611dca9e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -139,7 +139,10 @@ dummy_func( inst(RESUME, (--)) { TIER_ONE_ONLY assert(frame == tstate->current_frame); - if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) { + uint32_t global_version = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker2) & ~0xff; + uint32_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + assert((code_version & 255) == 0); + if (code_version != global_version) { int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp); ERROR_IF(err, error); next_instr--; @@ -157,18 +160,16 @@ dummy_func( DEOPT_IF(_Py_emscripten_signal_clock == 0, RESUME); _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif - /* Possibly combine these two checks */ - DEOPT_IF(_PyFrame_GetCode(frame)->_co_instrumentation_version - != tstate->interp->monitoring_version, RESUME); - DEOPT_IF(_Py_atomic_load_int32_relaxed(&tstate->interp->ceval.eval_breaker2), RESUME); + uint32_t eval_breaker = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker2); + uint32_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + assert((version & 255) == 0); + DEOPT_IF(eval_breaker != version, RESUME); } inst(INSTRUMENTED_RESUME, (--)) { - /* Possible performance enhancement: - * We need to check the eval breaker anyway, can we - * combine the instrument verison check and the eval breaker test? - */ - if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) { + uint32_t global_version = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker2) & ~0xff; + uint32_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + if (code_version != global_version) { if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) { goto error; } diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 5c33a8e2d38ed1..c4815889470afb 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1026,8 +1026,6 @@ _PyEval_FiniState(struct _ceval_state *ceval) int _Py_HandlePending(PyThreadState *tstate) { - _PyRuntimeState * const runtime = &_PyRuntime; - struct _ceval_runtime_state *ceval = &runtime->ceval; struct _ceval_state *interp_ceval_state = &tstate->interp->ceval; PyInterpreterState *interp = tstate->interp; diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index efdb731484ed34..8079477550a76b 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -116,7 +116,7 @@ #define CHECK_EVAL_BREAKER() \ _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \ - if (_Py_atomic_load_int32_relaxed(&tstate->interp->ceval.eval_breaker2)) { \ + if (_Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker2) & 255) { \ if (_Py_HandlePending(tstate) != 0) { \ goto error; \ } \ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index cd5f0b5cae065e..04e3e6a5fad0b9 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -12,10 +12,9 @@ DEOPT_IF(_Py_emscripten_signal_clock == 0, RESUME); _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif - /* Possibly combine these two checks */ - DEOPT_IF(_PyFrame_GetCode(frame)->_co_instrumentation_version - != tstate->interp->monitoring_version, RESUME); - DEOPT_IF(_Py_atomic_load_int32_relaxed(&tstate->interp->ceval.eval_breaker2), RESUME); + uint32_t eval_breaker = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker2); + uint32_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + DEOPT_IF(eval_breaker != version, RESUME); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index efb4c04b67fcae..afbfbd23de7f92 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -12,7 +12,9 @@ static_assert(0 == 0, "incorrect cache size"); TIER_ONE_ONLY assert(frame == tstate->current_frame); - if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) { + uint32_t global_version = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker2) & ~255; + uint32_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + if (code_version != global_version) { int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp); if (err) goto error; next_instr--; @@ -31,19 +33,16 @@ DEOPT_IF(_Py_emscripten_signal_clock == 0, RESUME); _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif - /* Possibly combine these two checks */ - DEOPT_IF(_PyFrame_GetCode(frame)->_co_instrumentation_version - != tstate->interp->monitoring_version, RESUME); - DEOPT_IF(_Py_atomic_load_int32_relaxed(&tstate->interp->ceval.eval_breaker2), RESUME); + uint32_t eval_breaker = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker2); + uint32_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + DEOPT_IF(eval_breaker != version, RESUME); DISPATCH(); } TARGET(INSTRUMENTED_RESUME) { - /* Possible performance enhancement: - * We need to check the eval breaker anyway, can we - * combine the instrument verison check and the eval breaker test? - */ - if (_PyFrame_GetCode(frame)->_co_instrumentation_version != tstate->interp->monitoring_version) { + uint32_t global_version = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker2) & ~255; + uint32_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + if (code_version != global_version) { if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) { goto error; } diff --git a/Python/instrumentation.c b/Python/instrumentation.c index fee6eae1734394..c8cf660a36695c 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -1,6 +1,7 @@ #include "Python.h" #include "opcode_ids.h" +#include "cpython/pyatomic.h" #include "pycore_bitutils.h" // _Py_popcount32 #include "pycore_call.h" @@ -890,10 +891,30 @@ static inline int most_significant_bit(uint8_t bits) { return MOST_SIGNIFICANT_BITS[bits]; } +static uint32_t +global_version(PyInterpreterState *interp) +{ + return interp->ceval.eval_breaker2 & ~255; +} + +static void +set_global_version(PyInterpreterState *interp, uint32_t version) +{ + assert((version & 255) == 0); + uint32_t old = _Py_atomic_load_uint32(&interp->ceval.eval_breaker2); + if (old == version) { + return; + } + int32_t new; + do { + new = (old & 255) | version; + } while (!_Py_atomic_compare_exchange_uint32(&interp->ceval.eval_breaker2, &old, new)); +} + static bool is_version_up_to_date(PyCodeObject *code, PyInterpreterState *interp) { - return interp->monitoring_version == code->_co_instrumentation_version; + return global_version(interp) == code->_co_instrumentation_version; } #ifndef NDEBUG @@ -1548,7 +1569,7 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) { if (is_version_up_to_date(code, interp)) { assert( - interp->monitoring_version == 0 || + (interp->ceval.eval_breaker2 & ~0xff) == 0 || instrumentation_cross_checks(interp, code) ); return 0; @@ -1586,7 +1607,7 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) assert(monitors_are_empty(monitors_and(new_events, removed_events))); } code->_co_monitoring->active_monitors = active_events; - code->_co_instrumentation_version = interp->monitoring_version; + code->_co_instrumentation_version = global_version(interp); if (monitors_are_empty(new_events) && monitors_are_empty(removed_events)) { #ifdef INSTRUMENT_DEBUG sanity_check_instrumentation(code); @@ -1753,6 +1774,10 @@ check_tool(PyInterpreterState *interp, int tool_id) return 0; } +/* We share the eval-breaker with flags, so the monitoring + * version goes in the top 24 bits */ +#define MONITORING_VERSION_INCREMENT (1 << 8) + int _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events) { @@ -1767,7 +1792,12 @@ _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events) return 0; } set_events(&interp->monitors, tool_id, events); - interp->monitoring_version++; + uint32_t new_version = global_version(interp) + MONITORING_VERSION_INCREMENT; + if (new_version == 0) { + PyErr_Format(PyExc_OverflowError, "events set too many time"); + return -1; + } + set_global_version(interp, new_version); return instrument_all_executing_code_objects(interp); } @@ -1795,7 +1825,7 @@ _PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEvent set_local_events(local, tool_id, events); if (is_version_up_to_date(code, interp)) { /* Force instrumentation update */ - code->_co_instrumentation_version = UINT64_MAX; + code->_co_instrumentation_version -= MONITORING_VERSION_INCREMENT; } if (_Py_Instrument(code, interp)) { return -1; @@ -2078,8 +2108,14 @@ monitoring_restart_events_impl(PyObject *module) * last restart version < current version */ PyInterpreterState *interp = _PyInterpreterState_GET(); - interp->last_restart_version = interp->monitoring_version + 1; - interp->monitoring_version = interp->last_restart_version + 1; + uint32_t restart_version = global_version(interp) + MONITORING_VERSION_INCREMENT; + uint32_t new_version = restart_version + MONITORING_VERSION_INCREMENT; + if (new_version <= MONITORING_VERSION_INCREMENT) { + PyErr_Format(PyExc_OverflowError, "events set too many time"); + return NULL; + } + interp->last_restart_version = restart_version; + set_global_version(interp, new_version); if (instrument_all_executing_code_objects(interp)) { return NULL; } From 4fa77bcc881dc8a9ff499bde7b05b7a0aaf44408 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 25 Sep 2023 13:19:47 +0100 Subject: [PATCH 05/18] Restore name of eval_breaker --- Include/internal/pycore_ceval.h | 6 +++--- Include/internal/pycore_ceval_state.h | 2 +- Python/bytecodes.c | 6 +++--- Python/ceval_macros.h | 2 +- Python/executor_cases.c.h | 3 ++- Python/generated_cases.c.h | 8 +++++--- Python/instrumentation.c | 8 ++++---- 7 files changed, 19 insertions(+), 16 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 9f1ed21ab50ea1..bb02d42636bb58 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -207,20 +207,20 @@ _Py_set_eval_breaker_bit(PyInterpreterState *interp, uint32_t bit, uint32_t set) assert(set == 0 || set == 1); uint32_t to_set = set << bit; uint32_t mask = ((uint32_t)1) << bit; - uint32_t old = _Py_atomic_load_uint32(&interp->ceval.eval_breaker2); + uint32_t old = _Py_atomic_load_uint32(&interp->ceval.eval_breaker); if ((old & mask) == to_set) { return; } uint32_t new; do { new = (old & ~mask) | (set << bit); - } while (!_Py_atomic_compare_exchange_uint32(&interp->ceval.eval_breaker2, &old, new)); + } while (!_Py_atomic_compare_exchange_uint32(&interp->ceval.eval_breaker, &old, new)); } static inline bool _Py_eval_breaker_bit_is_set(PyInterpreterState *interp, int32_t bit) { - return _Py_atomic_load_uint32(&interp->ceval.eval_breaker2) & (((uint32_t)1) << bit); + return _Py_atomic_load_uint32(&interp->ceval.eval_breaker) & (((uint32_t)1) << bit); } diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index aa4117acf880dc..aaa84a0d4c7048 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -80,7 +80,7 @@ struct _ceval_state { * the fast path in the eval loop. * It is by far the hottest field in this struct and * should be placed at the beginning. */ - uint32_t eval_breaker2; + uint32_t eval_breaker; /* Avoid false sharing */ int64_t padding[7]; int recursion_limit; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 50b44e611dca9e..5f0a1b1e559fcd 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -139,7 +139,7 @@ dummy_func( inst(RESUME, (--)) { TIER_ONE_ONLY assert(frame == tstate->current_frame); - uint32_t global_version = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker2) & ~0xff; + uint32_t global_version = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker) & ~0xff; uint32_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; assert((code_version & 255) == 0); if (code_version != global_version) { @@ -160,14 +160,14 @@ dummy_func( DEOPT_IF(_Py_emscripten_signal_clock == 0, RESUME); _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif - uint32_t eval_breaker = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker2); + uint32_t eval_breaker = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker); uint32_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; assert((version & 255) == 0); DEOPT_IF(eval_breaker != version, RESUME); } inst(INSTRUMENTED_RESUME, (--)) { - uint32_t global_version = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker2) & ~0xff; + uint32_t global_version = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker) & ~0xff; uint32_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; if (code_version != global_version) { if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) { diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 8079477550a76b..4597b2589eca8d 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -116,7 +116,7 @@ #define CHECK_EVAL_BREAKER() \ _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \ - if (_Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker2) & 255) { \ + if (_Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker) & 255) { \ if (_Py_HandlePending(tstate) != 0) { \ goto error; \ } \ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 04e3e6a5fad0b9..966833ad32cd4c 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -12,8 +12,9 @@ DEOPT_IF(_Py_emscripten_signal_clock == 0, RESUME); _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif - uint32_t eval_breaker = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker2); + uint32_t eval_breaker = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker); uint32_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + assert((version & 255) == 0); DEOPT_IF(eval_breaker != version, RESUME); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index afbfbd23de7f92..344c06bc685d24 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -12,8 +12,9 @@ static_assert(0 == 0, "incorrect cache size"); TIER_ONE_ONLY assert(frame == tstate->current_frame); - uint32_t global_version = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker2) & ~255; + uint32_t global_version = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker) & ~0xff; uint32_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + assert((code_version & 255) == 0); if (code_version != global_version) { int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp); if (err) goto error; @@ -33,14 +34,15 @@ DEOPT_IF(_Py_emscripten_signal_clock == 0, RESUME); _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif - uint32_t eval_breaker = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker2); + uint32_t eval_breaker = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker); uint32_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + assert((version & 255) == 0); DEOPT_IF(eval_breaker != version, RESUME); DISPATCH(); } TARGET(INSTRUMENTED_RESUME) { - uint32_t global_version = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker2) & ~255; + uint32_t global_version = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker) & ~0xff; uint32_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; if (code_version != global_version) { if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) { diff --git a/Python/instrumentation.c b/Python/instrumentation.c index c8cf660a36695c..c0fff7a27917b6 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -894,21 +894,21 @@ static inline int most_significant_bit(uint8_t bits) { static uint32_t global_version(PyInterpreterState *interp) { - return interp->ceval.eval_breaker2 & ~255; + return interp->ceval.eval_breaker & ~255; } static void set_global_version(PyInterpreterState *interp, uint32_t version) { assert((version & 255) == 0); - uint32_t old = _Py_atomic_load_uint32(&interp->ceval.eval_breaker2); + uint32_t old = _Py_atomic_load_uint32(&interp->ceval.eval_breaker); if (old == version) { return; } int32_t new; do { new = (old & 255) | version; - } while (!_Py_atomic_compare_exchange_uint32(&interp->ceval.eval_breaker2, &old, new)); + } while (!_Py_atomic_compare_exchange_uint32(&interp->ceval.eval_breaker, &old, new)); } static bool @@ -1569,7 +1569,7 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) { if (is_version_up_to_date(code, interp)) { assert( - (interp->ceval.eval_breaker2 & ~0xff) == 0 || + (interp->ceval.eval_breaker & ~0xff) == 0 || instrumentation_cross_checks(interp, code) ); return 0; From b8258df797cd695e69faf188acaabcdf8964fa61 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 25 Sep 2023 13:43:03 +0100 Subject: [PATCH 06/18] Use full word for monitoring. Avoids running out of versions on 64 bit machines. --- Include/internal/pycore_ceval.h | 12 ++++++------ Include/internal/pycore_ceval_state.h | 2 +- Python/bytecodes.c | 12 ++++++------ Python/ceval_macros.h | 2 +- Python/executor_cases.c.h | 4 ++-- Python/generated_cases.c.h | 12 ++++++------ Python/instrumentation.c | 9 +++------ 7 files changed, 25 insertions(+), 28 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index bb02d42636bb58..50266803b56659 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -205,22 +205,22 @@ static inline void _Py_set_eval_breaker_bit(PyInterpreterState *interp, uint32_t bit, uint32_t set) { assert(set == 0 || set == 1); - uint32_t to_set = set << bit; - uint32_t mask = ((uint32_t)1) << bit; - uint32_t old = _Py_atomic_load_uint32(&interp->ceval.eval_breaker); + uintptr_t to_set = set << bit; + uintptr_t mask = ((uintptr_t)1) << bit; + uintptr_t old = _Py_atomic_load_uintptr(&interp->ceval.eval_breaker); if ((old & mask) == to_set) { return; } - uint32_t new; + uintptr_t new; do { new = (old & ~mask) | (set << bit); - } while (!_Py_atomic_compare_exchange_uint32(&interp->ceval.eval_breaker, &old, new)); + } while (!_Py_atomic_compare_exchange_uintptr(&interp->ceval.eval_breaker, &old, new)); } static inline bool _Py_eval_breaker_bit_is_set(PyInterpreterState *interp, int32_t bit) { - return _Py_atomic_load_uint32(&interp->ceval.eval_breaker) & (((uint32_t)1) << bit); + return _Py_atomic_load_uintptr(&interp->ceval.eval_breaker) & (((uintptr_t)1) << bit); } diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index aaa84a0d4c7048..5b7be237e57d84 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -80,7 +80,7 @@ struct _ceval_state { * the fast path in the eval loop. * It is by far the hottest field in this struct and * should be placed at the beginning. */ - uint32_t eval_breaker; + uintptr_t eval_breaker; /* Avoid false sharing */ int64_t padding[7]; int recursion_limit; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 5f0a1b1e559fcd..25e684e959c89d 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -139,8 +139,8 @@ dummy_func( inst(RESUME, (--)) { TIER_ONE_ONLY assert(frame == tstate->current_frame); - uint32_t global_version = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker) & ~0xff; - uint32_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~0xff; + uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; assert((code_version & 255) == 0); if (code_version != global_version) { int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp); @@ -160,15 +160,15 @@ dummy_func( DEOPT_IF(_Py_emscripten_signal_clock == 0, RESUME); _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif - uint32_t eval_breaker = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker); - uint32_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker); + uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; assert((version & 255) == 0); DEOPT_IF(eval_breaker != version, RESUME); } inst(INSTRUMENTED_RESUME, (--)) { - uint32_t global_version = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker) & ~0xff; - uint32_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~0xff; + uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; if (code_version != global_version) { if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) { goto error; diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 4597b2589eca8d..635a16dc1e61c9 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -116,7 +116,7 @@ #define CHECK_EVAL_BREAKER() \ _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \ - if (_Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker) & 255) { \ + if (_Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & 255) { \ if (_Py_HandlePending(tstate) != 0) { \ goto error; \ } \ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 966833ad32cd4c..db5ba157a42fcf 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -12,8 +12,8 @@ DEOPT_IF(_Py_emscripten_signal_clock == 0, RESUME); _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif - uint32_t eval_breaker = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker); - uint32_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker); + uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; assert((version & 255) == 0); DEOPT_IF(eval_breaker != version, RESUME); break; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 344c06bc685d24..f832c94ac2786c 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -12,8 +12,8 @@ static_assert(0 == 0, "incorrect cache size"); TIER_ONE_ONLY assert(frame == tstate->current_frame); - uint32_t global_version = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker) & ~0xff; - uint32_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~0xff; + uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; assert((code_version & 255) == 0); if (code_version != global_version) { int err = _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp); @@ -34,16 +34,16 @@ DEOPT_IF(_Py_emscripten_signal_clock == 0, RESUME); _Py_emscripten_signal_clock -= Py_EMSCRIPTEN_SIGNAL_HANDLING; #endif - uint32_t eval_breaker = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker); - uint32_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker); + uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; assert((version & 255) == 0); DEOPT_IF(eval_breaker != version, RESUME); DISPATCH(); } TARGET(INSTRUMENTED_RESUME) { - uint32_t global_version = _Py_atomic_load_uint32_relaxed(&tstate->interp->ceval.eval_breaker) & ~0xff; - uint32_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; + uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~0xff; + uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; if (code_version != global_version) { if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) { goto error; diff --git a/Python/instrumentation.c b/Python/instrumentation.c index c0fff7a27917b6..41a860be489bbc 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -901,14 +901,11 @@ static void set_global_version(PyInterpreterState *interp, uint32_t version) { assert((version & 255) == 0); - uint32_t old = _Py_atomic_load_uint32(&interp->ceval.eval_breaker); - if (old == version) { - return; - } - int32_t new; + uintptr_t old = _Py_atomic_load_uintptr(&interp->ceval.eval_breaker); + intptr_t new; do { new = (old & 255) | version; - } while (!_Py_atomic_compare_exchange_uint32(&interp->ceval.eval_breaker, &old, new)); + } while (!_Py_atomic_compare_exchange_uintptr(&interp->ceval.eval_breaker, &old, new)); } static bool From 558923333f098c56646329daa6c9e50a98be5fe1 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 25 Sep 2023 17:02:24 +0100 Subject: [PATCH 07/18] Remove some #includes --- Include/internal/pycore_ceval.h | 1 - Python/ceval_gil.c | 1 - Python/instrumentation.c | 1 - 3 files changed, 3 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 6865a6dc407aaa..a0638c1b097056 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -10,7 +10,6 @@ extern "C" { #include "pycore_interp.h" // PyInterpreterState.eval_frame #include "pycore_pystate.h" // _PyThreadState_GET() -#include "cpython/pyatomic.h" /* Forward declarations */ struct pyruntimestate; diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index a1b779a6c31393..7da6f8229c798f 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1,7 +1,6 @@ #include "Python.h" #include "pycore_atomic.h" // _Py_atomic_int -#include "cpython/pyatomic.h" // _Py_atomic_load_int32 #include "pycore_ceval.h" // _PyEval_SignalReceived() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // _Py_RunGC() diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 54392068590c9d..2e9473d9d571c4 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -1,7 +1,6 @@ #include "Python.h" #include "opcode_ids.h" -#include "cpython/pyatomic.h" #include "pycore_bitutils.h" // _Py_popcount32 #include "pycore_call.h" From 66cd1c33154f40e4e7bad3af8bbb3c3d645ce9d5 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 25 Sep 2023 19:12:54 +0100 Subject: [PATCH 08/18] Fix a couple of sizes and make read atomic --- Include/cpython/code.h | 2 +- Include/internal/pycore_ceval.h | 2 +- Include/internal/pycore_interp.h | 2 +- Modules/gcmodule.c | 2 +- Python/ceval_gil.c | 12 ++++++------ 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 0ce075189359e5..cf715c55a2b3b8 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -167,7 +167,7 @@ typedef struct { PyObject *co_weakreflist; /* to support weakrefs to code objects */ \ _PyExecutorArray *co_executors; /* executors from optimizer */ \ _PyCoCached *_co_cached; /* cached co_* attributes */ \ - uint32_t _co_instrumentation_version; /* current instrumentation version */ \ + uintptr_t _co_instrumentation_version; /* current instrumentation version */ \ _PyCoMonitoringData *_co_monitoring; /* Monitoring data */ \ int _co_firsttraceable; /* index of first traceable instruction */ \ /* Scratch space for extra data relating to the code object. \ diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index a0638c1b097056..9ca6426fe08b30 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -212,7 +212,7 @@ _Py_set_eval_breaker_bit(PyInterpreterState *interp, uint32_t bit, uint32_t set) } uintptr_t new; do { - new = (old & ~mask) | (set << bit); + new = (old & ~mask) | to_set; } while (!_Py_atomic_compare_exchange_uintptr(&interp->ceval.eval_breaker, &old, new)); } diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index a0bdb968ad513f..22d7ade35a667c 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -67,7 +67,7 @@ struct _is { int _initialized; int finalizing; - uint32_t last_restart_version; + uintptr_t last_restart_version; struct pythreads { uint64_t next_unique_id; /* The linked list of threads, newest first. */ diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 96771c2f7a0c45..592d527f0bd6a2 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -24,7 +24,7 @@ */ #include "Python.h" -#include "pycore_ceval.h" // _PyEval_SignalReceived() +#include "pycore_ceval.h" // _Py_set_eval_breaker_bit() #include "pycore_context.h" #include "pycore_dict.h" // _PyDict_MaybeUntrack() #include "pycore_initconfig.h" diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 7da6f8229c798f..69940d9dc2f341 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -881,7 +881,7 @@ make_pending_calls(PyInterpreterState *interp) return -1; } } - else if (pending_main->calls_to_do) { + else if (_Py_atomic_load_int32_relaxed(&pending_main->calls_to_do)) { SIGNAL_PENDING_CALLS(interp); } @@ -1036,20 +1036,20 @@ _Py_HandlePending(PyThreadState *tstate) } /* Pending calls */ - if (maybe_has_pending_calls(tstate->interp)) { - if (make_pending_calls(tstate->interp) != 0) { + if (maybe_has_pending_calls(interp)) { + if (make_pending_calls(interp) != 0) { return -1; } } /* GC scheduled to run */ - if (_Py_eval_breaker_bit_is_set(tstate->interp, _PY_GC_SCHEDULED_BIT)) { - _Py_set_eval_breaker_bit(tstate->interp, _PY_GC_SCHEDULED_BIT, 0); + if (_Py_eval_breaker_bit_is_set(interp, _PY_GC_SCHEDULED_BIT)) { + _Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 0); _Py_RunGC(tstate); } /* GIL drop request */ - if (_Py_eval_breaker_bit_is_set(tstate->interp, _PY_GIL_DROP_REQUEST_BIT)) { + if (_Py_eval_breaker_bit_is_set(interp, _PY_GIL_DROP_REQUEST_BIT)) { /* Give another thread a chance */ if (_PyThreadState_SwapNoGIL(NULL) != tstate) { Py_FatalError("tstate mix-up"); From 0b85608af5e77188a70acb6902c028962d50740f Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 25 Sep 2023 21:17:43 +0100 Subject: [PATCH 09/18] Keep eval_breaker synchronized with thread state --- Include/internal/pycore_ceval_state.h | 4 -- Python/ceval_gil.c | 97 ++++++++++----------------- 2 files changed, 35 insertions(+), 66 deletions(-) diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index e4e344bfa4d0dc..47971fbf2b4bfe 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -18,10 +18,6 @@ struct _pending_calls { PyThread_type_lock lock; /* Request for running pending calls. */ int32_t calls_to_do; - /* Request for looking at the `async_exc` field of the current - thread state. - Guarded by the GIL. */ - int async_exc; #define NPENDINGCALLS 32 struct _pending_call { _Py_pending_call_func func; diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 69940d9dc2f341..ff05d95fe8fc19 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -57,6 +57,33 @@ #define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) _Py_atomic_load_relaxed(ATOMIC_VAL) #endif + +static inline void +update_eval_breaker_from_thread(PyInterpreterState *interp, PyThreadState *tstate) +{ + if (tstate == NULL) { + return; + } + int32_t calls_to_do; + if (_Py_ThreadCanHandleSignals(interp)) { + calls_to_do = _Py_atomic_load_int32_relaxed( + &_PyRuntime.ceval.pending_mainthread.calls_to_do); + if (_Py_atomic_load(&_PyRuntime.signals.is_tripped)) { + _Py_set_eval_breaker_bit(interp, _PY_SIGNALS_PENDING_BIT, 1); + } + } + else { + calls_to_do = _Py_atomic_load_int32_relaxed( + &interp->ceval.pending.calls_to_do); + } + if (calls_to_do) { + _Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 1); + } + if (tstate->async_exc != NULL) { + _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 1); + } +} + static inline void SET_GIL_DROP_REQUEST(PyInterpreterState *interp) { @@ -84,39 +111,6 @@ UNSIGNAL_PENDING_CALLS(PyInterpreterState *interp) _Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 0); } - -static inline void -SIGNAL_PENDING_SIGNALS(PyInterpreterState *interp, int force) -{ - _Py_set_eval_breaker_bit(interp, _PY_SIGNALS_PENDING_BIT, 1); -} - - -static inline void -UNSIGNAL_PENDING_SIGNALS(PyInterpreterState *interp) -{ - _Py_set_eval_breaker_bit(interp, _PY_SIGNALS_PENDING_BIT, 0); -} - - -static inline void -SIGNAL_ASYNC_EXC(PyInterpreterState *interp) -{ - struct _ceval_state *ceval2 = &interp->ceval; - ceval2->pending.async_exc = 1; - _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 1); -} - - -static inline void -UNSIGNAL_ASYNC_EXC(PyInterpreterState *interp) -{ - struct _ceval_state *ceval2 = &interp->ceval; - ceval2->pending.async_exc = 0; - _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 0); -} - - /* * Implementation of the Global Interpreter Lock (GIL). */ @@ -390,11 +384,7 @@ take_gil(PyThreadState *tstate) assert(_PyThreadState_CheckConsistency(tstate)); RESET_GIL_DROP_REQUEST(interp); - - /* Don't access tstate if the thread must exit */ - if (tstate->async_exc != NULL) { - _PyEval_SignalAsyncExc(tstate->interp); - } + update_eval_breaker_from_thread(interp, tstate); MUTEX_UNLOCK(gil->mutex); @@ -634,7 +624,7 @@ _PyEval_ReInitThreads(PyThreadState *tstate) void _PyEval_SignalAsyncExc(PyInterpreterState *interp) { - SIGNAL_ASYNC_EXC(interp); + _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 1); } PyThreadState * @@ -685,22 +675,9 @@ PyEval_RestoreThread(PyThreadState *tstate) void _PyEval_SignalReceived(PyInterpreterState *interp) { -#ifdef MS_WINDOWS - // bpo-42296: On Windows, _PyEval_SignalReceived() is called from a signal - // handler which can run in a thread different than the Python thread, in - // which case _Py_ThreadCanHandleSignals() is wrong. Ignore - // _Py_ThreadCanHandleSignals() and always set eval_breaker to 1. - // - // The next eval_frame_handle_pending() call will call - // _Py_ThreadCanHandleSignals() to recompute eval_breaker. - int force = 1; -#else - int force = 0; -#endif - /* bpo-30703: Function called when the C signal handler of Python gets a - signal. We cannot queue a callback using _PyEval_AddPendingCall() since - that function is not async-signal-safe. */ - SIGNAL_PENDING_SIGNALS(interp, force); + if (_Py_ThreadCanHandleSignals(interp)) { + _Py_set_eval_breaker_bit(interp, _PY_SIGNALS_PENDING_BIT, 1); + } } /* Push one item onto the queue while holding the lock. */ @@ -793,14 +770,13 @@ static int handle_signals(PyThreadState *tstate) { assert(_PyThreadState_CheckConsistency(tstate)); + _Py_set_eval_breaker_bit(tstate->interp, _PY_SIGNALS_PENDING_BIT, 0); if (!_Py_ThreadCanHandleSignals(tstate->interp)) { return 0; } - - UNSIGNAL_PENDING_SIGNALS(tstate->interp); if (_PyErr_CheckSignalsTstate(tstate) < 0) { /* On failure, re-schedule a call to handle_signals(). */ - SIGNAL_PENDING_SIGNALS(tstate->interp, 0); + _Py_set_eval_breaker_bit(tstate->interp, _PY_SIGNALS_PENDING_BIT, 1); return -1; } return 0; @@ -881,9 +857,6 @@ make_pending_calls(PyInterpreterState *interp) return -1; } } - else if (_Py_atomic_load_int32_relaxed(&pending_main->calls_to_do)) { - SIGNAL_PENDING_CALLS(interp); - } pending->busy = 0; return 0; @@ -1069,7 +1042,7 @@ _Py_HandlePending(PyThreadState *tstate) if (tstate->async_exc != NULL) { PyObject *exc = tstate->async_exc; tstate->async_exc = NULL; - UNSIGNAL_ASYNC_EXC(tstate->interp); + _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 0); _PyErr_SetNone(tstate, exc); Py_DECREF(exc); return -1; From 558a4f985cf682733022a9de5f43832cbbe7d403 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 26 Sep 2023 03:08:18 +0100 Subject: [PATCH 10/18] Avoid using tstate when it might have been freed --- Python/ceval_gil.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index ff05d95fe8fc19..ce5f91452dc50e 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -218,8 +218,9 @@ static void recreate_gil(struct _gil_runtime_state *gil) #endif static void -drop_gil(struct _ceval_state *ceval, PyThreadState *tstate) +drop_gil(PyInterpreterState *interp, PyThreadState *tstate) { + struct _ceval_state *ceval = &interp->ceval; /* If tstate is NULL, the caller is indicating that we're releasing the GIL for the last time in this thread. This is particularly relevant when the current thread state is finalizing or its @@ -257,7 +258,7 @@ drop_gil(struct _ceval_state *ceval, PyThreadState *tstate) the GIL, and that's the only time we might delete the interpreter, so checking tstate first prevents the crash. See https://github.com/python/cpython/issues/104341. */ - if (tstate != NULL && _Py_eval_breaker_bit_is_set(tstate->interp, _PY_GIL_DROP_REQUEST_BIT)) { + if (tstate != NULL && _Py_eval_breaker_bit_is_set(interp, _PY_GIL_DROP_REQUEST_BIT)) { MUTEX_LOCK(gil->switch_mutex); /* Not switched yet => wait */ if (((PyThreadState*)_Py_atomic_load_relaxed(&gil->last_holder)) == tstate) @@ -303,8 +304,7 @@ take_gil(PyThreadState *tstate) assert(_PyThreadState_CheckConsistency(tstate)); PyInterpreterState *interp = tstate->interp; - struct _ceval_state *ceval = &interp->ceval; - struct _gil_runtime_state *gil = ceval->gil; + struct _gil_runtime_state *gil = interp->ceval.gil; /* Check that _PyEval_InitThreads() was called to create the lock */ assert(gil_created(gil)); @@ -378,7 +378,7 @@ take_gil(PyThreadState *tstate) in take_gil() while the main thread called wait_for_thread_shutdown() from Py_Finalize(). */ MUTEX_UNLOCK(gil->mutex); - drop_gil(ceval, tstate); + drop_gil(interp, tstate); PyThread_exit_thread(); } assert(_PyThreadState_CheckConsistency(tstate)); @@ -544,8 +544,7 @@ PyEval_ReleaseLock(void) /* This function must succeed when the current thread state is NULL. We therefore avoid PyThreadState_Get() which dumps a fatal error in debug mode. */ - struct _ceval_state *ceval = &tstate->interp->ceval; - drop_gil(ceval, tstate); + drop_gil(tstate->interp, tstate); } void @@ -561,8 +560,7 @@ _PyEval_ReleaseLock(PyInterpreterState *interp, PyThreadState *tstate) /* If tstate is NULL then we do not expect the current thread to acquire the GIL ever again. */ assert(tstate == NULL || tstate->interp == interp); - struct _ceval_state *ceval = &interp->ceval; - drop_gil(ceval, tstate); + drop_gil(interp, tstate); } void @@ -586,8 +584,7 @@ PyEval_ReleaseThread(PyThreadState *tstate) if (new_tstate != tstate) { Py_FatalError("wrong thread state"); } - struct _ceval_state *ceval = &tstate->interp->ceval; - drop_gil(ceval, tstate); + drop_gil(tstate->interp, tstate); } #ifdef HAVE_FORK @@ -633,9 +630,8 @@ PyEval_SaveThread(void) PyThreadState *tstate = _PyThreadState_SwapNoGIL(NULL); _Py_EnsureTstateNotNULL(tstate); - struct _ceval_state *ceval = &tstate->interp->ceval; - assert(gil_created(ceval->gil)); - drop_gil(ceval, tstate); + assert(gil_created(tstate->interp->ceval.gil)); + drop_gil(tstate->interp, tstate); return tstate; } @@ -998,7 +994,6 @@ _PyEval_FiniState(struct _ceval_state *ceval) int _Py_HandlePending(PyThreadState *tstate) { - struct _ceval_state *interp_ceval_state = &tstate->interp->ceval; PyInterpreterState *interp = tstate->interp; /* Pending signals */ @@ -1027,7 +1022,7 @@ _Py_HandlePending(PyThreadState *tstate) if (_PyThreadState_SwapNoGIL(NULL) != tstate) { Py_FatalError("tstate mix-up"); } - drop_gil(interp_ceval_state, tstate); + drop_gil(interp, tstate); /* Other threads may run now */ From 36f85b7a41591117bf9fb6076e5dc826da93454c Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 26 Sep 2023 03:40:25 +0100 Subject: [PATCH 11/18] Address review comments --- Python/ceval_gil.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index ce5f91452dc50e..78d3bb6673a2a3 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -57,28 +57,27 @@ #define _Py_atomic_load_relaxed_int32(ATOMIC_VAL) _Py_atomic_load_relaxed(ATOMIC_VAL) #endif - +/* bpo-40010: eval_breaker should be recomputed if there + is a pending signal: signal received by another thread which cannot + handle signals. + Similarly, we set CALLS_TO_DO and ASYNC_EXCEPTION to match the thread. +*/ static inline void update_eval_breaker_from_thread(PyInterpreterState *interp, PyThreadState *tstate) { if (tstate == NULL) { return; } - int32_t calls_to_do; if (_Py_ThreadCanHandleSignals(interp)) { - calls_to_do = _Py_atomic_load_int32_relaxed( + int32_t calls_to_do = _Py_atomic_load_int32_relaxed( &_PyRuntime.ceval.pending_mainthread.calls_to_do); + if (calls_to_do) { + _Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 1); + } if (_Py_atomic_load(&_PyRuntime.signals.is_tripped)) { _Py_set_eval_breaker_bit(interp, _PY_SIGNALS_PENDING_BIT, 1); } } - else { - calls_to_do = _Py_atomic_load_int32_relaxed( - &interp->ceval.pending.calls_to_do); - } - if (calls_to_do) { - _Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 1); - } if (tstate->async_exc != NULL) { _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 1); } From 964329039861020b463231a157ef29e1634aad64 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 26 Sep 2023 03:47:08 +0100 Subject: [PATCH 12/18] Add news --- .../2023-09-26-03-46-55.gh-issue-109369.OJbxbF.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-09-26-03-46-55.gh-issue-109369.OJbxbF.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-09-26-03-46-55.gh-issue-109369.OJbxbF.rst b/Misc/NEWS.d/next/Core and Builtins/2023-09-26-03-46-55.gh-issue-109369.OJbxbF.rst new file mode 100644 index 00000000000000..ca1f0f1bd44a8c --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-09-26-03-46-55.gh-issue-109369.OJbxbF.rst @@ -0,0 +1,2 @@ +The internal eval_breaker and supporting flags, plus the monitoring version +have been merged into a single atomic integer to speed up checks. From b6a49f501160db316166d7f5da95aa4cad415173 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 26 Sep 2023 06:26:24 +0100 Subject: [PATCH 13/18] Convert magic numbers to named constants --- Include/internal/pycore_ceval.h | 3 +++ Python/bytecodes.c | 4 ++-- Python/ceval_macros.h | 2 +- Python/instrumentation.c | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 9ca6426fe08b30..abdf7441f52b4b 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -199,6 +199,9 @@ void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame) #define _PY_ASYNC_EXCEPTION_BIT 3 #define _PY_GC_SCHEDULED_BIT 4 +/* Reserve a few bits for future use */ +#define _PY_EVAL_EVENTS_BITS 8 +#define _PY_EVAL_EVENTS_MASK ((1 << _PY_EVAL_EVENTS_BITS)-1) static inline void _Py_set_eval_breaker_bit(PyInterpreterState *interp, uint32_t bit, uint32_t set) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 69a30353bbb649..068da240528da1 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -161,12 +161,12 @@ dummy_func( #endif uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker); uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; - assert((version & 255) == 0); + assert((version & _PY_EVAL_EVENTS_MASK) == 0); DEOPT_IF(eval_breaker != version, RESUME); } inst(INSTRUMENTED_RESUME, (--)) { - uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~0xff; + uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~_PY_EVAL_EVENTS_MASK; uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; if (code_version != global_version) { if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) { diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 935d636d87e43f..872e0a2b7f92ca 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -116,7 +116,7 @@ #define CHECK_EVAL_BREAKER() \ _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \ - if (_Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & 255) { \ + if (_Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & _PY_EVAL_EVENTS_MASK) { \ if (_Py_HandlePending(tstate) != 0) { \ goto error; \ } \ diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 2e9473d9d571c4..db03e3912aa38d 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -4,6 +4,7 @@ #include "pycore_bitutils.h" // _Py_popcount32 #include "pycore_call.h" +#include "pycore_ceval.h" // _PY_EVAL_EVENTS_BITS #include "pycore_code.h" // _PyCode_Clear_Executors() #include "pycore_frame.h" #include "pycore_interp.h" @@ -1780,7 +1781,7 @@ check_tool(PyInterpreterState *interp, int tool_id) /* We share the eval-breaker with flags, so the monitoring * version goes in the top 24 bits */ -#define MONITORING_VERSION_INCREMENT (1 << 8) +#define MONITORING_VERSION_INCREMENT (1 << _PY_EVAL_EVENTS_BITS) int _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events) From 94b05156559794f72f302e1c499c242da3022893 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 26 Sep 2023 07:32:51 +0100 Subject: [PATCH 14/18] Make sure that async exception bit is cleared when handling pending events. --- Python/ceval_gil.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 78d3bb6673a2a3..febce8826ebeaa 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -777,12 +777,6 @@ handle_signals(PyThreadState *tstate) return 0; } -static inline int -maybe_has_pending_calls(PyInterpreterState *interp) -{ - return _Py_eval_breaker_bit_is_set(interp, _PY_CALLS_TO_DO_BIT) ? 1 : 0; -} - static int _make_pending_calls(struct _pending_calls *pending) { @@ -1003,7 +997,7 @@ _Py_HandlePending(PyThreadState *tstate) } /* Pending calls */ - if (maybe_has_pending_calls(interp)) { + if (_Py_eval_breaker_bit_is_set(interp, _PY_CALLS_TO_DO_BIT)) { if (make_pending_calls(interp) != 0) { return -1; } @@ -1033,13 +1027,15 @@ _Py_HandlePending(PyThreadState *tstate) } /* Check for asynchronous exception. */ - if (tstate->async_exc != NULL) { - PyObject *exc = tstate->async_exc; - tstate->async_exc = NULL; + if (_Py_eval_breaker_bit_is_set(interp, _PY_ASYNC_EXCEPTION_BIT)) { _Py_set_eval_breaker_bit(interp, _PY_ASYNC_EXCEPTION_BIT, 0); - _PyErr_SetNone(tstate, exc); - Py_DECREF(exc); - return -1; + if (tstate->async_exc != NULL) { + PyObject *exc = tstate->async_exc; + tstate->async_exc = NULL; + _PyErr_SetNone(tstate, exc); + Py_DECREF(exc); + return -1; + } } return 0; } From 0bb8bd2f3a3add3d86fe53b9cb9889c6ec855968 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 26 Sep 2023 09:59:09 +0100 Subject: [PATCH 15/18] Relax a load and use named const --- Include/internal/pycore_ceval.h | 2 +- Python/bytecodes.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index abdf7441f52b4b..48fee697324f8f 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -222,7 +222,7 @@ _Py_set_eval_breaker_bit(PyInterpreterState *interp, uint32_t bit, uint32_t set) static inline bool _Py_eval_breaker_bit_is_set(PyInterpreterState *interp, int32_t bit) { - return _Py_atomic_load_uintptr(&interp->ceval.eval_breaker) & (((uintptr_t)1) << bit); + return _Py_atomic_load_uintptr_relaxed(&interp->ceval.eval_breaker) & (((uintptr_t)1) << bit); } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 068da240528da1..4cfdb1011f7d79 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -138,7 +138,9 @@ dummy_func( inst(RESUME, (--)) { TIER_ONE_ONLY assert(frame == tstate->current_frame); - uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~0xff; + uintptr_t global_version = + _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & + ~_PY_EVAL_EVENTS_MASK; uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; assert((code_version & 255) == 0); if (code_version != global_version) { From 7c74435962e954c6a66cf5c4112913a44c46b67f Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 26 Sep 2023 10:26:25 +0100 Subject: [PATCH 16/18] Regen files --- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 6b72f31da18423..032fa67df2a6df 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -14,7 +14,7 @@ #endif uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker); uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; - assert((version & 255) == 0); + assert((version & _PY_EVAL_EVENTS_MASK) == 0); DEOPT_IF(eval_breaker != version, RESUME); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 47ce3e3bd6098c..aa9de1f69886d0 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -12,7 +12,9 @@ static_assert(0 == 0, "incorrect cache size"); TIER_ONE_ONLY assert(frame == tstate->current_frame); - uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~0xff; + uintptr_t global_version = + _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & + ~_PY_EVAL_EVENTS_MASK; uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; assert((code_version & 255) == 0); if (code_version != global_version) { @@ -36,13 +38,13 @@ #endif uintptr_t eval_breaker = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker); uintptr_t version = _PyFrame_GetCode(frame)->_co_instrumentation_version; - assert((version & 255) == 0); + assert((version & _PY_EVAL_EVENTS_MASK) == 0); DEOPT_IF(eval_breaker != version, RESUME); DISPATCH(); } TARGET(INSTRUMENTED_RESUME) { - uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~0xff; + uintptr_t global_version = _Py_atomic_load_uintptr_relaxed(&tstate->interp->ceval.eval_breaker) & ~_PY_EVAL_EVENTS_MASK; uintptr_t code_version = _PyFrame_GetCode(frame)->_co_instrumentation_version; if (code_version != global_version) { if (_Py_Instrument(_PyFrame_GetCode(frame), tstate->interp)) { From 92cf1ffc913901ef712b12b24b04650a23aff62f Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 28 Sep 2023 10:02:30 +0100 Subject: [PATCH 17/18] Use named constant --- Python/instrumentation.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Python/instrumentation.c b/Python/instrumentation.c index db03e3912aa38d..eee1908e503e43 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -899,17 +899,17 @@ static inline int most_significant_bit(uint8_t bits) { static uint32_t global_version(PyInterpreterState *interp) { - return interp->ceval.eval_breaker & ~255; + return interp->ceval.eval_breaker & ~_PY_EVAL_EVENTS_MASK; } static void set_global_version(PyInterpreterState *interp, uint32_t version) { - assert((version & 255) == 0); + assert((version & _PY_EVAL_EVENTS_MASK) == 0); uintptr_t old = _Py_atomic_load_uintptr(&interp->ceval.eval_breaker); intptr_t new; do { - new = (old & 255) | version; + new = (old & _PY_EVAL_EVENTS_MASK) | version; } while (!_Py_atomic_compare_exchange_uintptr(&interp->ceval.eval_breaker, &old, new)); } @@ -1574,7 +1574,7 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) { if (is_version_up_to_date(code, interp)) { assert( - (interp->ceval.eval_breaker & ~0xff) == 0 || + (interp->ceval.eval_breaker & ~_PY_EVAL_EVENTS_MASK) == 0 || instrumentation_cross_checks(interp, code) ); return 0; @@ -1799,7 +1799,7 @@ _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events) set_events(&interp->monitors, tool_id, events); uint32_t new_version = global_version(interp) + MONITORING_VERSION_INCREMENT; if (new_version == 0) { - PyErr_Format(PyExc_OverflowError, "events set too many time"); + PyErr_Format(PyExc_OverflowError, "events set too many times"); return -1; } set_global_version(interp, new_version); @@ -2116,7 +2116,7 @@ monitoring_restart_events_impl(PyObject *module) uint32_t restart_version = global_version(interp) + MONITORING_VERSION_INCREMENT; uint32_t new_version = restart_version + MONITORING_VERSION_INCREMENT; if (new_version <= MONITORING_VERSION_INCREMENT) { - PyErr_Format(PyExc_OverflowError, "events set too many time"); + PyErr_Format(PyExc_OverflowError, "events set too many times"); return NULL; } interp->last_restart_version = restart_version; From da0d844321737f59682dd053ef4ec18df7d38303 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 2 Oct 2023 04:55:40 +0100 Subject: [PATCH 18/18] Clarify distinction between main thread handling calls and main thread handling signals. --- Python/ceval_gil.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index febce8826ebeaa..6b4ec8eed03aab 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -68,14 +68,17 @@ update_eval_breaker_from_thread(PyInterpreterState *interp, PyThreadState *tstat if (tstate == NULL) { return; } - if (_Py_ThreadCanHandleSignals(interp)) { + + if (_Py_IsMainThread()) { int32_t calls_to_do = _Py_atomic_load_int32_relaxed( &_PyRuntime.ceval.pending_mainthread.calls_to_do); if (calls_to_do) { _Py_set_eval_breaker_bit(interp, _PY_CALLS_TO_DO_BIT, 1); } - if (_Py_atomic_load(&_PyRuntime.signals.is_tripped)) { - _Py_set_eval_breaker_bit(interp, _PY_SIGNALS_PENDING_BIT, 1); + if (_Py_ThreadCanHandleSignals(interp)) { + if (_Py_atomic_load(&_PyRuntime.signals.is_tripped)) { + _Py_set_eval_breaker_bit(interp, _PY_SIGNALS_PENDING_BIT, 1); + } } } if (tstate->async_exc != NULL) {