From 6d9b6a21f3aaa8fe0b7e83b5c7283b32cfb873d7 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 16 Oct 2024 14:55:05 -0700 Subject: [PATCH 01/25] Refactor specialize_c_call to use helpers --- Python/specialize.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index ad41dfc39c0147..a0c6601dc0d748 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2107,49 +2107,49 @@ specialize_py_call_kw(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, return 0; } -static int +static void specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) { if (PyCFunction_GET_FUNCTION(callable) == NULL) { - SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OTHER); - return 1; + unspecialize(instr, SPEC_FAIL_OTHER); + return; } switch (PyCFunction_GET_FLAGS(callable) & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { case METH_O: { if (nargs != 1) { - SPECIALIZATION_FAIL(CALL, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); - return 1; + unspecialize(instr, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); + return; } /* len(o) */ PyInterpreterState *interp = _PyInterpreterState_GET(); if (callable == interp->callable_cache.len) { - instr->op.code = CALL_LEN; - return 0; + specialize(instr, CALL_LEN); + return; } - instr->op.code = CALL_BUILTIN_O; - return 0; + specialize(instr, CALL_BUILTIN_O); + return; } case METH_FASTCALL: { if (nargs == 2) { /* isinstance(o1, o2) */ PyInterpreterState *interp = _PyInterpreterState_GET(); if (callable == interp->callable_cache.isinstance) { - instr->op.code = CALL_ISINSTANCE; - return 0; + specialize(instr, CALL_ISINSTANCE); + return; } } - instr->op.code = CALL_BUILTIN_FAST; - return 0; + specialize(instr, CALL_BUILTIN_FAST); + return; } case METH_FASTCALL | METH_KEYWORDS: { - instr->op.code = CALL_BUILTIN_FAST_WITH_KEYWORDS; - return 0; + specialize(instr, CALL_BUILTIN_FAST_WITH_KEYWORDS); + return; } default: - instr->op.code 
= CALL_NON_PY_GENERAL; - return 0; + specialize(instr, CALL_NON_PY_GENERAL); + return; } } @@ -2164,7 +2164,8 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) _PyCallCache *cache = (_PyCallCache *)(instr + 1); int fail; if (PyCFunction_CheckExact(callable)) { - fail = specialize_c_call(callable, instr, nargs); + specialize_c_call(callable, instr, nargs); + return; } else if (PyFunction_Check(callable)) { fail = specialize_py_call((PyFunctionObject *)callable, instr, nargs, false); From 30e25d22717d223dbd149ef1fa77d0ad4159dcf6 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 16 Oct 2024 15:04:06 -0700 Subject: [PATCH 02/25] Refactor specialize_py_call to use helpers --- Python/specialize.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index a0c6601dc0d748..af54e385f40bbf 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2045,7 +2045,7 @@ specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr, return 0; } -static int +static void specialize_py_call(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, bool bound_method) { @@ -2054,33 +2054,34 @@ specialize_py_call(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, int kind = function_kind(code); /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { - SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_PEP_523); - return -1; + unspecialize(instr, SPEC_FAIL_CALL_PEP_523); + return; } int argcount = -1; if (kind == SPEC_FAIL_CODE_NOT_OPTIMIZED) { - SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CODE_NOT_OPTIMIZED); - return -1; + unspecialize(instr, SPEC_FAIL_CODE_NOT_OPTIMIZED); + return; } if (kind == SIMPLE_FUNCTION) { argcount = code->co_argcount; } int version = _PyFunction_GetVersionForCurrentState(func); if (!_PyFunction_IsVersionValid(version)) { - SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OUT_OF_VERSIONS); - return -1; + 
unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); + return; } write_u32(cache->func_version, version); + uint8_t opcode; if (argcount == nargs + bound_method) { - instr->op.code = bound_method ? CALL_BOUND_METHOD_EXACT_ARGS : CALL_PY_EXACT_ARGS; + opcode = + bound_method ? CALL_BOUND_METHOD_EXACT_ARGS : CALL_PY_EXACT_ARGS; } else { - instr->op.code = bound_method ? CALL_BOUND_METHOD_GENERAL : CALL_PY_GENERAL; + opcode = bound_method ? CALL_BOUND_METHOD_GENERAL : CALL_PY_GENERAL; } - return 0; + specialize(instr, opcode); } - static int specialize_py_call_kw(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, bool bound_method) @@ -2168,7 +2169,8 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) return; } else if (PyFunction_Check(callable)) { - fail = specialize_py_call((PyFunctionObject *)callable, instr, nargs, false); + specialize_py_call((PyFunctionObject *)callable, instr, nargs, false); + return; } else if (PyType_Check(callable)) { fail = specialize_class_call(callable, instr, nargs); @@ -2179,7 +2181,8 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) else if (PyMethod_Check(callable)) { PyObject *func = ((PyMethodObject *)callable)->im_func; if (PyFunction_Check(func)) { - fail = specialize_py_call((PyFunctionObject *)func, instr, nargs, true); + specialize_py_call((PyFunctionObject *)func, instr, nargs, true); + return; } else { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_BOUND_METHOD); From 92160decedd2395ed0fadd41aea9447df1681687 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 16 Oct 2024 15:16:21 -0700 Subject: [PATCH 03/25] Refactor specialize_class_call to use helpers --- Python/specialize.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index af54e385f40bbf..42938f7f12ab27 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -608,6 +608,7 @@ _PyCode_Quicken(_Py_CODEUNIT *instructions, 
Py_ssize_t size, PyObject *consts, #define SPEC_FAIL_CALL_INIT_NOT_SIMPLE 30 #define SPEC_FAIL_CALL_METACLASS 31 #define SPEC_FAIL_CALL_INIT_NOT_INLINE_VALUES 32 +#define SPEC_FAIL_CALL_NO_TYPE_VERSION 33 /* COMPARE_OP */ #define SPEC_FAIL_COMPARE_OP_DIFFERENT_TYPES 12 @@ -1953,7 +1954,7 @@ get_init_for_simple_managed_python_class(PyTypeObject *tp) return (PyFunctionObject *)init; } -static int +static void specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) { assert(PyType_Check(callable)); @@ -1962,21 +1963,21 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) int oparg = instr->op.arg; if (nargs == 1 && oparg == 1) { if (tp == &PyUnicode_Type) { - instr->op.code = CALL_STR_1; - return 0; + specialize(instr, CALL_STR_1); + return; } else if (tp == &PyType_Type) { - instr->op.code = CALL_TYPE_1; - return 0; + specialize(instr, CALL_TYPE_1); + return; } else if (tp == &PyTuple_Type) { - instr->op.code = CALL_TUPLE_1; - return 0; + specialize(instr, CALL_TUPLE_1); + return; } } if (tp->tp_vectorcall != NULL) { - instr->op.code = CALL_BUILTIN_CLASS; - return 0; + specialize(instr, CALL_BUILTIN_CLASS); + return; } goto generic; } @@ -1986,18 +1987,18 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) if (tp->tp_new == PyBaseObject_Type.tp_new) { PyFunctionObject *init = get_init_for_simple_managed_python_class(tp); if (type_get_version(tp, CALL) == 0) { - return -1; + unspecialize(instr, SPEC_FAIL_CALL_NO_TYPE_VERSION); + return; } if (init != NULL) { _PyCallCache *cache = (_PyCallCache *)(instr + 1); write_u32(cache->func_version, tp->tp_version_tag); - _Py_SET_OPCODE(*instr, CALL_ALLOC_AND_ENTER_INIT); - return 0; + specialize(instr, CALL_ALLOC_AND_ENTER_INIT); + return; } } generic: - instr->op.code = CALL_NON_PY_GENERAL; - return 0; + specialize(instr, CALL_NON_PY_GENERAL); } static int @@ -2173,7 +2174,8 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) return; 
} else if (PyType_Check(callable)) { - fail = specialize_class_call(callable, instr, nargs); + specialize_class_call(callable, instr, nargs); + return; } else if (Py_IS_TYPE(callable, &PyMethodDescr_Type)) { fail = specialize_method_descriptor((PyMethodDescrObject *)callable, instr, nargs); From 475492108153ebab1244422615ddd8a7b56bbc85 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 16 Oct 2024 15:45:47 -0700 Subject: [PATCH 04/25] Refactor specialize_method_descriptor to use helpers --- Python/specialize.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index 42938f7f12ab27..b85d70d20c4e58 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2001,7 +2001,7 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) specialize(instr, CALL_NON_PY_GENERAL); } -static int +static void specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr, int nargs) { @@ -2010,16 +2010,16 @@ specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr, METH_KEYWORDS | METH_METHOD)) { case METH_NOARGS: { if (nargs != 1) { - SPECIALIZATION_FAIL(CALL, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); - return -1; + unspecialize(instr, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); + return; } - instr->op.code = CALL_METHOD_DESCRIPTOR_NOARGS; - return 0; + specialize(instr, CALL_METHOD_DESCRIPTOR_NOARGS); + return; } case METH_O: { if (nargs != 2) { - SPECIALIZATION_FAIL(CALL, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); - return -1; + unspecialize(instr, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); + return; } PyInterpreterState *interp = _PyInterpreterState_GET(); PyObject *list_append = interp->callable_cache.list_append; @@ -2027,23 +2027,22 @@ specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr, bool pop = (next.op.code == POP_TOP); int oparg = instr->op.arg; if ((PyObject *)descr == list_append && oparg == 1 && pop) { - instr->op.code = 
CALL_LIST_APPEND; - return 0; + specialize(instr, CALL_LIST_APPEND); + return; } - instr->op.code = CALL_METHOD_DESCRIPTOR_O; - return 0; + specialize(instr, CALL_METHOD_DESCRIPTOR_O); + return; } case METH_FASTCALL: { - instr->op.code = CALL_METHOD_DESCRIPTOR_FAST; - return 0; + specialize(instr, CALL_METHOD_DESCRIPTOR_FAST); + return; } case METH_FASTCALL | METH_KEYWORDS: { - instr->op.code = CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS; - return 0; + specialize(instr, CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS); + return; } } - instr->op.code = CALL_NON_PY_GENERAL; - return 0; + specialize(instr, CALL_NON_PY_GENERAL); } static void @@ -2178,7 +2177,8 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) return; } else if (Py_IS_TYPE(callable, &PyMethodDescr_Type)) { - fail = specialize_method_descriptor((PyMethodDescrObject *)callable, instr, nargs); + specialize_method_descriptor((PyMethodDescrObject *)callable, instr, nargs); + return; } else if (PyMethod_Check(callable)) { PyObject *func = ((PyMethodObject *)callable)->im_func; From 6a20bb0f918876dc1d8933979bda0351e31ca673 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 16 Oct 2024 15:50:55 -0700 Subject: [PATCH 05/25] Remove unneeded code --- Python/specialize.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index b85d70d20c4e58..e43d80b949e004 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2162,49 +2162,29 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) assert(ENABLE_SPECIALIZATION); assert(_PyOpcode_Caches[CALL] == INLINE_CACHE_ENTRIES_CALL); assert(_Py_OPCODE(*instr) != INSTRUMENTED_CALL); - _PyCallCache *cache = (_PyCallCache *)(instr + 1); - int fail; if (PyCFunction_CheckExact(callable)) { specialize_c_call(callable, instr, nargs); - return; } else if (PyFunction_Check(callable)) { specialize_py_call((PyFunctionObject *)callable, instr, nargs, 
false); - return; } else if (PyType_Check(callable)) { specialize_class_call(callable, instr, nargs); - return; } else if (Py_IS_TYPE(callable, &PyMethodDescr_Type)) { specialize_method_descriptor((PyMethodDescrObject *)callable, instr, nargs); - return; } else if (PyMethod_Check(callable)) { PyObject *func = ((PyMethodObject *)callable)->im_func; if (PyFunction_Check(func)) { specialize_py_call((PyFunctionObject *)func, instr, nargs, true); - return; } else { - SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_BOUND_METHOD); - fail = -1; + unspecialize(instr, SPEC_FAIL_CALL_BOUND_METHOD); } } else { - instr->op.code = CALL_NON_PY_GENERAL; - fail = 0; - } - if (fail) { - STAT_INC(CALL, failure); - assert(!PyErr_Occurred()); - instr->op.code = CALL; - cache->counter = adaptive_counter_backoff(cache->counter); - } - else { - STAT_INC(CALL, success); - assert(!PyErr_Occurred()); - cache->counter = adaptive_counter_cooldown(); + specialize(instr, CALL_NON_PY_GENERAL, NULL); } } From ea7206d6ba50f5529835195a754681ad0e33bee7 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 16 Oct 2024 19:09:19 -0700 Subject: [PATCH 06/25] Enable almost all specializations of CALL _CALL_ALLOC_AND_ENTER_INIT will be addressed in a separate PR --- Python/bytecodes.c | 4 ++-- Python/specialize.c | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7ffe2f5b940942..0467155826d99a 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3308,7 +3308,7 @@ dummy_func( }; specializing op(_SPECIALIZE_CALL, (counter/1, callable[1], self_or_null[1], args[oparg] -- callable[1], self_or_null[1], args[oparg])) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_Call(callable[0], next_instr, oparg + !PyStackRef_IsNull(self_or_null[0])); @@ -3316,7 +3316,7 @@ dummy_func( } OPCODE_DEFERRED_INC(CALL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - 
#endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } op(_MAYBE_EXPAND_METHOD, (callable[1], self_or_null[1], args[oparg] -- func[1], maybe_self[1], args[oparg])) { diff --git a/Python/specialize.c b/Python/specialize.c index e43d80b949e004..91d2227f8ebda8 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1984,6 +1984,7 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) if (Py_TYPE(tp) != &PyType_Type) { goto generic; } + #ifndef Py_GIL_DISABLED if (tp->tp_new == PyBaseObject_Type.tp_new) { PyFunctionObject *init = get_init_for_simple_managed_python_class(tp); if (type_get_version(tp, CALL) == 0) { @@ -1997,6 +1998,7 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) return; } } + #endif generic: specialize(instr, CALL_NON_PY_GENERAL); } @@ -2159,7 +2161,7 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) { PyObject *callable = PyStackRef_AsPyObjectBorrow(callable_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[CALL] == INLINE_CACHE_ENTRIES_CALL); assert(_Py_OPCODE(*instr) != INSTRUMENTED_CALL); if (PyCFunction_CheckExact(callable)) { From d5375f1780ccea968288328d71ecc275fc45650a Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 18 Oct 2024 10:52:09 -0700 Subject: [PATCH 07/25] Regen files --- Python/generated_cases.c.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index f3db2f9abc79d0..3524ad523b0246 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -874,7 +874,7 @@ callable = &stack_pointer[-2 - oparg]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _PyFrame_SetStackPointer(frame, stack_pointer); @@ -884,7 +884,7 @@ } OPCODE_DEFERRED_INC(CALL); 
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } /* Skip 2 cache entries */ // _MAYBE_EXPAND_METHOD From a2ca19258086d17fbf0f19abb8aba8584e0570ab Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 18 Oct 2024 14:23:24 -0700 Subject: [PATCH 08/25] Fix implementation of CALL_LIST_APPEND in free-threaded builds This needs to acquire a critical section on the list. --- Include/internal/pycore_list.h | 3 +++ Objects/listobject.c | 10 ++++++++++ Python/bytecodes.c | 5 +++++ 3 files changed, 18 insertions(+) diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index 2c666f9be4bd79..6830fa26c28303 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -37,6 +37,9 @@ _PyList_AppendTakeRef(PyListObject *self, PyObject *newitem) return _PyList_AppendTakeRefListResize(self, newitem); } +// Like _PyList_AppendTakeRef, but locks self in free-threaded builds. +extern int _PyList_AppendTakeRefAndLock(PyListObject *self, PyObject *newitem); + // Repeat the bytes of a buffer in place static inline void _Py_memory_repeat(char* dest, Py_ssize_t len_dest, Py_ssize_t len_src) diff --git a/Objects/listobject.c b/Objects/listobject.c index bb0040cbe9f272..48736f9f51e728 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -490,6 +490,16 @@ PyList_Append(PyObject *op, PyObject *newitem) return -1; } +int +_PyList_AppendTakeRefAndLock(PyListObject *self, PyObject *newitem) +{ + int ret; + Py_BEGIN_CRITICAL_SECTION(self); + ret = _PyList_AppendTakeRef((PyListObject *)self, newitem); + Py_END_CRITICAL_SECTION(); + return ret; +} + /* Methods */ static void diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 0467155826d99a..26a955a18cde94 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3980,7 +3980,12 @@ dummy_func( assert(self_o != NULL); DEOPT_IF(!PyList_Check(self_o)); STAT_INC(CALL, hit); + #ifdef Py_GIL_DISABLED + int err; + 
err = _PyList_AppendTakeRefAndLock((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + #else int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + #endif PyStackRef_CLOSE(self); PyStackRef_CLOSE(callable); ERROR_IF(err, error); From 2ab08f2a4f1aca2849c0b8f563bd05db8a904d42 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 18 Oct 2024 14:31:20 -0700 Subject: [PATCH 09/25] Regenerate interpreter and friends --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_metadata.h | 2 +- Python/executor_cases.c.h | 7 +++++++ Python/generated_cases.c.h | 7 +++++++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 58e583eabbcc46..99259d73985d0b 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1054,7 +1054,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [CALL_KW_NON_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_LEN] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG }, + [CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_NOARGS] = { true, 
INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 1b2880cb6bb67e..c7cc238c9beb7f 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -242,7 +242,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CALL_BUILTIN_FAST_WITH_KEYWORDS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_LEN] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_CALL_ISINSTANCE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, - [_CALL_LIST_APPEND] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG, + [_CALL_LIST_APPEND] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_METHOD_DESCRIPTOR_O] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_METHOD_DESCRIPTOR_NOARGS] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 976a3429b2e603..eacb41b90ab4f6 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4871,7 +4871,14 @@ JUMP_TO_JUMP_TARGET(); } STAT_INC(CALL, hit); + #ifdef Py_GIL_DISABLED + int err; + _PyFrame_SetStackPointer(frame, stack_pointer); + err = _PyList_AppendTakeRefAndLock((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + stack_pointer = _PyFrame_GetStackPointer(frame); + #else int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + #endif PyStackRef_CLOSE(self); PyStackRef_CLOSE(callable); if (err) JUMP_TO_ERROR(); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 3524ad523b0246..232cd072c9f809 100644 --- 
a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2378,7 +2378,14 @@ assert(self_o != NULL); DEOPT_IF(!PyList_Check(self_o), CALL); STAT_INC(CALL, hit); + #ifdef Py_GIL_DISABLED + int err; + _PyFrame_SetStackPointer(frame, stack_pointer); + err = _PyList_AppendTakeRefAndLock((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + stack_pointer = _PyFrame_GetStackPointer(frame); + #else int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + #endif PyStackRef_CLOSE(self); PyStackRef_CLOSE(callable); if (err) goto pop_3_error; From 353424671a3af039a8c32eff0a2c0bcf1008ea02 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Tue, 22 Oct 2024 16:57:13 -0700 Subject: [PATCH 10/25] Refactor PyType_LookupRef to return version --- Include/internal/pycore_object.h | 2 ++ Objects/typeobject.c | 42 ++++++++++++++++++++++++++------ 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index cafc02f892499c..d7d7cbcaf5c9c7 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -820,6 +820,8 @@ extern int _PyObject_StoreInstanceAttribute(PyObject *obj, PyObject *name, PyObject *value); extern bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr); +extern PyObject *_PyType_LookupRefAndVersion(PyTypeObject *, PyObject *, + unsigned int *); #ifdef Py_GIL_DISABLED # define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-1) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 840d004d3d98c7..9721109ff906b6 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5527,9 +5527,12 @@ _PyTypes_AfterFork(void) } /* Internal API to look for a name through the MRO. - This returns a borrowed reference, and doesn't set an exception! */ + This returns a strong reference, and doesn't set an exception! 
+ If version is non-NULL, it is set to the value of type->tp_version_tag at + the time of the lookup. +*/ PyObject * -_PyType_LookupRef(PyTypeObject *type, PyObject *name) +_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version) { PyObject *res; int error; @@ -5552,6 +5555,9 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name) // If the sequence is still valid then we're done if (value == NULL || _Py_TryIncref(value)) { if (_PySeqLock_EndRead(&entry->sequence, sequence)) { + if (version != NULL) { + *version = entry_version; + } return value; } Py_XDECREF(value); @@ -5573,6 +5579,9 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name) OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); Py_XINCREF(entry->value); + if (version != NULL) { + *version = entry->version; + } return entry->value; } #endif @@ -5586,12 +5595,12 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name) // anyone else can modify our mro or mutate the type. int has_version = 0; - int version = 0; + unsigned int assigned_version = 0; BEGIN_TYPE_LOCK(); res = find_name_in_mro(type, name, &error); if (MCACHE_CACHEABLE_NAME(name)) { has_version = assign_version_tag(interp, type); - version = type->tp_version_tag; + assigned_version = type->tp_version_tag; } END_TYPE_LOCK(); @@ -5608,24 +5617,43 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name) if (error == -1) { PyErr_Clear(); } + if (version != NULL) { + // 0 is not a valid version + *version = 0; + } return NULL; } if (has_version) { #if Py_GIL_DISABLED - update_cache_gil_disabled(entry, name, version, res); + update_cache_gil_disabled(entry, name, assigned_version, res); #else - PyObject *old_value = update_cache(entry, name, version, res); + PyObject *old_value = update_cache(entry, name, assigned_version, res); Py_DECREF(old_value); #endif } + if (version != NULL) { + // 0 is not a valid version + *version = has_version ? 
assigned_version : 0; + } return res; } +/* Internal API to look for a name through the MRO. + This returns a strong reference, and doesn't set an exception! +*/ +PyObject * +_PyType_LookupRef(PyTypeObject *type, PyObject *name) +{ + return _PyType_LookupRefAndVersion(type, name, NULL); +} + +/* Internal API to look for a name through the MRO. + This returns a borrowed reference, and doesn't set an exception! */ PyObject * _PyType_Lookup(PyTypeObject *type, PyObject *name) { - PyObject *res = _PyType_LookupRef(type, name); + PyObject *res = _PyType_LookupRefAndVersion(type, name, NULL); Py_XDECREF(res); return res; } From acda1c6bb06e3dde08748c8d9ea2877802fdb42f Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 24 Oct 2024 15:38:23 -0700 Subject: [PATCH 11/25] Make CALL_ALLOC_AND_ENTER_INIT thread-safe - Modify `get_init_for_simple_managed_python_class` to return both init as well as the type version at the time of lookup. - Modify caching logic to verify that the current version of the type matches the version at the time of lookup. This prevents potentially caching a stale value if we race with an update to __init__. - Only cache __init__ functions that are deferred in free-threaded builds. This ensures that the borrowed reference to __init__ that is stored in the cache is valid if the type version guard in _CHECK_AND_ALLOCATE_OBJECT passes: 1. The type version is cleared before the reference in the MRO to __init__ is destroyed. 2. If the reference in (1) was the last reference then the __init__ method will be queued for deletion the next time GC runs. 3. GC requires stopping the world, which forces a synchronizes-with operation between all threads. 4. If the GC collects the cached __init__, then type's version will have been updated *and* the update will be visible to all threads, so the guard cannot pass. - There are no escaping calls in between loading from the specialization cache and pushing the frame. This is a requirement for the default build. 
--- Include/internal/pycore_object.h | 2 ++ Objects/typeobject.c | 22 +++++++++++++++ Python/bytecodes.c | 4 +-- Python/specialize.c | 48 ++++++++++++++++++++------------ 4 files changed, 56 insertions(+), 20 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index d7d7cbcaf5c9c7..135041a155ac5c 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -822,6 +822,8 @@ extern bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr); extern PyObject *_PyType_LookupRefAndVersion(PyTypeObject *, PyObject *, unsigned int *); +extern int _PyType_CacheInitForSpecialization(PyTypeObject *, PyObject *, + unsigned int); #ifdef Py_GIL_DISABLED # define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-1) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 9721109ff906b6..5a36e114628d9e 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5658,6 +5658,28 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) return res; } + +int +_PyType_CacheInitForSpecialization(PyTypeObject *type, PyObject *init, + unsigned int tp_version) +{ + if (!init || !tp_version) { + return 0; + } + int can_cache; + BEGIN_TYPE_LOCK(); + can_cache = type->tp_version_tag == tp_version; + #ifdef Py_GIL_DISABLED + can_cache = can_cache && _PyObject_HasDeferredRefcount(init); + #endif + if (can_cache) { + PyHeapTypeObject *ht = (PyHeapTypeObject*) type; + FT_ATOMIC_STORE_PTR_RELAXED(ht->_spec_cache.init, init); + } + END_TYPE_LOCK(); + return can_cache; +} + static void set_flags(PyTypeObject *self, unsigned long mask, unsigned long flags) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 26a955a18cde94..c834ae8ce293e2 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3701,10 +3701,10 @@ dummy_func( DEOPT_IF(!PyStackRef_IsNull(null[0])); DEOPT_IF(!PyType_Check(callable_o)); PyTypeObject *tp = (PyTypeObject *)callable_o; - DEOPT_IF(tp->tp_version_tag != 
type_version); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version); assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; - PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init; + PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_RELAXED(cls->_spec_cache.init); PyCodeObject *code = (PyCodeObject *)init_func->func_code; DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize)); STAT_INC(CALL, hit); diff --git a/Python/specialize.c b/Python/specialize.c index 91d2227f8ebda8..123625092c4681 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1920,38 +1920,38 @@ _Py_Specialize_StoreSubscr(_PyStackRef container_st, _PyStackRef sub_st, _Py_COD cache->counter = adaptive_counter_cooldown(); } -/* Returns a borrowed reference. - * The reference is only valid if guarded by a type version check. - */ -static PyFunctionObject * -get_init_for_simple_managed_python_class(PyTypeObject *tp) +/* Returns a strong reference. */ +static PyObject * +get_init_for_simple_managed_python_class(PyTypeObject *tp, unsigned int *tp_version) { assert(tp->tp_new == PyBaseObject_Type.tp_new); if (tp->tp_alloc != PyType_GenericAlloc) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OVERRIDDEN); return NULL; } - if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) == 0) { + unsigned long tp_flags = PyType_GetFlags(tp); + if ((tp_flags & Py_TPFLAGS_INLINE_VALUES) == 0) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_INLINE_VALUES); return NULL; } - if (!(tp->tp_flags & Py_TPFLAGS_HEAPTYPE)) { + if (!(tp_flags & Py_TPFLAGS_HEAPTYPE)) { /* Is this possible? 
*/ SPECIALIZATION_FAIL(CALL, SPEC_FAIL_EXPECTED_ERROR); return NULL; } - PyObject *init = _PyType_Lookup(tp, &_Py_ID(__init__)); + PyObject *init = _PyType_LookupRefAndVersion(tp, &_Py_ID(__init__), tp_version); if (init == NULL || !PyFunction_Check(init)) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_PYTHON); + Py_XDECREF(init); return NULL; } int kind = function_kind((PyCodeObject *)PyFunction_GET_CODE(init)); if (kind != SIMPLE_FUNCTION) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_SIMPLE); + Py_DECREF(init); return NULL; } - ((PyHeapTypeObject *)tp)->_spec_cache.init = init; - return (PyFunctionObject *)init; + return init; } static void @@ -1984,21 +1984,23 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) if (Py_TYPE(tp) != &PyType_Type) { goto generic; } - #ifndef Py_GIL_DISABLED if (tp->tp_new == PyBaseObject_Type.tp_new) { - PyFunctionObject *init = get_init_for_simple_managed_python_class(tp); - if (type_get_version(tp, CALL) == 0) { - unspecialize(instr, SPEC_FAIL_CALL_NO_TYPE_VERSION); + unsigned int tp_version = 0; + PyObject *init = get_init_for_simple_managed_python_class(tp, &tp_version); + if (!tp_version) { + unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); + Py_XDECREF(init); return; } - if (init != NULL) { + if (init != NULL && _PyType_CacheInitForSpecialization(tp, init, tp_version)) { _PyCallCache *cache = (_PyCallCache *)(instr + 1); - write_u32(cache->func_version, tp->tp_version_tag); + write_u32(cache->func_version, tp_version); specialize(instr, CALL_ALLOC_AND_ENTER_INIT); + Py_DECREF(init); return; } + Py_XDECREF(init); } - #endif generic: specialize(instr, CALL_NON_PY_GENERAL); } @@ -2186,7 +2188,7 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) } } else { - specialize(instr, CALL_NON_PY_GENERAL, NULL); + specialize(instr, CALL_NON_PY_GENERAL); } } @@ -2806,6 +2808,13 @@ static const PyBytesObject no_location = { .ob_sval = { NO_LOC_4 } }; +#ifdef Py_GIL_DISABLED 
+static _PyCodeArray init_cleanup_tlbc = { + .size = 1, + .entries = {(char*) &_Py_InitCleanup.co_code_adaptive}, +}; +#endif + const struct _PyCode8 _Py_InitCleanup = { _PyVarObject_HEAD_INIT(&PyCode_Type, 3), .co_consts = (PyObject *)&_Py_SINGLETON(tuple_empty), @@ -2821,6 +2830,9 @@ const struct _PyCode8 _Py_InitCleanup = { ._co_firsttraceable = 4, .co_stacksize = 2, .co_framesize = 2 + FRAME_SPECIALS_SIZE, +#ifdef Py_GIL_DISABLED + .co_tlbc = &init_cleanup_tlbc, +#endif .co_code_adaptive = { EXIT_INIT_CHECK, 0, RETURN_VALUE, 0, From fdfa678008bfc1b27a61549dc22b97c5a4094709 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 24 Oct 2024 15:38:40 -0700 Subject: [PATCH 12/25] Regenerate files --- Python/executor_cases.c.h | 4 ++-- Python/generated_cases.c.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index eacb41b90ab4f6..fdd98216736cc9 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4462,13 +4462,13 @@ JUMP_TO_JUMP_TARGET(); } PyTypeObject *tp = (PyTypeObject *)callable_o; - if (tp->tp_version_tag != type_version) { + if (FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; - PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init; + PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_RELAXED(cls->_spec_cache.init); PyCodeObject *code = (PyCodeObject *)init_func->func_code; if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize)) { UOP_STAT_INC(uopcode, miss); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 232cd072c9f809..d9f71af7c2ba74 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1042,10 +1042,10 @@ DEOPT_IF(!PyStackRef_IsNull(null[0]), CALL); 
DEOPT_IF(!PyType_Check(callable_o), CALL); PyTypeObject *tp = (PyTypeObject *)callable_o; - DEOPT_IF(tp->tp_version_tag != type_version, CALL); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version, CALL); assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; - PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init; + PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_RELAXED(cls->_spec_cache.init); PyCodeObject *code = (PyCodeObject *)init_func->func_code; DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize), CALL); STAT_INC(CALL, hit); From ad2e15ce5c91d40ff52e071eb6481b47495b5137 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 18 Nov 2024 22:08:31 -0800 Subject: [PATCH 13/25] Stop the world around assignments to `tstate->eval_frame` --- Python/perf_trampoline.c | 14 +++++++++++--- Python/pystate.c | 2 ++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c index f144f7d436fe68..22921876d1654d 100644 --- a/Python/perf_trampoline.c +++ b/Python/perf_trampoline.c @@ -471,6 +471,14 @@ _PyPerfTrampoline_SetCallbacks(_PyPerf_Callbacks *callbacks) return 0; } +static void +set_eval_frame(PyThreadState *tstate, _PyFrameEvalFunction eval_frame) +{ + _PyEval_StopTheWorld(tstate->interp); + tstate->interp->eval_frame = eval_frame; + _PyEval_StartTheWorld(tstate->interp); +} + int _PyPerfTrampoline_Init(int activate) { @@ -484,11 +492,11 @@ _PyPerfTrampoline_Init(int activate) return -1; } if (!activate) { - tstate->interp->eval_frame = NULL; + set_eval_frame(tstate, NULL); perf_status = PERF_STATUS_NO_INIT; } else { - tstate->interp->eval_frame = py_trampoline_evaluator; + set_eval_frame(tstate, py_trampoline_evaluator); if (new_code_arena() < 0) { return -1; } @@ -514,7 +522,7 @@ _PyPerfTrampoline_Fini(void) } PyThreadState *tstate = _PyThreadState_GET(); 
if (tstate->interp->eval_frame == py_trampoline_evaluator) { - tstate->interp->eval_frame = NULL; + set_eval_frame(tstate, NULL); } if (perf_status == PERF_STATUS_OK) { trampoline_api.free_state(trampoline_api.state); diff --git a/Python/pystate.c b/Python/pystate.c index 44f55be5b5b7a8..5162e7555399cd 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2842,7 +2842,9 @@ _PyInterpreterState_SetEvalFrameFunc(PyInterpreterState *interp, } #endif RARE_EVENT_INC(set_eval_frame_func); + _PyEval_StopTheWorld(interp); interp->eval_frame = eval_frame; + _PyEval_StartTheWorld(interp); } From 0003d00e1bd1865aaef7ef0240510e6e8bf0acaa Mon Sep 17 00:00:00 2001 From: Matt Page Date: Tue, 19 Nov 2024 17:04:49 -0800 Subject: [PATCH 14/25] Document restriction on _Py_InitCleanup bytecode --- Python/bytecodes.c | 1 + Python/executor_cases.c.h | 1 + Python/generated_cases.c.h | 1 + Python/specialize.c | 10 ++++++++++ 4 files changed, 13 insertions(+) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c834ae8ce293e2..3a018a2cd82e7e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3722,6 +3722,7 @@ dummy_func( _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); + assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE); /* Push self onto stack of shim */ shim->localsplus[0] = PyStackRef_DUP(self[0]); DEAD(init); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index fdd98216736cc9..4698a398f9c5da 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4499,6 +4499,7 @@ _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); + assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE); stack_pointer = _PyFrame_GetStackPointer(frame); /* Push self onto stack of shim */ 
shim->localsplus[0] = PyStackRef_DUP(self[0]); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index d9f71af7c2ba74..0fd17c2996950e 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1067,6 +1067,7 @@ _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); + assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE); stack_pointer = _PyFrame_GetStackPointer(frame); /* Push self onto stack of shim */ shim->localsplus[0] = PyStackRef_DUP(self[0]); diff --git a/Python/specialize.c b/Python/specialize.c index 123625092c4681..fd3573143ee1f1 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2799,6 +2799,16 @@ _Py_Specialize_ContainsOp(_PyStackRef value_st, _Py_CODEUNIT *instr) * Ends with a RESUME so that it is not traced. * This is used as a plain code object, not a function, * so must not access globals or builtins. + * There are a few other constraints imposed on the code + * by the free-threaded build: + * + * 1. The RESUME instruction must not be executed. Otherwise we may attempt to + * free the statically allocated TLBC array. + * 2. It must contain no specializable instructions. Specializing multiple + * copies of the same bytecode is not thread-safe in free-threaded builds. + * + * This should be dynamically allocated if either of those restrictions need to + * be lifted. 
*/ #define NO_LOC_4 (128 | (PY_CODE_LOCATION_INFO_NONE << 3) | 3) From 8ebd3316e0da461d17e11dfff188b6dd87849e9b Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 20 Nov 2024 12:00:12 -0800 Subject: [PATCH 15/25] Undo refactor --- Python/specialize.c | 94 +++++++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 42 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index fd3573143ee1f1..7e9544e5dce309 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1954,7 +1954,7 @@ get_init_for_simple_managed_python_class(PyTypeObject *tp, unsigned int *tp_vers return init; } -static void +static int specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) { assert(PyType_Check(callable)); @@ -1964,20 +1964,20 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) if (nargs == 1 && oparg == 1) { if (tp == &PyUnicode_Type) { specialize(instr, CALL_STR_1); - return; + return 0; } else if (tp == &PyType_Type) { specialize(instr, CALL_TYPE_1); - return; + return 0; } else if (tp == &PyTuple_Type) { specialize(instr, CALL_TUPLE_1); - return; + return 0; } } if (tp->tp_vectorcall != NULL) { specialize(instr, CALL_BUILTIN_CLASS); - return; + return 0; } goto generic; } @@ -1988,24 +1988,25 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) unsigned int tp_version = 0; PyObject *init = get_init_for_simple_managed_python_class(tp, &tp_version); if (!tp_version) { - unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OUT_OF_VERSIONS); Py_XDECREF(init); - return; + return -1; } if (init != NULL && _PyType_CacheInitForSpecialization(tp, init, tp_version)) { _PyCallCache *cache = (_PyCallCache *)(instr + 1); write_u32(cache->func_version, tp_version); specialize(instr, CALL_ALLOC_AND_ENTER_INIT); Py_DECREF(init); - return; + return 0; } Py_XDECREF(init); } generic: specialize(instr, CALL_NON_PY_GENERAL); + return 0; } -static void +static int 
specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr, int nargs) { @@ -2014,16 +2015,16 @@ specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr, METH_KEYWORDS | METH_METHOD)) { case METH_NOARGS: { if (nargs != 1) { - unspecialize(instr, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); - return; + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); + return -1; } specialize(instr, CALL_METHOD_DESCRIPTOR_NOARGS); - return; + return 0; } case METH_O: { if (nargs != 2) { - unspecialize(instr, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); - return; + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); + return -1; } PyInterpreterState *interp = _PyInterpreterState_GET(); PyObject *list_append = interp->callable_cache.list_append; @@ -2032,24 +2033,25 @@ specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr, int oparg = instr->op.arg; if ((PyObject *)descr == list_append && oparg == 1 && pop) { specialize(instr, CALL_LIST_APPEND); - return; + return 0; } specialize(instr, CALL_METHOD_DESCRIPTOR_O); - return; + return 0; } case METH_FASTCALL: { specialize(instr, CALL_METHOD_DESCRIPTOR_FAST); - return; + return 0; } case METH_FASTCALL | METH_KEYWORDS: { specialize(instr, CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS); - return; + return 0; } } specialize(instr, CALL_NON_PY_GENERAL); + return 0; } -static void +static int specialize_py_call(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, bool bound_method) { @@ -2058,21 +2060,21 @@ specialize_py_call(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, int kind = function_kind(code); /* Don't specialize if PEP 523 is active */ if (_PyInterpreterState_GET()->eval_frame) { - unspecialize(instr, SPEC_FAIL_CALL_PEP_523); - return; + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_PEP_523); + return -1; } int argcount = -1; if (kind == SPEC_FAIL_CODE_NOT_OPTIMIZED) { - unspecialize(instr, SPEC_FAIL_CODE_NOT_OPTIMIZED); - return; + SPECIALIZATION_FAIL(CALL, 
SPEC_FAIL_CODE_NOT_OPTIMIZED); + return -1; } if (kind == SIMPLE_FUNCTION) { argcount = code->co_argcount; } int version = _PyFunction_GetVersionForCurrentState(func); if (!_PyFunction_IsVersionValid(version)) { - unspecialize(instr, SPEC_FAIL_OUT_OF_VERSIONS); - return; + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OUT_OF_VERSIONS); + return -1; } write_u32(cache->func_version, version); uint8_t opcode; @@ -2084,8 +2086,10 @@ specialize_py_call(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, opcode = bound_method ? CALL_BOUND_METHOD_GENERAL : CALL_PY_GENERAL; } specialize(instr, opcode); + return 0; } + static int specialize_py_call_kw(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, bool bound_method) @@ -2112,29 +2116,29 @@ specialize_py_call_kw(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, return 0; } -static void +static int specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) { if (PyCFunction_GET_FUNCTION(callable) == NULL) { - unspecialize(instr, SPEC_FAIL_OTHER); - return; + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OTHER); + return 1; } switch (PyCFunction_GET_FLAGS(callable) & (METH_VARARGS | METH_FASTCALL | METH_NOARGS | METH_O | METH_KEYWORDS | METH_METHOD)) { case METH_O: { if (nargs != 1) { - unspecialize(instr, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); - return; + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); + return 1; } /* len(o) */ PyInterpreterState *interp = _PyInterpreterState_GET(); if (callable == interp->callable_cache.len) { specialize(instr, CALL_LEN); - return; + return 0; } specialize(instr, CALL_BUILTIN_O); - return; + return 0; } case METH_FASTCALL: { if (nargs == 2) { @@ -2142,19 +2146,19 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) PyInterpreterState *interp = _PyInterpreterState_GET(); if (callable == interp->callable_cache.isinstance) { specialize(instr, CALL_ISINSTANCE); - return; + return 0; } } specialize(instr, CALL_BUILTIN_FAST); - return; + return 0; } case 
METH_FASTCALL | METH_KEYWORDS: { specialize(instr, CALL_BUILTIN_FAST_WITH_KEYWORDS); - return; + return 0; } default: specialize(instr, CALL_NON_PY_GENERAL); - return; + return 0; } } @@ -2166,29 +2170,35 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[CALL] == INLINE_CACHE_ENTRIES_CALL); assert(_Py_OPCODE(*instr) != INSTRUMENTED_CALL); + int fail; if (PyCFunction_CheckExact(callable)) { - specialize_c_call(callable, instr, nargs); + fail = specialize_c_call(callable, instr, nargs); } else if (PyFunction_Check(callable)) { - specialize_py_call((PyFunctionObject *)callable, instr, nargs, false); + fail = specialize_py_call((PyFunctionObject *)callable, instr, nargs, false); } else if (PyType_Check(callable)) { - specialize_class_call(callable, instr, nargs); + fail = specialize_class_call(callable, instr, nargs); } else if (Py_IS_TYPE(callable, &PyMethodDescr_Type)) { - specialize_method_descriptor((PyMethodDescrObject *)callable, instr, nargs); + fail = specialize_method_descriptor((PyMethodDescrObject *)callable, instr, nargs); } else if (PyMethod_Check(callable)) { PyObject *func = ((PyMethodObject *)callable)->im_func; if (PyFunction_Check(func)) { - specialize_py_call((PyFunctionObject *)func, instr, nargs, true); + fail = specialize_py_call((PyFunctionObject *)func, instr, nargs, true); } else { - unspecialize(instr, SPEC_FAIL_CALL_BOUND_METHOD); + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_BOUND_METHOD); + fail = -1; } } else { specialize(instr, CALL_NON_PY_GENERAL); + fail = 0; + } + if (fail) { + unspecialize(instr); } } From 4c1ad6c08a6fb867c1bd1389e4b5c452592d5f5e Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 20 Nov 2024 15:41:59 -0800 Subject: [PATCH 16/25] Undo workaround for now-fixed cases_generator bug --- Python/bytecodes.c | 3 +-- Python/executor_cases.c.h | 3 +-- Python/generated_cases.c.h | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff 
--git a/Python/bytecodes.c b/Python/bytecodes.c index 3a018a2cd82e7e..f35ad07e912e97 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3982,8 +3982,7 @@ dummy_func( DEOPT_IF(!PyList_Check(self_o)); STAT_INC(CALL, hit); #ifdef Py_GIL_DISABLED - int err; - err = _PyList_AppendTakeRefAndLock((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + int err = _PyList_AppendTakeRefAndLock((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); #else int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); #endif diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 4698a398f9c5da..75b4a43e3148ca 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4873,9 +4873,8 @@ } STAT_INC(CALL, hit); #ifdef Py_GIL_DISABLED - int err; _PyFrame_SetStackPointer(frame, stack_pointer); - err = _PyList_AppendTakeRefAndLock((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + int err = _PyList_AppendTakeRefAndLock((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); stack_pointer = _PyFrame_GetStackPointer(frame); #else int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 0fd17c2996950e..19a4c889de9cb1 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2380,9 +2380,8 @@ DEOPT_IF(!PyList_Check(self_o), CALL); STAT_INC(CALL, hit); #ifdef Py_GIL_DISABLED - int err; _PyFrame_SetStackPointer(frame, stack_pointer); - err = _PyList_AppendTakeRefAndLock((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + int err = _PyList_AppendTakeRefAndLock((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); stack_pointer = _PyFrame_GetStackPointer(frame); #else int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); From 57ba52dabcfa3c23c24c9e525ada546ce8bd99ca Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 20 Nov 2024 
16:19:20 -0800 Subject: [PATCH 17/25] Document _PyType_CacheInitForSpecialization --- Include/internal/pycore_object.h | 14 ++++++++++++-- Objects/typeobject.c | 8 +++----- Python/specialize.c | 3 ++- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 135041a155ac5c..64b19e69e8e243 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -822,8 +822,18 @@ extern bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr); extern PyObject *_PyType_LookupRefAndVersion(PyTypeObject *, PyObject *, unsigned int *); -extern int _PyType_CacheInitForSpecialization(PyTypeObject *, PyObject *, - unsigned int); + +// Cache the provided init method in the specialization cache of type if the +// provided type version matches the current version of the type. +// +// The cached value is borrowed and is only valid if guarded by a type +// version check. In free-threaded builds the init method must also use +// deferred reference counting. +// +// Returns 1 if the value was cached or 0 otherwise. 
+extern int _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, + PyObject *init, + unsigned int tp_version); #ifdef Py_GIL_DISABLED # define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-1) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 5a36e114628d9e..de5fd9546ff48d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5658,9 +5658,8 @@ _PyType_Lookup(PyTypeObject *type, PyObject *name) return res; } - int -_PyType_CacheInitForSpecialization(PyTypeObject *type, PyObject *init, +_PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, unsigned int tp_version) { if (!init || !tp_version) { @@ -5668,13 +5667,12 @@ _PyType_CacheInitForSpecialization(PyTypeObject *type, PyObject *init, } int can_cache; BEGIN_TYPE_LOCK(); - can_cache = type->tp_version_tag == tp_version; + can_cache = ((PyTypeObject*)type)->tp_version_tag == tp_version; #ifdef Py_GIL_DISABLED can_cache = can_cache && _PyObject_HasDeferredRefcount(init); #endif if (can_cache) { - PyHeapTypeObject *ht = (PyHeapTypeObject*) type; - FT_ATOMIC_STORE_PTR_RELAXED(ht->_spec_cache.init, init); + FT_ATOMIC_STORE_PTR_RELAXED(type->_spec_cache.init, init); } END_TYPE_LOCK(); return can_cache; diff --git a/Python/specialize.c b/Python/specialize.c index 7e9544e5dce309..732dc21ad9137f 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1992,7 +1992,8 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) Py_XDECREF(init); return -1; } - if (init != NULL && _PyType_CacheInitForSpecialization(tp, init, tp_version)) { + if (init != NULL && _PyType_CacheInitForSpecialization( + (PyHeapTypeObject *)tp, init, tp_version)) { _PyCallCache *cache = (_PyCallCache *)(instr + 1); write_u32(cache->func_version, tp_version); specialize(instr, CALL_ALLOC_AND_ENTER_INIT); From 8ebd73db82ea765bc483354025d22c48fdfb7db8 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 20 Nov 2024 22:27:45 -0800 Subject: [PATCH 18/25] Remove unused define 
--- Python/specialize.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index 732dc21ad9137f..c4e2ecd3a3d03c 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -608,7 +608,6 @@ _PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, PyObject *consts, #define SPEC_FAIL_CALL_INIT_NOT_SIMPLE 30 #define SPEC_FAIL_CALL_METACLASS 31 #define SPEC_FAIL_CALL_INIT_NOT_INLINE_VALUES 32 -#define SPEC_FAIL_CALL_NO_TYPE_VERSION 33 /* COMPARE_OP */ #define SPEC_FAIL_COMPARE_OP_DIFFERENT_TYPES 12 From 8651ebea93e35bc17877ee908cfb0a0c975f41a4 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 21 Nov 2024 15:05:19 -0800 Subject: [PATCH 19/25] Enable tests --- Lib/test/test_monitoring.py | 17 ++++++++++------- Lib/test/test_opcache.py | 13 ++++++++----- Lib/test/test_type_cache.py | 9 +++++++-- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_monitoring.py b/Lib/test/test_monitoring.py index b640aa08e4a812..df74899ac87c00 100644 --- a/Lib/test/test_monitoring.py +++ b/Lib/test/test_monitoring.py @@ -11,7 +11,7 @@ import unittest import test.support -from test.support import requires_specialization, script_helper +from test.support import requires_specialization_ft, script_helper from test.support.import_helper import import_module _testcapi = test.support.import_helper.import_module("_testcapi") @@ -850,6 +850,13 @@ def __init__(self, events): def __call__(self, code, offset, val): self.events.append(("return", code.co_name, val)) +# CALL_ALLOC_AND_ENTER_INIT will only cache __init__ methods that are +# deferred. We only defer functions defined at the top-level. 
+class ValueErrorRaiser: + def __init__(self): + raise ValueError() + + class ExceptionMonitoringTest(CheckEvents): exception_recorders = ( @@ -1045,16 +1052,12 @@ def func(): ) self.assertEqual(events[0], ("throw", IndexError)) - @requires_specialization + @requires_specialization_ft def test_no_unwind_for_shim_frame(self): - class B: - def __init__(self): - raise ValueError() - def f(): try: - return B() + return ValueErrorRaiser() except ValueError: pass diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 78e4bf44f7ea0c..e69a65604e5121 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -493,6 +493,13 @@ def f(): self.assertFalse(f()) +# CALL_ALLOC_AND_ENTER_INIT will only cache __init__ methods that are +# deferred. We only defer functions defined at the top-level. +class MyClass: + def __init__(self): + pass + + class TestCallCache(TestBase): def test_too_many_defaults_0(self): def f(): @@ -522,12 +529,8 @@ def f(x, y): f() @disabling_optimizer - @requires_specialization + @requires_specialization_ft def test_assign_init_code(self): - class MyClass: - def __init__(self): - pass - def instantiate(): return MyClass() diff --git a/Lib/test/test_type_cache.py b/Lib/test/test_type_cache.py index 66abe73f8d766d..e109a65741309a 100644 --- a/Lib/test/test_type_cache.py +++ b/Lib/test/test_type_cache.py @@ -2,7 +2,7 @@ import unittest import dis from test import support -from test.support import import_helper, requires_specialization +from test.support import import_helper, requires_specialization, requires_specialization_ft try: from sys import _clear_type_cache except ImportError: @@ -110,7 +110,6 @@ class HolderSub(Holder): HolderSub.value @support.cpython_only -@requires_specialization class TypeCacheWithSpecializationTests(unittest.TestCase): def tearDown(self): _clear_type_cache() @@ -140,6 +139,7 @@ def _check_specialization(self, func, arg, opname, *, should_specialize): else: self.assertIn(opname, self._all_opnames(func)) 
+ @requires_specialization def test_class_load_attr_specialization_user_type(self): class A: def foo(self): @@ -160,6 +160,7 @@ def load_foo_2(type_): self._check_specialization(load_foo_2, A, "LOAD_ATTR", should_specialize=False) + @requires_specialization def test_class_load_attr_specialization_static_type(self): self.assertNotEqual(type_get_version(str), 0) self.assertNotEqual(type_get_version(bytes), 0) @@ -171,6 +172,7 @@ def get_capitalize_1(type_): self.assertEqual(get_capitalize_1(str)('hello'), 'Hello') self.assertEqual(get_capitalize_1(bytes)(b'hello'), b'Hello') + @requires_specialization def test_property_load_attr_specialization_user_type(self): class G: @property @@ -192,6 +194,7 @@ def load_x_2(instance): self._check_specialization(load_x_2, G(), "LOAD_ATTR", should_specialize=False) + @requires_specialization def test_store_attr_specialization_user_type(self): class B: __slots__ = ("bar",) @@ -211,6 +214,7 @@ def store_bar_2(type_): self._check_specialization(store_bar_2, B(), "STORE_ATTR", should_specialize=False) + @requires_specialization_ft def test_class_call_specialization_user_type(self): class F: def __init__(self): @@ -231,6 +235,7 @@ def call_class_2(type_): self._check_specialization(call_class_2, F, "CALL", should_specialize=False) + @requires_specialization def test_to_bool_specialization_user_type(self): class H: pass From 4c7837f383582b03abbe67cd3b8b10450818ed70 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 21 Nov 2024 16:51:33 -0800 Subject: [PATCH 20/25] Fix warning about unused function on macos --- Python/perf_trampoline.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c index 22921876d1654d..9b8249419970ab 100644 --- a/Python/perf_trampoline.c +++ b/Python/perf_trampoline.c @@ -471,6 +471,7 @@ _PyPerfTrampoline_SetCallbacks(_PyPerf_Callbacks *callbacks) return 0; } +#ifdef PY_HAVE_PERF_TRAMPOLINE static void set_eval_frame(PyThreadState *tstate, _PyFrameEvalFunction 
eval_frame) { @@ -478,6 +479,7 @@ set_eval_frame(PyThreadState *tstate, _PyFrameEvalFunction eval_frame) tstate->interp->eval_frame = eval_frame; _PyEval_StartTheWorld(tstate->interp); } +#endif int _PyPerfTrampoline_Init(int activate) From d8a67c2ce2cccd6b133bff270e2e18bf5a75bccd Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 25 Nov 2024 15:26:44 -0800 Subject: [PATCH 21/25] Tag workarounds for not deferring nested functions on classes with gh issue --- Lib/test/test_monitoring.py | 4 ++-- Lib/test/test_opcache.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_monitoring.py b/Lib/test/test_monitoring.py index df74899ac87c00..5a4bcebedf19de 100644 --- a/Lib/test/test_monitoring.py +++ b/Lib/test/test_monitoring.py @@ -850,8 +850,8 @@ def __init__(self, events): def __call__(self, code, offset, val): self.events.append(("return", code.co_name, val)) -# CALL_ALLOC_AND_ENTER_INIT will only cache __init__ methods that are -# deferred. We only defer functions defined at the top-level. +# gh-127274: CALL_ALLOC_AND_ENTER_INIT will only cache __init__ methods that +# are deferred. We only defer functions defined at the top-level. class ValueErrorRaiser: def __init__(self): raise ValueError() diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 1fd200501a3d55..a85683f2a50268 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -493,8 +493,8 @@ def f(): self.assertFalse(f()) -# CALL_ALLOC_AND_ENTER_INIT will only cache __init__ methods that are -# deferred. We only defer functions defined at the top-level. +# gh-127274: CALL_ALLOC_AND_ENTER_INIT will only cache __init__ methods that +# are deferred. We only defer functions defined at the top-level. 
class MyClass: def __init__(self): pass From 3e8d85e9a7903796f8ebd0ba08139c4f3f3bb786 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 25 Nov 2024 15:42:50 -0800 Subject: [PATCH 22/25] Use `_PyInterpreterState_SetEvalFrameFunc` when setting / clearing perf trampoline --- Python/perf_trampoline.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c index 9b8249419970ab..ad077dc861b0a7 100644 --- a/Python/perf_trampoline.c +++ b/Python/perf_trampoline.c @@ -471,16 +471,6 @@ _PyPerfTrampoline_SetCallbacks(_PyPerf_Callbacks *callbacks) return 0; } -#ifdef PY_HAVE_PERF_TRAMPOLINE -static void -set_eval_frame(PyThreadState *tstate, _PyFrameEvalFunction eval_frame) -{ - _PyEval_StopTheWorld(tstate->interp); - tstate->interp->eval_frame = eval_frame; - _PyEval_StartTheWorld(tstate->interp); -} -#endif - int _PyPerfTrampoline_Init(int activate) { @@ -494,11 +484,11 @@ _PyPerfTrampoline_Init(int activate) return -1; } if (!activate) { - set_eval_frame(tstate, NULL); + _PyInterpreterState_SetEvalFrameFunc(tstate->interp, NULL); perf_status = PERF_STATUS_NO_INIT; } else { - set_eval_frame(tstate, py_trampoline_evaluator); + _PyInterpreterState_SetEvalFrameFunc(tstate->interp, py_trampoline_evaluator); if (new_code_arena() < 0) { return -1; } @@ -524,7 +514,7 @@ _PyPerfTrampoline_Fini(void) } PyThreadState *tstate = _PyThreadState_GET(); if (tstate->interp->eval_frame == py_trampoline_evaluator) { - set_eval_frame(tstate, NULL); + _PyInterpreterState_SetEvalFrameFunc(tstate->interp, NULL); } if (perf_status == PERF_STATUS_OK) { trampoline_api.free_state(trampoline_api.state); From de0e2ee20f501bdfbd57613981985e011c4e23bd Mon Sep 17 00:00:00 2001 From: Matt Page Date: Tue, 26 Nov 2024 13:48:59 -0800 Subject: [PATCH 23/25] Fix issue with `_CREATE_INIT_FRAME` Fix a bug in `_CREATE_INIT_FRAME` where the frame is pushed to the stack on failure. 
`_CREATE_INIT_FRAME` pushes a pointer to the new frame onto the stack for consumption by the next uop. When pushing the frame fails, we do not want to push the result (NULL) to the stack because it is not a valid stackref and will be exposed to the generic error handling code in the interpreter loop. This worked in default builds because `PyStackRef_NULL` is `NULL` in default builds, which is not the case in free-threaded builds. --- Lib/test/test_opcache.py | 19 +++++++++++++++++++ Python/bytecodes.c | 5 +++-- Python/executor_cases.c.h | 11 +++++++---- Python/generated_cases.c.h | 10 ++++------ 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index a85683f2a50268..68e11aa19ed79b 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -500,6 +500,11 @@ def __init__(self): pass +class InitTakesArg: + def __init__(self, arg): + self.arg = arg + + class TestCallCache(TestBase): def test_too_many_defaults_0(self): def f(): @@ -547,6 +552,20 @@ def count_args(self, *args): MyClass.__init__.__code__ = count_args.__code__ instantiate() + @disabling_optimizer + @requires_specialization_ft + def test_push_init_frame_fails(self): + def instantiate(): + return InitTakesArg() + + for _ in range(2): + with self.assertRaises(TypeError): + instantiate() + self.assert_specialized(instantiate, "CALL_ALLOC_AND_ENTER_INIT") + + with self.assertRaises(TypeError): + instantiate() + @threading_helper.requires_working_threading() class TestRacesDoNotCrash(TestBase): diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 03f305ef163693..48210b193a0e42 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3744,13 +3744,14 @@ dummy_func( shim->localsplus[0] = PyStackRef_DUP(self[0]); DEAD(init); DEAD(self); - init_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); SYNC_SP(); - if (init_frame == NULL) { + if 
(temp == NULL) { _PyEval_FrameClearAndPop(tstate, shim); ERROR_NO_POP(); } + init_frame = temp; frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. * We don't check recursion depth here, diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index aa168bb0976657..b7e58b41ae50b2 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4533,21 +4533,24 @@ /* Push self onto stack of shim */ shim->localsplus[0] = PyStackRef_DUP(self[0]); _PyFrame_SetStackPointer(frame, stack_pointer); - init_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[-2 - oparg].bits = (uintptr_t)init_frame; - stack_pointer += -1 - oparg; + stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (init_frame == NULL) { + if (temp == NULL) { _PyEval_FrameClearAndPop(tstate, shim); JUMP_TO_ERROR(); } + init_frame = temp; frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. 
* We don't check recursion depth here, * as it will be checked after start_frame */ tstate->py_recursion_remaining--; + stack_pointer[0].bits = (uintptr_t)init_frame; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 74508d1925ecce..22d72b9d7121de 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1070,16 +1070,16 @@ /* Push self onto stack of shim */ shim->localsplus[0] = PyStackRef_DUP(self[0]); _PyFrame_SetStackPointer(frame, stack_pointer); - init_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[-2 - oparg].bits = (uintptr_t)init_frame; - stack_pointer += -1 - oparg; + stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (init_frame == NULL) { + if (temp == NULL) { _PyEval_FrameClearAndPop(tstate, shim); goto error; } + init_frame = temp; frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. * We don't check recursion depth here, @@ -1093,8 +1093,6 @@ // Eventually this should be the only occurrence of this code. 
assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = new_frame; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); assert(new_frame->previous == frame || new_frame->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); From b3aa63cd24181b73eb03b1af3346b59f52ff997f Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 2 Dec 2024 16:32:49 -0800 Subject: [PATCH 24/25] Use release/acquire for the specialization cache --- Objects/typeobject.c | 2 +- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 001b8aa27a3267..33cedc1216c56c 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5672,7 +5672,7 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, can_cache = can_cache && _PyObject_HasDeferredRefcount(init); #endif if (can_cache) { - FT_ATOMIC_STORE_PTR_RELAXED(type->_spec_cache.init, init); + FT_ATOMIC_STORE_PTR_RELEASE(type->_spec_cache.init, init); } END_TYPE_LOCK(); return can_cache; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 381274d6717d44..757e334a740526 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3725,7 +3725,7 @@ dummy_func( DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version); assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; - PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_RELAXED(cls->_spec_cache.init); + PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init); PyCodeObject *code = (PyCodeObject *)init_func->func_code; DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize)); STAT_INC(CALL, hit); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 
03d1d49c344104..5755a4f196fccc 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4506,7 +4506,7 @@ } assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; - PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_RELAXED(cls->_spec_cache.init); + PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init); PyCodeObject *code = (PyCodeObject *)init_func->func_code; if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize)) { UOP_STAT_INC(uopcode, miss); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c36ee665f0c44c..1f3017f0649d02 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1051,7 +1051,7 @@ DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version, CALL); assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; - PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_RELAXED(cls->_spec_cache.init); + PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init); PyCodeObject *code = (PyCodeObject *)init_func->func_code; DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize), CALL); STAT_INC(CALL, hit); From 6b591c321087c1a567da841fd376feecc1e989e3 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Mon, 2 Dec 2024 16:45:11 -0800 Subject: [PATCH 25/25] Use locking macros instead of helper function --- Include/internal/pycore_list.h | 3 --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_metadata.h | 2 +- Objects/listobject.c | 10 ---------- Python/bytecodes.c | 6 ++---- Python/executor_cases.c.h | 11 +++++------ Python/generated_cases.c.h | 8 ++------ 7 files changed, 11 insertions(+), 31 deletions(-) diff --git a/Include/internal/pycore_list.h 
b/Include/internal/pycore_list.h index 0027baa6c32134..f03e484f5ef8b0 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -40,9 +40,6 @@ _PyList_AppendTakeRef(PyListObject *self, PyObject *newitem) return _PyList_AppendTakeRefListResize(self, newitem); } -// Like _PyList_AppendTakeRef, but locks self in free-threaded builds. -extern int _PyList_AppendTakeRefAndLock(PyListObject *self, PyObject *newitem); - // Repeat the bytes of a buffer in place static inline void _Py_memory_repeat(char* dest, Py_ssize_t len_dest, Py_ssize_t len_src) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 83f3d5f0dcd24b..81dde66a6f26c2 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1979,7 +1979,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [CALL_KW_NON_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_KW_PY] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, [CALL_LEN] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, - [CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG }, [CALL_METHOD_DESCRIPTOR_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [CALL_METHOD_DESCRIPTOR_NOARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git 
a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index dc75adb269d831..89fce193f40bd8 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -242,7 +242,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CALL_BUILTIN_FAST_WITH_KEYWORDS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_LEN] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, [_CALL_ISINSTANCE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, - [_CALL_LIST_APPEND] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CALL_LIST_APPEND] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG, [_CALL_METHOD_DESCRIPTOR_O] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_CALL_METHOD_DESCRIPTOR_NOARGS] = HAS_ARG_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Objects/listobject.c b/Objects/listobject.c index 1a39164c675245..8abe9e8933420b 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -512,16 +512,6 @@ PyList_Append(PyObject *op, PyObject *newitem) return -1; } -int -_PyList_AppendTakeRefAndLock(PyListObject *self, PyObject *newitem) -{ - int ret; - Py_BEGIN_CRITICAL_SECTION(self); - ret = _PyList_AppendTakeRef((PyListObject *)self, newitem); - Py_END_CRITICAL_SECTION(); - return ret; -} - /* Methods */ static void diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 757e334a740526..02c2e23f74fdb3 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4002,12 +4002,10 @@ dummy_func( DEOPT_IF(callable_o != interp->callable_cache.list_append); assert(self_o != NULL); DEOPT_IF(!PyList_Check(self_o)); + DEOPT_IF(!LOCK_OBJECT(self_o)); STAT_INC(CALL, hit); - #ifdef Py_GIL_DISABLED - int err = 
_PyList_AppendTakeRefAndLock((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); - #else int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); - #endif + UNLOCK_OBJECT(self_o); PyStackRef_CLOSE(self); PyStackRef_CLOSE(callable); ERROR_IF(err, error); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5755a4f196fccc..987ff2e6419669 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4912,14 +4912,13 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + if (!LOCK_OBJECT(self_o)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } STAT_INC(CALL, hit); - #ifdef Py_GIL_DISABLED - _PyFrame_SetStackPointer(frame, stack_pointer); - int err = _PyList_AppendTakeRefAndLock((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); - stack_pointer = _PyFrame_GetStackPointer(frame); - #else int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); - #endif + UNLOCK_OBJECT(self_o); PyStackRef_CLOSE(self); PyStackRef_CLOSE(callable); if (err) JUMP_TO_ERROR(); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 1f3017f0649d02..10a1ccd17bc28e 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2382,14 +2382,10 @@ DEOPT_IF(callable_o != interp->callable_cache.list_append, CALL); assert(self_o != NULL); DEOPT_IF(!PyList_Check(self_o), CALL); + DEOPT_IF(!LOCK_OBJECT(self_o), CALL); STAT_INC(CALL, hit); - #ifdef Py_GIL_DISABLED - _PyFrame_SetStackPointer(frame, stack_pointer); - int err = _PyList_AppendTakeRefAndLock((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); - stack_pointer = _PyFrame_GetStackPointer(frame); - #else int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); - #endif + UNLOCK_OBJECT(self_o); PyStackRef_CLOSE(self); PyStackRef_CLOSE(callable); if (err) goto pop_3_error;