From 21dc79ce1011d94c0aa7fc1f6435490e9fc96b22 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 18 Mar 2024 16:16:11 -0700 Subject: [PATCH 01/13] Leave func_version zero after critical mutations Also assignment to `__annotations__` is not critical. --- Objects/funcobject.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/Objects/funcobject.c b/Objects/funcobject.c index a506166916de48..5bbf3946b717ee 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -291,19 +291,7 @@ _PyFunction_LookupByVersion(uint32_t version) uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func) { - if (func->func_version != 0) { - return func->func_version; - } - if (func->vectorcall != _PyFunction_Vectorcall) { - return 0; - } - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->func_state.next_version == 0) { - return 0; - } - uint32_t v = interp->func_state.next_version++; - _PyFunction_SetVersion(func, v); - return v; + return func->func_version; } PyObject * @@ -507,7 +495,6 @@ PyFunction_SetAnnotations(PyObject *op, PyObject *annotations) "non-dict annotations"); return -1; } - _PyFunction_SetVersion((PyFunctionObject *)op, 0); Py_XSETREF(((PyFunctionObject *)op)->func_annotations, annotations); return 0; } @@ -731,7 +718,6 @@ func_set_annotations(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(igno "__annotations__ must be set to a dict object"); return -1; } - _PyFunction_SetVersion(op, 0); Py_XSETREF(op->func_annotations, Py_XNewRef(value)); return 0; } From 51bc6260f9bc503ca8cd04f955dbf3cd2160eefe Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 18 Mar 2024 18:00:10 -0700 Subject: [PATCH 02/13] Change the function version cache to contain a (func, code) pair It is possible that func is NULL but code isn't. There is some cleanup that needs to be done still to make all tests pass. 
--- Include/internal/pycore_function.h | 15 ++++++--- Objects/codeobject.c | 1 + Objects/funcobject.c | 54 ++++++++++++++++++++++++------ Python/optimizer.c | 12 +++---- 4 files changed, 62 insertions(+), 20 deletions(-) diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h index dad6a89af77dec..24fbb3ddbee602 100644 --- a/Include/internal/pycore_function.h +++ b/Include/internal/pycore_function.h @@ -17,20 +17,27 @@ extern PyObject* _PyFunction_Vectorcall( #define FUNC_MAX_WATCHERS 8 #define FUNC_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */ + +struct _func_version_cache_item { + PyFunctionObject *func; + PyObject *code; +}; + struct _py_func_state { uint32_t next_version; - // Borrowed references to function objects whose + // Borrowed references to function and code objects whose // func_version % FUNC_VERSION_CACHE_SIZE // once was equal to the index in the table. - // They are cleared when the function is deallocated. - PyFunctionObject *func_version_cache[FUNC_VERSION_CACHE_SIZE]; + // They are cleared when the function or code object is deallocated. 
+ struct _func_version_cache_item func_version_cache[FUNC_VERSION_CACHE_SIZE]; }; extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr); extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func); PyAPI_FUNC(void) _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version); -PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version); +void _PyFunction_ClearCodeByVersion(uint32_t version); +PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version, PyObject **p_code); extern PyObject *_Py_set_function_type_params( PyThreadState* unused, PyObject *func, PyObject *type_params); diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 3df733eb4ee578..bdde12d77caf07 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1710,6 +1710,7 @@ code_dealloc(PyCodeObject *co) } Py_SET_REFCNT(co, 0); + _PyFunction_ClearCodeByVersion(co->co_version); if (co->co_extra != NULL) { PyInterpreterState *interp = _PyInterpreterState_GET(); _PyCodeObjectExtra *co_extra = co->co_extra; diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 5bbf3946b717ee..d22dea604b0643 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -218,6 +218,8 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname } /* +XXX TODO XXX Rewrite this entire comment to match the new reality! + Function versions ----------------- @@ -262,28 +264,60 @@ _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version) { PyInterpreterState *interp = _PyInterpreterState_GET(); if (func->func_version != 0) { - PyFunctionObject **slot = + struct _func_version_cache_item *slot = interp->func_state.func_version_cache + (func->func_version % FUNC_VERSION_CACHE_SIZE); - if (*slot == func) { - *slot = NULL; + if (slot->func == func) { + slot->func = NULL; + // Leave slot->code alone, there may be use for it. 
} } func->func_version = version; if (version != 0) { - interp->func_state.func_version_cache[ - version % FUNC_VERSION_CACHE_SIZE] = func; + struct _func_version_cache_item *slot = + interp->func_state.func_version_cache + + (version % FUNC_VERSION_CACHE_SIZE); + slot->func = func; + slot->code = func->func_code; + } +} + +void +_PyFunction_ClearCodeByVersion(uint32_t version) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct _func_version_cache_item *slot = + interp->func_state.func_version_cache + + (version % FUNC_VERSION_CACHE_SIZE); + if (slot->code) { + assert(PyCode_Check(slot->code)); + PyCodeObject *code = (PyCodeObject *)slot->code; + if (code->co_version == version) { + slot->code = NULL; + } } } PyFunctionObject * -_PyFunction_LookupByVersion(uint32_t version) +_PyFunction_LookupByVersion(uint32_t version, PyObject **p_code) { PyInterpreterState *interp = _PyInterpreterState_GET(); - PyFunctionObject *func = interp->func_state.func_version_cache[ - version % FUNC_VERSION_CACHE_SIZE]; - if (func != NULL && func->func_version == version) { - return func; + struct _func_version_cache_item *slot = + interp->func_state.func_version_cache + + (version % FUNC_VERSION_CACHE_SIZE); + if (slot->code) { + assert(PyCode_Check(slot->code)); + PyCodeObject *code = (PyCodeObject *)slot->code; + if (code->co_version == version) { + *p_code = slot->code; + } + } + else { + *p_code = NULL; + } + if (slot->func && slot->func->func_version == version) { + assert(slot->func->func_code == slot->code); + return slot->func; } return NULL; } diff --git a/Python/optimizer.c b/Python/optimizer.c index bb00e0d2575784..66d77cf48c95c7 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -738,10 +738,10 @@ translate_bytecode_to_trace( // Add one to account for the actual opcode/oparg pair: + 1; uint32_t func_version = read_u32(&instr[func_version_offset].cache); - PyFunctionObject *new_func = _PyFunction_LookupByVersion(func_version); - DPRINTF(2, 
"Function: version=%#x; object=%p\n", (int)func_version, new_func); - if (new_func != NULL) { - PyCodeObject *new_code = (PyCodeObject *)PyFunction_GET_CODE(new_func); + PyCodeObject *new_code = NULL; + PyFunctionObject *new_func = _PyFunction_LookupByVersion(func_version, &new_code); + DPRINTF(2, "Function: version=%#x; object=%p, code=%p\n", (int)func_version, new_func, new_code); + if (new_code != NULL) { if (new_code == code) { // Recursive call, bail (we could be here forever). DPRINTF(2, "Bailing on recursive call to %s (%s:%d)\n", @@ -780,8 +780,8 @@ translate_bytecode_to_trace( 2 * INSTR_IP(instr, code)); goto top; } - DPRINTF(2, "Bail, new_func == NULL\n"); - ADD_TO_TRACE(uop, oparg, operand, target); + DPRINTF(2, "Bail, new_code == NULL\n"); + ADD_TO_TRACE(uop, oparg, 0, target); ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); goto done; } From 0d5f84945ce64a7a77ffda055e084967ff043dcc Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 19 Mar 2024 10:21:24 -0700 Subject: [PATCH 03/13] Use _PyFrame_GetCode where needed; tweak a debug print --- Python/optimizer.c | 9 +++++---- Python/optimizer_analysis.c | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 66d77cf48c95c7..07b54f4d31232d 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -211,7 +211,7 @@ _PyOptimizer_Optimize( _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **executor_ptr) { - PyCodeObject *code = (PyCodeObject *)frame->f_executable; + PyCodeObject *code = _PyFrame_GetCode(frame); assert(PyCode_Check(code)); PyInterpreterState *interp = _PyInterpreterState_GET(); if (!has_space_for_executor(code, start)) { @@ -505,7 +505,7 @@ translate_bytecode_to_trace( _PyBloomFilter *dependencies) { bool progress_needed = true; - PyCodeObject *code = (PyCodeObject *)frame->f_executable; + PyCodeObject *code = _PyFrame_GetCode(frame); PyFunctionObject *func = (PyFunctionObject 
*)frame->f_funcobj; assert(PyFunction_Check(func)); PyCodeObject *initial_code = code; @@ -740,7 +740,8 @@ translate_bytecode_to_trace( uint32_t func_version = read_u32(&instr[func_version_offset].cache); PyCodeObject *new_code = NULL; PyFunctionObject *new_func = _PyFunction_LookupByVersion(func_version, &new_code); - DPRINTF(2, "Function: version=%#x; object=%p, code=%p\n", (int)func_version, new_func, new_code); + DPRINTF(2, "Function: version=%#x; new_func=%p, new_code=%p\n", + (int)func_version, new_func, new_code); if (new_code != NULL) { if (new_code == code) { // Recursive call, bail (we could be here forever). @@ -1116,7 +1117,7 @@ counter_optimize( int Py_UNUSED(curr_stackentries) ) { - PyCodeObject *code = (PyCodeObject *)frame->f_executable; + PyCodeObject *code = _PyFrame_GetCode(frame); int oparg = instr->op.arg; while (instr->op.code == EXTENDED_ARG) { instr++; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 0c95616848a85b..13e23453b6479c 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -511,7 +511,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) static void peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_size) { - PyCodeObject *co = (PyCodeObject *)frame->f_executable; + PyCodeObject *co = _PyFrame_GetCode(frame); for (int pc = 0; pc < buffer_size; pc++) { int opcode = buffer[pc].opcode; switch(opcode) { @@ -576,7 +576,7 @@ _Py_uop_analyze_and_optimize( peephole_opt(frame, buffer, buffer_size); err = optimize_uops( - (PyCodeObject *)frame->f_executable, buffer, + _PyFrame_GetCode(frame), buffer, buffer_size, curr_stacklen, dependencies); if (err == 0) { From b998bf2b0a54dcbcb7aba828a060609a1af6b07e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 19 Mar 2024 10:37:58 -0700 Subject: [PATCH 04/13] Clarify type of f_executable --- Include/internal/pycore_frame.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 0f9e7333cf1e1c..909d2d18a916e8 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -55,7 +55,7 @@ enum _frameowner { }; typedef struct _PyInterpreterFrame { - PyObject *f_executable; /* Strong reference */ + PyObject *f_executable; /* Strong reference (code object) */ struct _PyInterpreterFrame *previous; PyObject *f_funcobj; /* Strong reference. Only valid if not on C stack */ PyObject *f_globals; /* Borrowed reference. Only valid if not on C stack */ From fdbfe070ba973e9d6e9deea32daf45104dce01ae Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 19 Mar 2024 11:03:38 -0700 Subject: [PATCH 05/13] Change the operand for _PUSH_FRAME It can be either NULL or a function object (as before), or a code object with the low bit set (new). I expect this to require changes to the JIT code too. --- Python/optimizer.c | 16 +++++++++++++--- Python/optimizer_analysis.c | 16 +++++++++++++--- Python/optimizer_bytecodes.c | 21 ++++++++++++++++----- Python/optimizer_cases.c.h | 21 ++++++++++++++++----- 4 files changed, 58 insertions(+), 16 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 07b54f4d31232d..b4846d3d5973a4 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -767,9 +767,19 @@ translate_bytecode_to_trace( instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1; TRACE_STACK_PUSH(); _Py_BloomFilter_Add(dependencies, new_code); - /* Set the operand to the callee's function object, - * to assist optimization passes */ - ADD_TO_TRACE(uop, oparg, (uintptr_t)new_func, target); + /* Set the operand to the callee's function or code object, + * to assist optimization passes. + * We prefer setting it to the function (for remove_globals()) + * but if that's not available but the code is available, + * use the code, setting the low bit so the optimizer knows. 
+ */ + if (new_func != NULL) { + operand = (uintptr_t)new_func; + } + else { + operand = (uintptr_t)new_code | 1; + } + ADD_TO_TRACE(uop, oparg, operand, target); code = new_code; func = new_func; instr = _PyCode_CODE(code); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 13e23453b6479c..dedd4ef05cec3f 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -225,7 +225,12 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, builtins_watched <<= 1; globals_watched <<= 1; function_checked <<= 1; - PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand; + uintptr_t operand = buffer[pc].operand; + if (operand == 0 || (operand & 1)) { + // It's either a code object or NULL, so bail + return 1; + } + PyFunctionObject *func = (PyFunctionObject *)operand; if (func == NULL) { return 1; } @@ -534,11 +539,16 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s case _PUSH_FRAME: case _POP_FRAME: { - PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand; - if (func == NULL) { + uintptr_t operand = buffer[pc].operand; + if (operand & 1) { + co = (PyCodeObject *)(operand & ~1); + assert(PyCode_Check(co)); + } + else if (operand == 0) { co = NULL; } else { + PyFunctionObject *func = (PyFunctionObject *)operand; assert(PyFunction_Check(func)); co = (PyCodeObject *)func->func_code; } diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index ef08c0d8897c9f..b6f2551ee4751d 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -543,12 +543,23 @@ dummy_func(void) { (void)callable; - PyFunctionObject *func = (PyFunctionObject *)(this_instr + 2)->operand; - DPRINTF(3, "func: %p ", func); - if (func == NULL) { - goto error; + PyCodeObject *co = NULL; + assert((this_instr + 2)->opcode == _PUSH_FRAME); + uintptr_t push_operand = (this_instr + 2)->operand; + if (push_operand & 1) { + co = (PyCodeObject *)(push_operand & ~1); + 
DPRINTF(3, "code=%p ", co); + assert(PyCode_Check(co)); + } + else { + PyFunctionObject *func = (PyFunctionObject *)(this_instr + 2)->operand; + DPRINTF(3, "func=%p ", func); + if (func == NULL) { + goto error; + } + co = (PyCodeObject *)func->func_code; + DPRINTF(3, "code=%p ", co); } - PyCodeObject *co = (PyCodeObject *)func->func_code; assert(self_or_null != NULL); assert(args != NULL); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 610d1b1aede9cc..c5abd68acff5a4 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1596,12 +1596,23 @@ callable = stack_pointer[-2 - oparg]; int argcount = oparg; (void)callable; - PyFunctionObject *func = (PyFunctionObject *)(this_instr + 2)->operand; - DPRINTF(3, "func: %p ", func); - if (func == NULL) { - goto error; + PyCodeObject *co = NULL; + assert((this_instr + 2)->opcode == _PUSH_FRAME); + uintptr_t push_operand = (this_instr + 2)->operand; + if (push_operand & 1) { + co = (PyCodeObject *)(push_operand & ~1); + DPRINTF(3, "code=%p ", co); + assert(PyCode_Check(co)); + } + else { + PyFunctionObject *func = (PyFunctionObject *)(this_instr + 2)->operand; + DPRINTF(3, "func=%p ", func); + if (func == NULL) { + goto error; + } + co = (PyCodeObject *)func->func_code; + DPRINTF(3, "code=%p ", co); } - PyCodeObject *co = (PyCodeObject *)func->func_code; assert(self_or_null != NULL); assert(args != NULL); if (sym_is_not_null(self_or_null)) { From fe149b23ae053f2fed9f45c526ea5b746ec40fe6 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 19 Mar 2024 11:55:54 -0700 Subject: [PATCH 06/13] Rewrite explanatory comment --- Objects/funcobject.c | 62 +++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/Objects/funcobject.c b/Objects/funcobject.c index d22dea604b0643..4261ae5204b41a 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -218,45 +218,49 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, 
PyObject *qualname } /* -XXX TODO XXX Rewrite this entire comment to match the new reality! +(This is purely internal documentation. There are no public APIs here.) -Function versions ------------------ +Function (and code) versions +---------------------------- -Function versions are used to detect when a function object has been -updated, invalidating inline cache data used by the `CALL` bytecode -(notably `CALL_PY_EXACT_ARGS` and a few other `CALL` specializations). +The Tier 1 specializer generates CALL variants that can be invalidated +by changes to critical function attributes: -They are also used by the Tier 2 superblock creation code to find -the function being called (and from there the code object). +- __code__ +- __defaults__ +- __kwdefaults__ +- __closure__ -How does a function's `func_version` field get initialized? +For this purpose function objects have a 32-bit func_version member +that the specializer writes to the specialized instruction's inline +cache and which is checked by a guard on the specialized instructions. -- `PyFunction_New` and friends initialize it to 0. -- The `MAKE_FUNCTION` instruction sets it from the code's `co_version`. -- It is reset to 0 when various attributes like `__code__` are set. -- A new version is allocated by `_PyFunction_GetVersionForCurrentState` - when the specializer needs a version and the version is 0. +The MAKE_FUNCTION bytecode sets func_version from the code object's +co_version field. The latter is initialized from a counter in the +interpreter state (interp->func_state.next_version) and never changes. +When this counter overflows, it remains zero and the specializer loses +the ability to specialize calls to new functions. -The latter allocates versions using a counter in the interpreter state, -`interp->func_state.next_version`. -When the counter wraps around to 0, no more versions are allocated. -There is one other special case: functions with a non-standard -`vectorcall` field are not given a version. 
+The func_version is reset to zero when any of the critical attributes +is modified; after this point the specializer will no longer specialize +calls to this function, and the guard will always fail. -When the function version is 0, the `CALL` bytecode is not specialized. +The function and code version cache +----------------------------------- -Code object versions --------------------- +The Tier 2 optimizer now has a problem, since it needs to find the +function and code objects given only the version number from the inline +cache. Our solution is to maintain a cache mapping version numbers to +function and code objects. To limit the cache size we could hash +the version number, but for now we simply use it modulo the table size. -So where to code objects get their `co_version`? -They share the same counter, `interp->func_state.next_version`. +There are some corner cases (e.g. generator expressions) where we will +be unable to find the function object in the cache but we can still +find the code object. For this reason the cache stores both the +function object and the code object. -Code objects get a new `co_version` allocated from this counter upon -creation. Since code objects are nominally immutable, `co_version` can -not be invalidated. The only way it can be 0 is when 2**32 or more -code objects have been created during the process's lifetime. -(The counter isn't reset by `fork()`, extending the lifetime.) +The cache doesn't contain strong references; cache entries are +invalidated whenever the function or code object is deallocated. */ void From 0d0551ebc1f322cbecfc1b1855c5638a98da05c8 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 19 Mar 2024 14:13:56 -0700 Subject: [PATCH 07/13] Add code to the trace stack in translate_bytecode_to_trace() This fixes a situation where func is NULL but code isn't, and then we push another frame; when we pop, we must restore the code object without relying on func. 
--- Python/optimizer.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index b4846d3d5973a4..34090c75a8ea10 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -479,8 +479,9 @@ BRANCH_TO_GUARD[4][2] = { ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); \ goto done; \ } \ - assert(func->func_code == (PyObject *)code); \ + assert(func == NULL || func->func_code == (PyObject *)code); \ trace_stack[trace_stack_depth].func = func; \ + trace_stack[trace_stack_depth].code = code; \ trace_stack[trace_stack_depth].instr = instr; \ trace_stack_depth++; #define TRACE_STACK_POP() \ @@ -489,7 +490,8 @@ BRANCH_TO_GUARD[4][2] = { } \ trace_stack_depth--; \ func = trace_stack[trace_stack_depth].func; \ - code = (PyCodeObject *)trace_stack[trace_stack_depth].func->func_code; \ + code = trace_stack[trace_stack_depth].code; \ + assert(func == NULL || func->func_code == (PyObject *)code); \ instr = trace_stack[trace_stack_depth].instr; /* Returns 1 on success, @@ -515,6 +517,7 @@ translate_bytecode_to_trace( int max_length = buffer_size; struct { PyFunctionObject *func; + PyCodeObject *code; _Py_CODEUNIT *instr; } trace_stack[TRACE_STACK_SIZE]; int trace_stack_depth = 0; @@ -739,7 +742,8 @@ translate_bytecode_to_trace( + 1; uint32_t func_version = read_u32(&instr[func_version_offset].cache); PyCodeObject *new_code = NULL; - PyFunctionObject *new_func = _PyFunction_LookupByVersion(func_version, &new_code); + PyFunctionObject *new_func = + _PyFunction_LookupByVersion(func_version, (PyObject **) &new_code); DPRINTF(2, "Function: version=%#x; new_func=%p, new_code=%p\n", (int)func_version, new_func, new_code); if (new_code != NULL) { From b999d3e7f07161250e4ed67554911877eb540ef8 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 19 Mar 2024 15:23:42 -0700 Subject: [PATCH 08/13] Set operand for _PUSH/_POP_FRAME more consistently --- Python/optimizer.c | 21 +++++++++++++++++---- Python/optimizer_analysis.c | 10 
+++++++++- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 34090c75a8ea10..fbf43420be3413 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -722,9 +722,19 @@ translate_bytecode_to_trace( if (uop == _POP_FRAME) { TRACE_STACK_POP(); - /* Set the operand to the function object returned to, - * to assist optimization passes */ - ADD_TO_TRACE(uop, oparg, (uintptr_t)func, target); + /* Set the operand to the function or code object returned to, + * to assist optimization passes. (See _PUSH_FRAME below.) + */ + if (func != NULL) { + operand = (uintptr_t)func; + } + else if (code != NULL) { + operand = (uintptr_t)code | 1; + } + else { + operand = 0; + } + ADD_TO_TRACE(uop, oparg, operand, target); DPRINTF(2, "Returning to %s (%s:%d) at byte offset %d\n", PyUnicode_AsUTF8(code->co_qualname), @@ -780,9 +790,12 @@ translate_bytecode_to_trace( if (new_func != NULL) { operand = (uintptr_t)new_func; } - else { + else if (new_code != NULL) { operand = (uintptr_t)new_code | 1; } + else { + operand = 0; + } ADD_TO_TRACE(uop, oparg, operand, target); code = new_code; func = new_func; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index dedd4ef05cec3f..2dd49a0646ed3e 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -252,7 +252,15 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, builtins_watched >>= 1; globals_watched >>= 1; function_checked >>= 1; - PyFunctionObject *func = (PyFunctionObject *)buffer[pc].operand; + uintptr_t operand = buffer[pc].operand; + if (operand == 0 || (operand & 1)) { + // It's either a code object or NULL, so bail + return 1; + } + PyFunctionObject *func = (PyFunctionObject *)operand; + if (func == NULL) { + return 1; + } assert(PyFunction_Check(func)); function_version = func->func_version; globals = func->func_globals; From b8141cdef31fff03e4df27d4acf6d0a9f2d6542b Mon Sep 17 00:00:00 2001 From: Guido van 
Rossum Date: Tue, 19 Mar 2024 16:29:19 -0700 Subject: [PATCH 09/13] Remove bogus assert --- Python/bytecodes.c | 1 - Python/generated_cases.c.h | 1 - 2 files changed, 2 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 476975d2fbc3c2..09d066325fad10 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3218,7 +3218,6 @@ dummy_func( assert(func->func_defaults); assert(PyTuple_CheckExact(func->func_defaults)); int defcount = (int)PyTuple_GET_SIZE(func->func_defaults); - assert(defcount <= code->co_argcount); int min_args = code->co_argcount - defcount; DEOPT_IF(argcount > code->co_argcount); DEOPT_IF(argcount < min_args); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 2996ee72e7f2c6..f850e5e60ab4df 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1825,7 +1825,6 @@ assert(func->func_defaults); assert(PyTuple_CheckExact(func->func_defaults)); int defcount = (int)PyTuple_GET_SIZE(func->func_defaults); - assert(defcount <= code->co_argcount); int min_args = code->co_argcount - defcount; DEOPT_IF(argcount > code->co_argcount, CALL); DEOPT_IF(argcount < min_args, CALL); From 0fd96be84aa28ad987f76686139037a6352b06c1 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 19 Mar 2024 17:33:41 -0700 Subject: [PATCH 10/13] Document and enforce invariants --- Objects/funcobject.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 4261ae5204b41a..d2ebf0607d716c 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -261,6 +261,18 @@ function object and the code object. The cache doesn't contain strong references; cache entries are invalidated whenever the function or code object is deallocated. + +Invariants +---------- + +These should hold at any time except when one of the cache-mutating +functions is running. 
+ +- For any slot s at index i: + - s->func == NULL or s->func->func_version % FUNC_VERSION_CACHE_SIZE == i + - s->code == NULL or s->code->co_version % FUNC_VERSION_CACHE_SIZE == i + if s->func != NULL, then s->func->func_code == s->code + */ void @@ -297,7 +309,10 @@ _PyFunction_ClearCodeByVersion(uint32_t version) assert(PyCode_Check(slot->code)); PyCodeObject *code = (PyCodeObject *)slot->code; if (code->co_version == version) { - slot->code = NULL; + slot->code = NULL; + if (slot->func != NULL) { + slot->code = slot->func->func_code; + } } } } From 6748a56bd8059dd520d12bd4c8f7ce28ddf3536f Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 20 Mar 2024 07:31:59 -0700 Subject: [PATCH 11/13] Enforce invariant differently --- Objects/funcobject.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Objects/funcobject.c b/Objects/funcobject.c index d2ebf0607d716c..a3c0800e7891d3 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -310,9 +310,7 @@ _PyFunction_ClearCodeByVersion(uint32_t version) PyCodeObject *code = (PyCodeObject *)slot->code; if (code->co_version == version) { slot->code = NULL; - if (slot->func != NULL) { - slot->code = slot->func->func_code; - } + slot->func = NULL; } } } From fac617c5842ce3b5f710318ce2f337cc57815d08 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 20 Mar 2024 11:32:17 -0700 Subject: [PATCH 12/13] Revert "Remove bogus assert" (it's not my job to fix this) This reverts commit b8141cdef31fff03e4df27d4acf6d0a9f2d6542b. 
--- Python/bytecodes.c | 1 + Python/generated_cases.c.h | 1 + 2 files changed, 2 insertions(+) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 09d066325fad10..476975d2fbc3c2 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3218,6 +3218,7 @@ dummy_func( assert(func->func_defaults); assert(PyTuple_CheckExact(func->func_defaults)); int defcount = (int)PyTuple_GET_SIZE(func->func_defaults); + assert(defcount <= code->co_argcount); int min_args = code->co_argcount - defcount; DEOPT_IF(argcount > code->co_argcount); DEOPT_IF(argcount < min_args); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index f850e5e60ab4df..2996ee72e7f2c6 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1825,6 +1825,7 @@ assert(func->func_defaults); assert(PyTuple_CheckExact(func->func_defaults)); int defcount = (int)PyTuple_GET_SIZE(func->func_defaults); + assert(defcount <= code->co_argcount); int min_args = code->co_argcount - defcount; DEOPT_IF(argcount > code->co_argcount, CALL); DEOPT_IF(argcount < min_args, CALL); From 37fab3cc323780dc5ac3cd9916eae89a526608ca Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 20 Mar 2024 11:37:13 -0700 Subject: [PATCH 13/13] Address code review --- Include/internal/pycore_frame.h | 2 +- Python/optimizer_analysis.c | 6 +++--- Python/optimizer_bytecodes.c | 2 +- Python/optimizer_cases.c.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 909d2d18a916e8..74d9e4cac72c0e 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -55,7 +55,7 @@ enum _frameowner { }; typedef struct _PyInterpreterFrame { - PyObject *f_executable; /* Strong reference (code object) */ + PyObject *f_executable; /* Strong reference (code object or None) */ struct _PyInterpreterFrame *previous; PyObject *f_funcobj; /* Strong reference. 
Only valid if not on C stack */ PyObject *f_globals; /* Borrowed reference. Only valid if not on C stack */ diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 2dd49a0646ed3e..9fb36dd36d363e 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -225,7 +225,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, builtins_watched <<= 1; globals_watched <<= 1; function_checked <<= 1; - uintptr_t operand = buffer[pc].operand; + uint64_t operand = buffer[pc].operand; if (operand == 0 || (operand & 1)) { // It's either a code object or NULL, so bail return 1; @@ -252,7 +252,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, builtins_watched >>= 1; globals_watched >>= 1; function_checked >>= 1; - uintptr_t operand = buffer[pc].operand; + uint64_t operand = buffer[pc].operand; if (operand == 0 || (operand & 1)) { // It's either a code object or NULL, so bail return 1; @@ -547,7 +547,7 @@ peephole_opt(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, int buffer_s case _PUSH_FRAME: case _POP_FRAME: { - uintptr_t operand = buffer[pc].operand; + uint64_t operand = buffer[pc].operand; if (operand & 1) { co = (PyCodeObject *)(operand & ~1); assert(PyCode_Check(co)); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index b6f2551ee4751d..e974268b5994d5 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -552,7 +552,7 @@ dummy_func(void) { assert(PyCode_Check(co)); } else { - PyFunctionObject *func = (PyFunctionObject *)(this_instr + 2)->operand; + PyFunctionObject *func = (PyFunctionObject *)push_operand; DPRINTF(3, "func=%p ", func); if (func == NULL) { goto error; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index c5abd68acff5a4..e9f6d5811e13b2 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1605,7 +1605,7 @@ assert(PyCode_Check(co)); } else { - PyFunctionObject *func = 
(PyFunctionObject *)(this_instr + 2)->operand; + PyFunctionObject *func = (PyFunctionObject *)push_operand; DPRINTF(3, "func=%p ", func); if (func == NULL) { goto error;