From 9d22a48191035cbe430dfda8a7d16323e66f5d0b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 17 Jan 2024 22:32:47 +0000 Subject: [PATCH 1/4] Convert _LOAD_CONST to inline versions during tier 2 optimization --- Include/internal/pycore_uop_ids.h | 7 ++++--- Include/internal/pycore_uop_metadata.h | 2 ++ Python/bytecodes.c | 4 ++++ Python/executor_cases.c.h | 9 ++++++++ Python/optimizer.c | 6 ++++++ Python/optimizer_analysis.c | 29 ++++++++++++++++++++++++++ 6 files changed, 54 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 8ee90d79a13c2f..a7056586ff04c0 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -230,9 +230,10 @@ extern "C" { #define _JUMP_TO_TOP 377 #define _SAVE_RETURN_OFFSET 378 #define _CHECK_VALIDITY 379 -#define _LOAD_CONST_INLINE_BORROW 380 -#define _INTERNAL_INCREMENT_OPT_COUNTER 381 -#define MAX_UOP_ID 381 +#define _LOAD_CONST_INLINE 380 +#define _LOAD_CONST_INLINE_BORROW 381 +#define _INTERNAL_INCREMENT_OPT_COUNTER 382 +#define MAX_UOP_ID 382 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 9bfb4f4f3a4dea..14d3382e895cdf 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -202,6 +202,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, [_EXIT_TRACE] = HAS_DEOPT_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, + [_LOAD_CONST_INLINE] = 0, [_LOAD_CONST_INLINE_BORROW] = 0, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, }; @@ -329,6 +330,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_ATTR_WITH_HINT] = "_LOAD_ATTR_WITH_HINT", [_LOAD_BUILD_CLASS] = "_LOAD_BUILD_CLASS", [_LOAD_CONST] = "_LOAD_CONST", + [_LOAD_CONST_INLINE] = "_LOAD_CONST_INLINE", [_LOAD_CONST_INLINE_BORROW] = "_LOAD_CONST_INLINE_BORROW", [_LOAD_DEREF] = "_LOAD_DEREF", [_LOAD_FAST] = "_LOAD_FAST", diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c48f0a17c60fb1..a162063ef9c5c5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4070,6 +4070,10 @@ dummy_func( DEOPT_IF(!current_executor->vm_data.valid); } + op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { + value = Py_NewRef(ptr); + } + op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { value = ptr; } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 2b4399b25bae2b..241b9056207715 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3390,6 +3390,15 @@ break; } + case _LOAD_CONST_INLINE: { + PyObject *value; + PyObject *ptr = (PyObject *)CURRENT_OPERAND(); + value = Py_NewRef(ptr); + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + case _LOAD_CONST_INLINE_BORROW: { PyObject *value; PyObject *ptr = (PyObject *)CURRENT_OPERAND(); diff --git a/Python/optimizer.c b/Python/optimizer.c index 1551a5ef61f892..4b6ed1781b5b78 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -588,6 +588,9 @@ translate_bytecode_to_trace( ADD_TO_TRACE(uop, oparg, operand, target); if (uop == _POP_FRAME) { TRACE_STACK_POP(); + /* Set the operand to the code object returned to, + * to assist optimization passes */ + trace[trace_length-1].operand = (uintptr_t)code; DPRINTF(2, "Returning to %s (%s:%d) at byte offset %d\n", PyUnicode_AsUTF8(code->co_qualname), @@ -629,6 +632,9 @@ translate_bytecode_to_trace( instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1; TRACE_STACK_PUSH(); _Py_BloomFilter_Add(dependencies, new_code); + /* Set the operand to the callee's code object, + * to assist optimization passes */ + trace[trace_length-1].operand = (uintptr_t)new_code; code = new_code; instr = _PyCode_CODE(code); DPRINTF(2, diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 7db51f0d90a453..f96b50ad6826e3 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -12,6 +12,34 @@ #include #include "pycore_optimizer.h" +static void +inline_consts(PyCodeObject *co, _PyUOpInstruction *buffer, int buffer_size) +{ + + for (int pc = 0; pc < buffer_size; pc++) { + int opcode = buffer[pc].opcode; + switch(opcode) { + case _LOAD_CONST: { + if (co == NULL) { + printf("NULL co @ %d\n", pc); + } + assert(co != NULL); + PyObject *val = PyTuple_GET_ITEM(co->co_consts, buffer[pc].oparg); + buffer[pc].opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; + buffer[pc].operand = (uintptr_t)val; + break; + } + case _PUSH_FRAME: + case _POP_FRAME: + co = (PyCodeObject *)buffer[pc].operand; + break; + case _JUMP_TO_TOP: + case _EXIT_TRACE: + return; + } + } +} + static void remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) { @@ -59,6 +87,7 @@ _Py_uop_analyze_and_optimize( int curr_stacklen ) { + inline_consts(co, buffer, buffer_size); remove_unneeded_uops(buffer, buffer_size); return 0; } From f15fedec916a7398803d159dae2905b354ca1659 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 17 Jan 2024 22:49:15 +0000 Subject: [PATCH 2/4] Remove debug code --- Python/optimizer_analysis.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index f96b50ad6826e3..ae45af81267b4a 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -20,9 +20,6 @@ inline_consts(PyCodeObject *co, _PyUOpInstruction *buffer, int buffer_size) int opcode = buffer[pc].opcode; switch(opcode) { case _LOAD_CONST: { - if (co == NULL) { - printf("NULL co @ %d\n", pc); - } assert(co != NULL); PyObject *val = PyTuple_GET_ITEM(co->co_consts, buffer[pc].oparg); buffer[pc].opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; From 42374f448adf9f7520a90e5bf629c0fb668b323b Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 17 Jan 2024 23:40:23 +0000 Subject: [PATCH 3/4] Perform a couple of peephole optimizations for tier 2. --- Python/optimizer_analysis.c | 13 +++++++++++-- Python/pystate.c | 10 +++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index ae45af81267b4a..2361e16e80d1e6 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -13,7 +13,7 @@ #include "pycore_optimizer.h" static void -inline_consts(PyCodeObject *co, _PyUOpInstruction *buffer, int buffer_size) +peephole_opt(PyCodeObject *co, _PyUOpInstruction *buffer, int buffer_size) { for (int pc = 0; pc < buffer_size; pc++) { @@ -26,6 +26,15 @@ inline_consts(PyCodeObject *co, _PyUOpInstruction *buffer, int buffer_size) buffer[pc].operand = (uintptr_t)val; break; } + case _CHECK_PEP_523: + { + /* Setting the eval frame function invalidates + * all executors, so no need to check dynamically */ + if (_PyInterpreterState_GET()->eval_frame == NULL) { + buffer[pc].opcode = _NOP; + } + break; + } case _PUSH_FRAME: case _POP_FRAME: co = (PyCodeObject *)buffer[pc].operand; @@ -84,7 +93,7 @@ _Py_uop_analyze_and_optimize( int curr_stacklen ) { - inline_consts(co, buffer, buffer_size); + peephole_opt(co, buffer, buffer_size); remove_unneeded_uops(buffer, buffer_size); return 0; } diff --git a/Python/pystate.c b/Python/pystate.c index 999976283da675..746055f466aaf2 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2367,11 +2367,15 @@ _PyInterpreterState_SetEvalFrameFunc(PyInterpreterState *interp, _PyFrameEvalFunction eval_frame) { if (eval_frame == _PyEval_EvalFrameDefault) { - interp->eval_frame = NULL; + eval_frame = NULL; } - else { - interp->eval_frame = eval_frame; + if (eval_frame == interp->eval_frame) { + return; + } + if (eval_frame != NULL) { + _Py_Executors_InvalidateAll(interp); } + interp->eval_frame = eval_frame; } From c423aac16dd677e581c4ae9cf4f9f38e8ab220b1 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 18 Jan 2024 12:51:49 +0000 Subject: [PATCH 4/4] Remove blank line --- Python/optimizer_analysis.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 2361e16e80d1e6..d1225997e10be2 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -15,7 +15,6 @@ static void peephole_opt(PyCodeObject *co, _PyUOpInstruction *buffer, int buffer_size) { - for (int pc = 0; pc < buffer_size; pc++) { int opcode = buffer[pc].opcode; switch(opcode) {