From f8bd566d5ec4f3e07100de3048e26f2f6c05d303 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 30 Apr 2024 09:13:28 +0100 Subject: [PATCH 1/7] Target _FOR_ITER_TIER_TWO at POP_TOP following END_FOR --- Python/bytecodes.c | 4 +--- Python/executor_cases.c.h | 4 +--- Python/optimizer.c | 9 +++++---- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 002b5a3529c127..c3140e209259e3 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2609,9 +2609,7 @@ dummy_func( _PyErr_Clear(tstate); } /* iterator ended normally */ - Py_DECREF(iter); - STACK_SHRINK(1); - /* The translator sets the deopt target just past END_FOR */ + /* The translator sets the deopt target just past the matching END_FOR */ DEOPT_IF(true); } // Common case: no jump, leave it to the code generator diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index b17f3762714c72..ac9f08e1c5f8f7 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2664,9 +2664,7 @@ _PyErr_Clear(tstate); } /* iterator ended normally */ - Py_DECREF(iter); - STACK_SHRINK(1); - /* The translator sets the deopt target just past END_FOR */ + /* The translator sets the deopt target just past the matching END_FOR */ if (true) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); diff --git a/Python/optimizer.c b/Python/optimizer.c index 6576aa1cddc033..b258fb63ca9a26 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -747,10 +747,11 @@ translate_bytecode_to_trace( uop = _PyUOp_Replacements[uop]; assert(uop != 0); if (uop == _FOR_ITER_TIER_TWO) { - target += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 2 + extended; - assert(_PyCode_CODE(code)[target-2].op.code == END_FOR || - _PyCode_CODE(code)[target-2].op.code == INSTRUMENTED_END_FOR); - assert(_PyCode_CODE(code)[target-1].op.code == POP_TOP); + uint32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + extended; + target = next_inst + oparg + 1; + assert(_PyCode_CODE(code)[target-1].op.code == END_FOR || + _PyCode_CODE(code)[target-1].op.code == INSTRUMENTED_END_FOR); + assert(_PyCode_CODE(code)[target].op.code == POP_TOP); } break; default: From 3bac85827afb2f129f1e13c4125b207dd375ef04 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 30 Apr 2024 09:25:31 +0100 Subject: [PATCH 2/7] Move handling of _FOR_ITER_TIER_TWO exits from trace creation to the prepare for execution step. --- Python/optimizer.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index b258fb63ca9a26..568b6e5df6ec7a 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -571,7 +571,6 @@ translate_bytecode_to_trace( uint32_t opcode = instr->op.code; uint32_t oparg = instr->op.arg; - uint32_t extended = 0; DPRINTF(2, "%d: %s(%d)\n", target, _PyOpcode_OpName[opcode], oparg); @@ -585,7 +584,6 @@ translate_bytecode_to_trace( if (opcode == EXTENDED_ARG) { instr++; - extended = 1; opcode = instr->op.code; oparg = (oparg << 8) | instr->op.arg; if (opcode == EXTENDED_ARG) { @@ -746,13 +744,15 @@ translate_bytecode_to_trace( case OPARG_REPLACED: uop = _PyUOp_Replacements[uop]; assert(uop != 0); +#ifdef Py_DEBUG if (uop == _FOR_ITER_TIER_TWO) { - uint32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + extended; - target = next_inst + oparg + 1; - assert(_PyCode_CODE(code)[target-1].op.code == END_FOR || - _PyCode_CODE(code)[target-1].op.code == INSTRUMENTED_END_FOR); - assert(_PyCode_CODE(code)[target].op.code == POP_TOP); + uint32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + (oparg > 256); + uint32_t jump_target = next_inst + oparg; + assert(_PyCode_CODE(code)[jump_target].op.code == END_FOR || + _PyCode_CODE(code)[jump_target].op.code == INSTRUMENTED_END_FOR); + assert(_PyCode_CODE(code)[jump_target+1].op.code == POP_TOP); } +#endif break; default: fprintf(stderr, @@ -972,7 +972,15 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length) int opcode = inst->opcode; int32_t target = (int32_t)uop_get_target(inst); if (_PyUop_Flags[opcode] & (HAS_EXIT_FLAG | HAS_DEOPT_FLAG)) { - if (target != current_jump_target) { + int32_t jump_target = target; + if (opcode == _FOR_ITER_TIER_TWO) { + /* Target the POP_TOP immediately after the END_FOR, + * leaving only the iterator on the stack. */ + int extended_arg = inst->oparg > 256; + int32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + extended_arg; + jump_target = next_inst + inst->oparg + 1; + } + if (jump_target != current_jump_target) { uint16_t exit_op; if (_PyUop_Flags[opcode] & HAS_EXIT_FLAG) { if (opcode == _TIER2_RESUME_CHECK) { @@ -985,8 +993,8 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length) else { exit_op = _DEOPT; } - make_exit(&buffer[next_spare], exit_op, target); - current_jump_target = target; + make_exit(&buffer[next_spare], exit_op, jump_target); + current_jump_target = jump_target; current_jump = next_spare; next_spare++; } From b83d053441aeb09aa7ab84c97143ac6aaf5cf252 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 30 Apr 2024 10:02:38 +0100 Subject: [PATCH 3/7] Extend treatment of _FOR_ITER_TIER_TWO to all FOR_ITER tier 2 tests (and fix off by one error) --- Python/optimizer.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 568b6e5df6ec7a..a4d02c96149852 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -23,6 +23,18 @@ #define MAX_EXECUTORS_SIZE 256 +#ifdef Py_DEBUG +static int base_opcode(PyCodeObject *code, int offset) +{ + int opcode = _Py_GetBaseOpcode(code, offset); + if (opcode == ENTER_EXECUTOR) { + int oparg = _PyCode_CODE(code)[offset].op.arg; + _PyExecutorObject *ex = code->co_executors->executors[oparg]; + return ex->vm_data.opcode; + } + return opcode; +} +#endif static bool has_space_for_executor(PyCodeObject *code, _Py_CODEUNIT *instr) @@ -422,6 +434,14 @@ _PyUOp_Replacements[MAX_UOP_ID + 1] = { [_FOR_ITER] = _FOR_ITER_TIER_TWO, }; +static const uint8_t +is_for_iter_test[MAX_UOP_ID + 1] = { + [_GUARD_NOT_EXHAUSTED_RANGE] = 1, + [_GUARD_NOT_EXHAUSTED_LIST] = 1, + [_GUARD_NOT_EXHAUSTED_TUPLE] = 1, + [_FOR_ITER_TIER_TWO] = 1, +}; + static const uint16_t BRANCH_TO_GUARD[4][2] = { [POP_JUMP_IF_FALSE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_TRUE_POP, @@ -745,13 +765,11 @@ translate_bytecode_to_trace( uop = _PyUOp_Replacements[uop]; assert(uop != 0); #ifdef Py_DEBUG - if (uop == _FOR_ITER_TIER_TWO) { - uint32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + (oparg > 256); - uint32_t jump_target = next_inst + oparg; - assert(_PyCode_CODE(code)[jump_target].op.code == END_FOR || - _PyCode_CODE(code)[jump_target].op.code == INSTRUMENTED_END_FOR); - assert(_PyCode_CODE(code)[jump_target+1].op.code == POP_TOP); - } + uint32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + (oparg > 255); + uint32_t jump_target = next_inst + oparg; + assert(base_opcode(code, jump_target) == END_FOR || + base_opcode(code, jump_target) == INSTRUMENTED_END_FOR); + assert(base_opcode(code, jump_target+1) == POP_TOP); #endif break; default: @@ -973,10 +991,10 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length) int32_t target = (int32_t)uop_get_target(inst); if (_PyUop_Flags[opcode] & (HAS_EXIT_FLAG | HAS_DEOPT_FLAG)) { int32_t jump_target = target; - if (opcode == _FOR_ITER_TIER_TWO) { + if (is_for_iter_test[opcode]) { /* Target the POP_TOP immediately after the END_FOR, * leaving only the iterator on the stack. */ - int extended_arg = inst->oparg > 256; + int extended_arg = inst->oparg > 255; int32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + extended_arg; jump_target = next_inst + inst->oparg + 1; } From bb7efd4fa185dc18b9a3195a258c47112b29d6de Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 30 Apr 2024 10:12:32 +0100 Subject: [PATCH 4/7] Fix a minor bug in optimizer symbols --- Python/optimizer_symbols.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index d52f490853c006..430052d409047b 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -164,12 +164,15 @@ _Py_uop_sym_set_const(_Py_UopsSymbol *sym, PyObject *const_val) return true; } - bool _Py_uop_sym_set_null(_Py_UopsSymbol *sym) { + if (_Py_uop_sym_is_not_null(sym)) { + sym_set_bottom(sym); + return false; + } sym_set_flag(sym, IS_NULL); - return !_Py_uop_sym_is_bottom(sym); + return true; } bool From 983b7c8f8ca15dfd445d002e716c3d718cc429d1 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 1 May 2024 14:53:30 +0100 Subject: [PATCH 5/7] Fix stats for non-tier-2 build --- Python/specialize.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index ee51781372166a..72114f27f69c52 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -215,6 +215,7 @@ print_gc_stats(FILE *out, GCStats *stats) } } +#ifdef _Py_TIER2 static void print_histogram(FILE *out, const char *name, uint64_t hist[_Py_UOP_HIST_SIZE]) { @@ -249,7 +250,6 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) stats->optimizer_failure_reason_no_memory); fprintf(out, "Optimizer remove globals builtins changed: %" PRIu64 "\n", stats->remove_globals_builtins_changed); fprintf(out, "Optimizer remove globals incorrect keys: %" PRIu64 "\n", stats->remove_globals_incorrect_keys); - for (int i = 0; i <= MAX_UOP_ID; i++) { if (stats->opcode[i].execution_count) { fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].execution_count); @@ -258,7 +258,6 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].miss); } } - for (int i = 0; i < 256; i++) { if (stats->unsupported_opcode[i]) { fprintf( @@ -289,6 +288,7 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) } } } +#endif static void print_rare_event_stats(FILE *out, RareEventStats *stats) @@ -309,7 +309,9 @@ print_stats(FILE *out, PyStats *stats) print_call_stats(out, &stats->call_stats); print_object_stats(out, &stats->object_stats); print_gc_stats(out, stats->gc_stats); +#ifdef _Py_TIER2 print_optimization_stats(out, &stats->optimization_stats); +#endif print_rare_event_stats(out, &stats->rare_event_stats); } From df2792f46f39e6a22096f5817e6824456f7d6b6f Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 1 May 2024 15:21:09 +0100 Subject: [PATCH 6/7] Fix tier 2 build --- Include/cpython/optimizer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index a169280b26a6ad..2bd7b493745931 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -141,7 +141,7 @@ void _Py_BloomFilter_Init(_PyBloomFilter *); void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); -extern void _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); +PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); /* For testing */ PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void); From 98b517d16d75055d707f188964594477cbeed1c9 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 2 May 2024 11:59:09 +0100 Subject: [PATCH 7/7] Address review comments --- Python/optimizer.c | 15 +++++++++------ Python/optimizer_symbols.c | 6 +++++- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 6d3b366cc53945..703e3dc6af5a6b 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -24,7 +24,8 @@ #define MAX_EXECUTORS_SIZE 256 #ifdef Py_DEBUG -static int base_opcode(PyCodeObject *code, int offset) +static int +base_opcode(PyCodeObject *code, int offset) { int opcode = _Py_GetBaseOpcode(code, offset); if (opcode == ENTER_EXECUTOR) { @@ -788,11 +789,13 @@ translate_bytecode_to_trace( uop = _PyUOp_Replacements[uop]; assert(uop != 0); #ifdef Py_DEBUG - uint32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + (oparg > 255); - uint32_t jump_target = next_inst + oparg; - assert(base_opcode(code, jump_target) == END_FOR || - base_opcode(code, jump_target) == INSTRUMENTED_END_FOR); - assert(base_opcode(code, jump_target+1) == POP_TOP); + { + uint32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + (oparg > 255); + uint32_t jump_target = next_inst + oparg; + assert(base_opcode(code, jump_target) == END_FOR || + base_opcode(code, jump_target) == INSTRUMENTED_END_FOR); + assert(base_opcode(code, jump_target+1) == POP_TOP); + } #endif break; default: diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 430052d409047b..4aeb04fe0405d2 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -178,8 +178,12 @@ _Py_uop_sym_set_null(_Py_UopsSymbol *sym) bool _Py_uop_sym_set_non_null(_Py_UopsSymbol *sym) { + if (_Py_uop_sym_is_null(sym)) { + sym_set_bottom(sym); + return false; + } sym_set_flag(sym, NOT_NULL); - return !_Py_uop_sym_is_bottom(sym); + return true; }