diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 1e6ef8e54a221a..702231e7cd7e5f 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -238,39 +238,40 @@ extern "C" { #define _REPLACE_WITH_TRUE 432 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR -#define _SAVE_RETURN_OFFSET 433 -#define _SEND 434 +#define _RETURN_OFFSET 433 +#define _SAVE_RETURN_OFFSET 434 +#define _SEND 435 #define _SEND_GEN SEND_GEN #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 435 -#define _STORE_ATTR 436 -#define _STORE_ATTR_INSTANCE_VALUE 437 -#define _STORE_ATTR_SLOT 438 +#define _START_EXECUTOR 436 +#define _STORE_ATTR 437 +#define _STORE_ATTR_INSTANCE_VALUE 438 +#define _STORE_ATTR_SLOT 439 #define _STORE_ATTR_WITH_HINT STORE_ATTR_WITH_HINT #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 439 -#define _STORE_FAST_0 440 -#define _STORE_FAST_1 441 -#define _STORE_FAST_2 442 -#define _STORE_FAST_3 443 -#define _STORE_FAST_4 444 -#define _STORE_FAST_5 445 -#define _STORE_FAST_6 446 -#define _STORE_FAST_7 447 +#define _STORE_FAST 440 +#define _STORE_FAST_0 441 +#define _STORE_FAST_1 442 +#define _STORE_FAST_2 443 +#define _STORE_FAST_3 444 +#define _STORE_FAST_4 445 +#define _STORE_FAST_5 446 +#define _STORE_FAST_6 447 +#define _STORE_FAST_7 448 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME #define _STORE_SLICE STORE_SLICE -#define _STORE_SUBSCR 448 +#define _STORE_SUBSCR 449 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 449 -#define _TO_BOOL 450 +#define _TIER2_RESUME_CHECK 450 +#define _TO_BOOL 451 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -280,13 +281,14 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 451 +#define _UNPACK_SEQUENCE 452 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START +#define _YIELD_OFFSET 453 #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 451 +#define MAX_UOP_ID 453 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 470e95e2b3b041..36911b533244e7 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -260,6 +260,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_DEOPT] = 0, [_ERROR_POP_N] = HAS_ARG_FLAG, [_TIER2_RESUME_CHECK] = HAS_DEOPT_FLAG, + [_RETURN_OFFSET] = 0, + [_YIELD_OFFSET] = 0, }; const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = { @@ -464,6 +466,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_REPLACE_WITH_TRUE] = "_REPLACE_WITH_TRUE", [_RESUME_CHECK] = "_RESUME_CHECK", [_RETURN_GENERATOR] = "_RETURN_GENERATOR", + [_RETURN_OFFSET] = "_RETURN_OFFSET", [_SAVE_RETURN_OFFSET] = "_SAVE_RETURN_OFFSET", [_SETUP_ANNOTATIONS] = "_SETUP_ANNOTATIONS", [_SET_ADD] = "_SET_ADD", @@ -509,6 +512,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_UNPACK_SEQUENCE_TUPLE] = "_UNPACK_SEQUENCE_TUPLE", [_UNPACK_SEQUENCE_TWO_TUPLE] = "_UNPACK_SEQUENCE_TWO_TUPLE", [_WITH_EXCEPT_START] = "_WITH_EXCEPT_START", + [_YIELD_OFFSET] = "_YIELD_OFFSET", [_YIELD_VALUE] = "_YIELD_VALUE", }; int _PyUop_num_popped(int opcode, int oparg) @@ -996,6 +1000,10 @@ int _PyUop_num_popped(int opcode, int oparg) return oparg; case _TIER2_RESUME_CHECK: return 0; + case _RETURN_OFFSET: + return 0; + case _YIELD_OFFSET: + return 0; default: return -1; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index b2a0dc030e20cc..2e9b98370133df 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2589,8 +2589,7 @@ dummy_func( _PyErr_Clear(tstate); } /* iterator ended normally */ - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == INSTRUMENTED_END_FOR); + assert(base_opcode(_PyFrame_GetCode(frame), INSTR_OFFSET() + oparg) == END_FOR); Py_DECREF(iter); STACK_SHRINK(1); /* Jump forward oparg, then skip following END_FOR and POP_TOP instruction */ @@ -4351,6 +4350,14 @@ dummy_func( assert(tstate->tracing || eval_breaker == FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version)); } + tier2 op(_RETURN_OFFSET, (--)) { + frame->instr_ptr += frame->return_offset; + } + + tier2 op(_YIELD_OFFSET, (--)) { + frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_SEND; + } + // END BYTECODES // } diff --git a/Python/ceval.c b/Python/ceval.c index 0d02a9887bef7a..e0f8b8e1fc4fa5 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -655,6 +655,18 @@ extern const struct _PyCode_DEF(8) _Py_InitCleanup; #ifdef Py_DEBUG extern void _PyUOpPrint(const _PyUOpInstruction *uop); + +static int +base_opcode(PyCodeObject *code, int offset) +{ + int opcode = _Py_GetBaseOpcode(code, offset); + if (opcode == ENTER_EXECUTOR) { + int oparg = _PyCode_CODE(code)[offset].op.arg; + _PyExecutorObject *ex = code->co_executors->executors[oparg]; + return ex->vm_data.opcode; + } + return opcode; +} #endif diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5f15f67324292b..6cf901d2b72ebd 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4487,4 +4487,14 @@ break; } + case _RETURN_OFFSET: { + frame->instr_ptr += frame->return_offset; + break; + } + + case _YIELD_OFFSET: { + frame->instr_ptr += 1 + INLINE_CACHE_ENTRIES_SEND; + break; + } + #undef TIER_TWO diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 87098b0506522f..769527d55bd22b 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2788,8 +2788,7 @@ _PyErr_Clear(tstate); } /* iterator ended normally */ - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == INSTRUMENTED_END_FOR); + assert(base_opcode(_PyFrame_GetCode(frame), INSTR_OFFSET() + oparg) == END_FOR); Py_DECREF(iter); STACK_SHRINK(1); /* Jump forward oparg, then skip following END_FOR and POP_TOP instruction */ diff --git a/Python/optimizer.c b/Python/optimizer.c index 8be2c0ffbd78e9..4ddf3bcb1d4d4b 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -211,6 +211,9 @@ _PyOptimizer_Optimize( _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **executor_ptr) { + if (start->op.code == INTERPRETER_EXIT || start->op.code == EXIT_INIT_CHECK) { + return 0; + } PyCodeObject *code = _PyFrame_GetCode(frame); assert(PyCode_Check(code)); PyInterpreterState *interp = _PyInterpreterState_GET(); @@ -747,17 +750,6 @@ translate_bytecode_to_trace( // Reserve space for nuops (+ _SET_IP + _EXIT_TRACE) int nuops = expansion->nuops; RESERVE(nuops + 1); /* One extra for exit */ - int16_t last_op = expansion->uops[nuops-1].uop; - if (last_op == _POP_FRAME || last_op == _RETURN_GENERATOR || last_op == _YIELD_VALUE) { - // Check for trace stack underflow now: - // We can't bail e.g. in the middle of - // LOAD_CONST + _POP_FRAME. - if (trace_stack_depth == 0) { - DPRINTF(2, "Trace stack underflow\n"); - OPT_STAT_INC(trace_stack_underflow); - goto done; - } - } uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM for (int i = 0; i < nuops; i++) { oparg = orig_oparg; @@ -811,6 +803,19 @@ translate_bytecode_to_trace( } if (uop == _POP_FRAME || uop == _RETURN_GENERATOR || uop == _YIELD_VALUE) { + if (trace_stack_depth == 0) { + DPRINTF(2, "Trace stack underflow\n"); + OPT_STAT_INC(trace_stack_underflow); + ADD_TO_TRACE(uop, oparg, 0, target); + if (uop == _POP_FRAME || uop == _RETURN_GENERATOR) { + ADD_TO_TRACE(_RETURN_OFFSET, 0, 0, 0); + } + else { + ADD_TO_TRACE(_YIELD_OFFSET, 0, 0, 0); + } + ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0); + goto done; + } TRACE_STACK_POP(); /* Set the operand to the function or code object returned to, * to assist optimization passes. (See _PUSH_FRAME below.) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 928bc03382b8fb..37166fe475e84f 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -641,7 +641,9 @@ dummy_func(void) { op(_POP_FRAME, (retval -- res)) { SYNC_SP(); ctx->frame->stack_pointer = stack_pointer; - frame_pop(ctx); + if (frame_pop(ctx)) { + goto done; + } stack_pointer = ctx->frame->stack_pointer; res = retval; @@ -663,7 +665,9 @@ dummy_func(void) { op(_RETURN_GENERATOR, ( -- res)) { SYNC_SP(); ctx->frame->stack_pointer = stack_pointer; - frame_pop(ctx); + if (frame_pop(ctx)) { + goto done; + } stack_pointer = ctx->frame->stack_pointer; OUT_OF_SPACE_IF_NULL(res = sym_new_unknown(ctx)); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 2a4efd73d794df..4b6ef160d0c348 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -629,7 +629,9 @@ retval = stack_pointer[-1]; stack_pointer += -1; ctx->frame->stack_pointer = stack_pointer; - frame_pop(ctx); + if (frame_pop(ctx)) { + goto done; + } stack_pointer = ctx->frame->stack_pointer; res = retval; /* Stack space handling */ @@ -1904,7 +1906,9 @@ case _RETURN_GENERATOR: { _Py_UopsSymbol *res; ctx->frame->stack_pointer = stack_pointer; - frame_pop(ctx); + if (frame_pop(ctx)) { + goto done; + } stack_pointer = ctx->frame->stack_pointer; OUT_OF_SPACE_IF_NULL(res = sym_new_unknown(ctx)); /* Stack space handling */ @@ -2199,3 +2203,11 @@ break; } + case _RETURN_OFFSET: { + break; + } + + case _YIELD_OFFSET: { + break; + } + diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 4aeb04fe0405d2..3fea7c9b8d109c 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -384,9 +384,11 @@ _Py_uop_frame_pop(_Py_UOpsContext *ctx) _Py_UOpsAbstractFrame *frame = ctx->frame; ctx->n_consumed = frame->locals; ctx->curr_frame_depth--; - assert(ctx->curr_frame_depth >= 1); + if (ctx->curr_frame_depth == 0) { + ctx->frame = NULL; + return -1; + } ctx->frame = &ctx->frames[ctx->curr_frame_depth - 1]; - return 0; } diff --git a/Python/specialize.c b/Python/specialize.c index 9ac428c3593f56..2266adb3345183 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2357,7 +2357,8 @@ _Py_Specialize_ForIter(PyObject *iter, _Py_CODEUNIT *instr, int oparg) } else if (tp == &PyGen_Type && oparg <= SHRT_MAX) { assert(instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == END_FOR || - instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == INSTRUMENTED_END_FOR + instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == INSTRUMENTED_END_FOR || + instr[oparg + INLINE_CACHE_ENTRIES_FOR_ITER + 1].op.code == ENTER_EXECUTOR ); if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(FOR_ITER, SPEC_FAIL_OTHER);