From 17c6f04ac69b5b0fa5a0c4db8b73469ae125a5aa Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Sun, 31 Dec 2023 21:19:10 +0000 Subject: [PATCH 1/4] More efficient set-ip and check-validity. Work in progress --- Include/internal/pycore_uop_ids.h | 3 +- Include/internal/pycore_uop_metadata.h | 2 + Python/bytecodes.c | 8 +++ Python/executor_cases.c.h | 9 ++++ Python/optimizer.c | 5 +- Python/optimizer_analysis.c | 75 +++++++++++++++++--------- 6 files changed, 74 insertions(+), 28 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 4a9a00ba352d33..8880c06790f5b2 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -231,7 +231,8 @@ extern "C" { #define _SAVE_RETURN_OFFSET 378 #define _INSERT 379 #define _CHECK_VALIDITY 380 -#define MAX_UOP_ID 380 +#define _CHECK_VALIDITY_AND_SET_IP 381 +#define MAX_UOP_ID 381 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 300bd3baa7b377..c75c0e85ba9c3f 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -203,6 +203,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_EXIT_TRACE] = HAS_DEOPT_FLAG, [_INSERT] = HAS_ARG_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, + [_CHECK_VALIDITY_AND_SET_IP] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, }; const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { @@ -256,6 +257,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_CHECK_PEP_523] = "_CHECK_PEP_523", [_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE", [_CHECK_VALIDITY] = "_CHECK_VALIDITY", + [_CHECK_VALIDITY_AND_SET_IP] = "_CHECK_VALIDITY_AND_SET_IP", [_COMPARE_OP] = "_COMPARE_OP", [_COMPARE_OP_FLOAT] = "_COMPARE_OP_FLOAT", [_COMPARE_OP_INT] = "_COMPARE_OP_INT", diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 2eeeac53e1dd7e..c3efae54dae112 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4065,6 +4065,14 @@ 
dummy_func( DEOPT_IF(!current_executor->base.vm_data.valid); } + // TO DO -- Support "super micro ops", so we can write + // op(_CHECK_VALIDITY_AND_SET_IP) = _CHECK_VALIDITY + _SET_IP; + op(_CHECK_VALIDITY_AND_SET_IP, (--)) { + TIER_TWO_ONLY + DEOPT_IF(!current_executor->base.vm_data.valid); + // TODO: Put the code pointer in `operand` to avoid indirection via `frame` + frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + oparg; + } // END BYTECODES // diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 14fb3a05a9f674..848155aa7e0984 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3397,4 +3397,13 @@ break; } + case _CHECK_VALIDITY_AND_SET_IP: { + oparg = CURRENT_OPARG(); + TIER_TWO_ONLY + if (!current_executor->base.vm_data.valid) goto deoptimize; + // TODO: Put the code pointer in `operand` to avoid indirection via `frame` + frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + oparg; + break; + } + #undef TIER_TWO diff --git a/Python/optimizer.c b/Python/optimizer.c index f27af14d967cd3..3e7448b3a1a754 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -512,9 +512,8 @@ translate_bytecode_to_trace( top: // Jump here after _PUSH_FRAME or likely branches for (;;) { target = INSTR_IP(instr, code); - RESERVE_RAW(3, "epilogue"); // Always need space for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE - ADD_TO_TRACE(_SET_IP, target, 0, target); - ADD_TO_TRACE(_CHECK_VALIDITY, 0, 0, target); + RESERVE_RAW(2, "epilogue"); // Always need space for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE + ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, target, 0, target); uint32_t opcode = instr->op.code; uint32_t oparg = instr->op.arg; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 4eb2d9711f5e56..9611bf747c6daa 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -16,35 +16,62 @@ static void remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) { + /* Remove _SET_IP 
and _CHECK_VALIDITY where possible. + * _SET_IP is needed if the following instruction escapes or + * could error. _CHECK_VALIDITY is needed if the previous + * instruction could have escaped. */ int last_set_ip = -1; - bool maybe_invalid = false; + bool may_have_escaped = false; for (int pc = 0; pc < buffer_size; pc++) { int opcode = buffer[pc].opcode; - if (opcode == _SET_IP) { - buffer[pc].opcode = NOP; - last_set_ip = pc; - } - else if (opcode == _CHECK_VALIDITY) { - if (maybe_invalid) { - maybe_invalid = false; - } - else { + switch (opcode) { + case _SET_IP: buffer[pc].opcode = NOP; - } - } - else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { - break; - } - else { - if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) { - maybe_invalid = true; - if (last_set_ip >= 0) { - buffer[last_set_ip].opcode = _SET_IP; + last_set_ip = pc; + break; + case _CHECK_VALIDITY: + if (may_have_escaped) { + may_have_escaped = false; } - } - if ((_PyUop_Flags[opcode] & HAS_ERROR_FLAG) || opcode == _PUSH_FRAME) { - if (last_set_ip >= 0) { - buffer[last_set_ip].opcode = _SET_IP; + else { + buffer[pc].opcode = NOP; + } + break; + case _CHECK_VALIDITY_AND_SET_IP: + if (may_have_escaped) { + may_have_escaped = false; + buffer[pc].opcode = _CHECK_VALIDITY; + } + else { + buffer[pc].opcode = NOP; + } + last_set_ip = pc; + break; + case _JUMP_TO_TOP: + case _EXIT_TRACE: + return; + default: + { + bool needs_ip = false; + if (_PyUop_Flags[opcode] & HAS_ESCAPES_FLAG) { + needs_ip = true; + may_have_escaped = true; + } + if (_PyUop_Flags[opcode] & HAS_ERROR_FLAG) { + needs_ip = true; + } + if (opcode == _PUSH_FRAME) { + needs_ip = true; + } + if (needs_ip && last_set_ip >= 0) { + if (buffer[last_set_ip].opcode == _CHECK_VALIDITY) { + buffer[last_set_ip].opcode = _CHECK_VALIDITY_AND_SET_IP; + } + else { + assert(buffer[last_set_ip].opcode == _NOP); + buffer[last_set_ip].opcode = _SET_IP; + } + last_set_ip = -1; } } } From 1c6ceafef171a937d1adc71862aa290503fc8d6c Mon Sep 17 00:00:00 2001 
From: Mark Shannon Date: Sun, 31 Dec 2023 21:45:12 +0000 Subject: [PATCH 2/4] Store IP, not just offset in SET_IP operand. --- Include/internal/pycore_uop_metadata.h | 4 ++-- Python/bytecodes.c | 6 ++---- Python/executor_cases.c.h | 8 ++------ Python/optimizer.c | 3 ++- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index c75c0e85ba9c3f..2e28b155739b4c 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -198,12 +198,12 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_IS_NONE_POP] = HAS_DEOPT_FLAG, [_GUARD_IS_NOT_NONE_POP] = HAS_DEOPT_FLAG, [_JUMP_TO_TOP] = HAS_EVAL_BREAK_FLAG, - [_SET_IP] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, + [_SET_IP] = 0, [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, [_EXIT_TRACE] = HAS_DEOPT_FLAG, [_INSERT] = HAS_ARG_FLAG, [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, - [_CHECK_VALIDITY_AND_SET_IP] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG, }; const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 3a77586073fcd4..57de3130984ecb 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4038,8 +4038,7 @@ dummy_func( op(_SET_IP, (--)) { TIER_TWO_ONLY - // TODO: Put the code pointer in `operand` to avoid indirection via `frame` - frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + oparg; + frame->instr_ptr = (_Py_CODEUNIT *)CURRENT_OPERAND(); } op(_SAVE_RETURN_OFFSET, (--)) { @@ -4071,8 +4070,7 @@ dummy_func( op(_CHECK_VALIDITY_AND_SET_IP, (--)) { TIER_TWO_ONLY DEOPT_IF(!current_executor->base.vm_data.valid); - // TODO: Put the code pointer in `operand` to avoid indirection via `frame` - frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + oparg; + frame->instr_ptr = (_Py_CODEUNIT *)CURRENT_OPERAND(); } // END BYTECODES // diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h 
index 848155aa7e0984..d4b1680038b162 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3357,10 +3357,8 @@ } case _SET_IP: { - oparg = CURRENT_OPARG(); TIER_TWO_ONLY - // TODO: Put the code pointer in `operand` to avoid indirection via `frame` - frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + oparg; + frame->instr_ptr = (_Py_CODEUNIT *)CURRENT_OPERAND(); break; } @@ -3398,11 +3396,9 @@ } case _CHECK_VALIDITY_AND_SET_IP: { - oparg = CURRENT_OPARG(); TIER_TWO_ONLY if (!current_executor->base.vm_data.valid) goto deoptimize; - // TODO: Put the code pointer in `operand` to avoid indirection via `frame` - frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + oparg; + frame->instr_ptr = (_Py_CODEUNIT *)CURRENT_OPERAND(); break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index 3e7448b3a1a754..b35c2ded50ac90 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -513,7 +513,8 @@ translate_bytecode_to_trace( for (;;) { target = INSTR_IP(instr, code); RESERVE_RAW(2, "epilogue"); // Always need space for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE - ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, target, 0, target); + uintptr_t ip_to_set = (uintptr_t)(_PyCode_CODE(code) + target); + ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, ip_to_set, target); uint32_t opcode = instr->op.code; uint32_t oparg = instr->op.arg; From 4d59a849899d4796ca119c64eb76a2192df9fdc2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 9 Feb 2024 07:44:44 +0000 Subject: [PATCH 3/4] Fix _SET_IP --- Python/bytecodes.c | 10 ++++------ Python/executor_cases.c.h | 6 ++++-- Python/optimizer.c | 3 +-- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 57de3130984ecb..80a78f74d80b36 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4036,9 +4036,9 @@ dummy_func( CHECK_EVAL_BREAKER(); } - op(_SET_IP, (--)) { + op(_SET_IP, (instr_ptr/4 --)) { TIER_TWO_ONLY - frame->instr_ptr = (_Py_CODEUNIT 
*)CURRENT_OPERAND(); + frame->instr_ptr = (_Py_CODEUNIT *)instr_ptr; } op(_SAVE_RETURN_OFFSET, (--)) { @@ -4065,12 +4065,10 @@ dummy_func( DEOPT_IF(!current_executor->base.vm_data.valid); } - // TO DO -- Support "super micro ops", so we can write - // op(_CHECK_VALIDITY_AND_SET_IP) = _CHECK_VALIDITY + _SET_IP; - op(_CHECK_VALIDITY_AND_SET_IP, (--)) { + op(_CHECK_VALIDITY_AND_SET_IP, (instr_ptr/4 --)) { TIER_TWO_ONLY DEOPT_IF(!current_executor->base.vm_data.valid); - frame->instr_ptr = (_Py_CODEUNIT *)CURRENT_OPERAND(); + frame->instr_ptr = instr_ptr; } // END BYTECODES // diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d4b1680038b162..0e735c5d3fa3d0 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3357,8 +3357,9 @@ } case _SET_IP: { + PyObject *instr_ptr = (PyObject *)CURRENT_OPERAND(); TIER_TWO_ONLY - frame->instr_ptr = (_Py_CODEUNIT *)CURRENT_OPERAND(); + frame->instr_ptr = (_Py_CODEUNIT *)instr_ptr; break; } @@ -3396,9 +3397,10 @@ } case _CHECK_VALIDITY_AND_SET_IP: { + PyObject *instr_ptr = (PyObject *)CURRENT_OPERAND(); TIER_TWO_ONLY if (!current_executor->base.vm_data.valid) goto deoptimize; - frame->instr_ptr = (_Py_CODEUNIT *)CURRENT_OPERAND(); + frame->instr_ptr = instr_ptr; break; } diff --git a/Python/optimizer.c b/Python/optimizer.c index b35c2ded50ac90..88b56e189eea3f 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -513,8 +513,7 @@ translate_bytecode_to_trace( for (;;) { target = INSTR_IP(instr, code); RESERVE_RAW(2, "epilogue"); // Always need space for _SET_IP, _CHECK_VALIDITY and _EXIT_TRACE - uintptr_t ip_to_set = (uintptr_t)(_PyCode_CODE(code) + target); - ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, ip_to_set, target); + ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target); uint32_t opcode = instr->op.code; uint32_t oparg = instr->op.arg; From c015afb1bacc13a88579db4bba058fd7d3eb3192 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 9 Feb 2024 11:02:21 +0000 
Subject: [PATCH 4/4] Fix compiler warning --- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a9554c00b10536..d95f5c9437e80b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4099,7 +4099,7 @@ dummy_func( op(_CHECK_VALIDITY_AND_SET_IP, (instr_ptr/4 --)) { TIER_TWO_ONLY DEOPT_IF(!current_executor->vm_data.valid); - frame->instr_ptr = instr_ptr; + frame->instr_ptr = (_Py_CODEUNIT *)instr_ptr; } // END BYTECODES // diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 9bf88e5f48bb71..2db8929f49a796 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3462,7 +3462,7 @@ PyObject *instr_ptr = (PyObject *)CURRENT_OPERAND(); TIER_TWO_ONLY if (!current_executor->vm_data.valid) goto deoptimize; - frame->instr_ptr = instr_ptr; + frame->instr_ptr = (_Py_CODEUNIT *)instr_ptr; break; }