diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index dd1bf2d1d2b51a..97d585dd2c0061 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1474,9 +1474,9 @@ _PyOpcode_macro_expansion[256] = { [STORE_ATTR_SLOT] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _STORE_ATTR_SLOT, 1, 3 } } }, [STORE_ATTR_WITH_HINT] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _STORE_ATTR_WITH_HINT, 1, 3 } } }, [STORE_DEREF] = { .nuops = 1, .uops = { { _STORE_DEREF, OPARG_SIMPLE, 0 } } }, - [STORE_FAST] = { .nuops = 1, .uops = { { _STORE_FAST, OPARG_SIMPLE, 0 } } }, - [STORE_FAST_LOAD_FAST] = { .nuops = 2, .uops = { { _STORE_FAST, OPARG_TOP, 0 }, { _LOAD_FAST, OPARG_BOTTOM, 0 } } }, - [STORE_FAST_STORE_FAST] = { .nuops = 2, .uops = { { _STORE_FAST, OPARG_TOP, 0 }, { _STORE_FAST, OPARG_BOTTOM, 0 } } }, + [STORE_FAST] = { .nuops = 2, .uops = { { _SWAP_FAST, OPARG_SIMPLE, 0 }, { _POP_TOP, OPARG_SIMPLE, 0 } } }, + [STORE_FAST_LOAD_FAST] = { .nuops = 3, .uops = { { _SWAP_FAST, OPARG_TOP, 0 }, { _POP_TOP, OPARG_TOP, 0 }, { _LOAD_FAST, OPARG_BOTTOM, 0 } } }, + [STORE_FAST_STORE_FAST] = { .nuops = 4, .uops = { { _SWAP_FAST, OPARG_TOP, 0 }, { _POP_TOP, OPARG_TOP, 0 }, { _SWAP_FAST, OPARG_BOTTOM, 0 }, { _POP_TOP, OPARG_BOTTOM, 0 } } }, [STORE_GLOBAL] = { .nuops = 1, .uops = { { _STORE_GLOBAL, OPARG_SIMPLE, 0 } } }, [STORE_NAME] = { .nuops = 1, .uops = { { _STORE_NAME, OPARG_SIMPLE, 0 } } }, [STORE_SLICE] = { .nuops = 1, .uops = { { _STORE_SLICE, OPARG_SIMPLE, 0 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index aa11ddb75e19fb..29d66743be7bc9 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -284,72 +284,76 @@ extern "C" { #define _POP_JUMP_IF_FALSE 500 #define _POP_JUMP_IF_TRUE 501 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 502 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 
503 -#define _POP_TWO 504 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 505 +#define _POP_TOP_FLOAT 502 +#define _POP_TOP_INT 503 +#define _POP_TOP_LOAD_CONST_INLINE 504 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 505 +#define _POP_TOP_NOP 506 +#define _POP_TOP_UNICODE 507 +#define _POP_TWO 508 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 509 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 506 +#define _PUSH_FRAME 510 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 507 -#define _PY_FRAME_GENERAL 508 -#define _PY_FRAME_KW 509 -#define _QUICKEN_RESUME 510 -#define _REPLACE_WITH_TRUE 511 +#define _PUSH_NULL_CONDITIONAL 511 +#define _PY_FRAME_GENERAL 512 +#define _PY_FRAME_KW 513 +#define _QUICKEN_RESUME 514 +#define _REPLACE_WITH_TRUE 515 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 512 -#define _SEND 513 -#define _SEND_GEN_FRAME 514 +#define _SAVE_RETURN_OFFSET 516 +#define _SEND 517 +#define _SEND_GEN_FRAME 518 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 515 -#define _STORE_ATTR 516 -#define _STORE_ATTR_INSTANCE_VALUE 517 -#define _STORE_ATTR_SLOT 518 -#define _STORE_ATTR_WITH_HINT 519 +#define _START_EXECUTOR 519 +#define _STORE_ATTR 520 +#define _STORE_ATTR_INSTANCE_VALUE 521 +#define _STORE_ATTR_SLOT 522 +#define _STORE_ATTR_WITH_HINT 523 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 520 -#define _STORE_FAST_0 521 -#define _STORE_FAST_1 522 -#define _STORE_FAST_2 523 -#define _STORE_FAST_3 524 -#define _STORE_FAST_4 525 -#define _STORE_FAST_5 526 -#define _STORE_FAST_6 527 -#define _STORE_FAST_7 528 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 529 -#define 
_STORE_SUBSCR 530 -#define _STORE_SUBSCR_DICT 531 -#define _STORE_SUBSCR_LIST_INT 532 -#define _SWAP 533 -#define _SWAP_2 534 -#define _SWAP_3 535 -#define _TIER2_RESUME_CHECK 536 -#define _TO_BOOL 537 +#define _STORE_SLICE 524 +#define _STORE_SUBSCR 525 +#define _STORE_SUBSCR_DICT 526 +#define _STORE_SUBSCR_LIST_INT 527 +#define _SWAP 528 +#define _SWAP_2 529 +#define _SWAP_3 530 +#define _SWAP_FAST 531 +#define _SWAP_FAST_0 532 +#define _SWAP_FAST_1 533 +#define _SWAP_FAST_2 534 +#define _SWAP_FAST_3 535 +#define _SWAP_FAST_4 536 +#define _SWAP_FAST_5 537 +#define _SWAP_FAST_6 538 +#define _SWAP_FAST_7 539 +#define _TIER2_RESUME_CHECK 540 +#define _TO_BOOL 541 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 538 +#define _TO_BOOL_LIST 542 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 539 +#define _TO_BOOL_STR 543 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 540 -#define _UNPACK_SEQUENCE_LIST 541 -#define _UNPACK_SEQUENCE_TUPLE 542 -#define _UNPACK_SEQUENCE_TWO_TUPLE 543 +#define _UNPACK_SEQUENCE 544 +#define _UNPACK_SEQUENCE_LIST 545 +#define _UNPACK_SEQUENCE_TUPLE 546 +#define _UNPACK_SEQUENCE_TWO_TUPLE 547 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 543 +#define MAX_UOP_ID 547 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 11345a00785817..1875b6a14d7dd4 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -52,18 +52,22 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_SMALL_INT_2] = 0, [_LOAD_SMALL_INT_3] = 0, [_LOAD_SMALL_INT] = HAS_ARG_FLAG, - [_STORE_FAST_0] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, - [_STORE_FAST_1] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, - [_STORE_FAST_2] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, - 
[_STORE_FAST_3] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, - [_STORE_FAST_4] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, - [_STORE_FAST_5] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, - [_STORE_FAST_6] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, - [_STORE_FAST_7] = HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, - [_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, + [_SWAP_FAST_0] = HAS_LOCAL_FLAG, + [_SWAP_FAST_1] = HAS_LOCAL_FLAG, + [_SWAP_FAST_2] = HAS_LOCAL_FLAG, + [_SWAP_FAST_3] = HAS_LOCAL_FLAG, + [_SWAP_FAST_4] = HAS_LOCAL_FLAG, + [_SWAP_FAST_5] = HAS_LOCAL_FLAG, + [_SWAP_FAST_6] = HAS_LOCAL_FLAG, + [_SWAP_FAST_7] = HAS_LOCAL_FLAG, + [_SWAP_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ESCAPES_FLAG, [_POP_TOP] = HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_POP_TOP_NOP] = 0, + [_POP_TOP_INT] = 0, + [_POP_TOP_FLOAT] = 0, + [_POP_TOP_UNICODE] = 0, [_POP_TWO] = HAS_ESCAPES_FLAG, [_PUSH_NULL] = HAS_PURE_FLAG, [_END_FOR] = HAS_ESCAPES_FLAG | HAS_NO_SAVE_IP_FLAG, @@ -338,7 +342,7 @@ const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = { [_LOAD_FAST] = { 0, 8 }, [_LOAD_FAST_BORROW] = { 0, 8 }, [_LOAD_SMALL_INT] = { 0, 4 }, - [_STORE_FAST] = { 0, 8 }, + [_SWAP_FAST] = { 0, 8 }, [_INIT_CALL_PY_EXACT_ARGS] = { 0, 5 }, [_COPY] = { 1, 4 }, [_SWAP] = { 2, 4 }, @@ -593,8 +597,12 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_POP_EXCEPT] = "_POP_EXCEPT", [_POP_ITER] = "_POP_ITER", [_POP_TOP] = "_POP_TOP", + [_POP_TOP_FLOAT] = "_POP_TOP_FLOAT", + [_POP_TOP_INT] = "_POP_TOP_INT", [_POP_TOP_LOAD_CONST_INLINE] = "_POP_TOP_LOAD_CONST_INLINE", [_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW", + [_POP_TOP_NOP] = "_POP_TOP_NOP", + [_POP_TOP_UNICODE] = "_POP_TOP_UNICODE", [_POP_TWO] = "_POP_TWO", [_POP_TWO_LOAD_CONST_INLINE_BORROW] = "_POP_TWO_LOAD_CONST_INLINE_BORROW", [_PUSH_EXC_INFO] = "_PUSH_EXC_INFO", @@ -620,15 +628,6 @@ const char 
*const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_STORE_ATTR_SLOT] = "_STORE_ATTR_SLOT", [_STORE_ATTR_WITH_HINT] = "_STORE_ATTR_WITH_HINT", [_STORE_DEREF] = "_STORE_DEREF", - [_STORE_FAST] = "_STORE_FAST", - [_STORE_FAST_0] = "_STORE_FAST_0", - [_STORE_FAST_1] = "_STORE_FAST_1", - [_STORE_FAST_2] = "_STORE_FAST_2", - [_STORE_FAST_3] = "_STORE_FAST_3", - [_STORE_FAST_4] = "_STORE_FAST_4", - [_STORE_FAST_5] = "_STORE_FAST_5", - [_STORE_FAST_6] = "_STORE_FAST_6", - [_STORE_FAST_7] = "_STORE_FAST_7", [_STORE_FAST_LOAD_FAST] = "_STORE_FAST_LOAD_FAST", [_STORE_FAST_STORE_FAST] = "_STORE_FAST_STORE_FAST", [_STORE_GLOBAL] = "_STORE_GLOBAL", @@ -640,6 +639,15 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_SWAP] = "_SWAP", [_SWAP_2] = "_SWAP_2", [_SWAP_3] = "_SWAP_3", + [_SWAP_FAST] = "_SWAP_FAST", + [_SWAP_FAST_0] = "_SWAP_FAST_0", + [_SWAP_FAST_1] = "_SWAP_FAST_1", + [_SWAP_FAST_2] = "_SWAP_FAST_2", + [_SWAP_FAST_3] = "_SWAP_FAST_3", + [_SWAP_FAST_4] = "_SWAP_FAST_4", + [_SWAP_FAST_5] = "_SWAP_FAST_5", + [_SWAP_FAST_6] = "_SWAP_FAST_6", + [_SWAP_FAST_7] = "_SWAP_FAST_7", [_TIER2_RESUME_CHECK] = "_TIER2_RESUME_CHECK", [_TO_BOOL] = "_TO_BOOL", [_TO_BOOL_BOOL] = "_TO_BOOL_BOOL", @@ -725,23 +733,23 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_SMALL_INT: return 0; - case _STORE_FAST_0: + case _SWAP_FAST_0: return 1; - case _STORE_FAST_1: + case _SWAP_FAST_1: return 1; - case _STORE_FAST_2: + case _SWAP_FAST_2: return 1; - case _STORE_FAST_3: + case _SWAP_FAST_3: return 1; - case _STORE_FAST_4: + case _SWAP_FAST_4: return 1; - case _STORE_FAST_5: + case _SWAP_FAST_5: return 1; - case _STORE_FAST_6: + case _SWAP_FAST_6: return 1; - case _STORE_FAST_7: + case _SWAP_FAST_7: return 1; - case _STORE_FAST: + case _SWAP_FAST: return 1; case _STORE_FAST_LOAD_FAST: return 1; @@ -749,6 +757,14 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _POP_TOP: return 1; + case _POP_TOP_NOP: + return 1; + case _POP_TOP_INT: + return 1; + case 
_POP_TOP_FLOAT: + return 1; + case _POP_TOP_UNICODE: + return 1; case _POP_TWO: return 2; case _PUSH_NULL: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 2b6934d747ebe0..0af7524611a949 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -2349,6 +2349,79 @@ def testfunc(n): assert ex is not None """)) + def test_store_fast_pop_top_specialize_immortal(self): + def testfunc(n): + for _ in range(n): + x = None # _POP_TOP, as x's type is not yet known by optimizer. + x = None # _POP_TOP_NOP, as x = None + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_NOP", uops) + + def test_store_fast_pop_top_specialize_int(self): + def testfunc(n): + y = int(1e6) # Big number so no int caching + for _ in range(n): + x = y + y # _POP_TOP, as x's type is not yet known by optimizer. + x = None # _POP_TOP_INT, as x = int + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_INT", uops) + + def test_store_fast_pop_top_specialize_float(self): + def testfunc(n): + y = 1.0 + for _ in range(n): + x = y + y # _POP_TOP, as x's type is not yet known by optimizer. + x = None # _POP_TOP_FLOAT, as x = float + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_FLOAT", uops) + + def test_store_fast_pop_top_specialize_unicode(self): + def testfunc(n): + y = "hi" + for _ in range(n): + x = y + y # _POP_TOP, as x's type is not yet known by optimizer. 
+ x = None # _POP_TOP_UNICODE, as x = str + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_UNICODE", uops) + + def test_store_pop_top_specialize_none(self): + def testfunc(n): + for _ in range(n): + global_identity(None) + + testfunc(TIER2_THRESHOLD) + + ex = get_first_executor(testfunc) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + + self.assertIn("_POP_TOP_NOP", uops) + + def global_identity(x): return x diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst new file mode 100644 index 00000000000000..dcd1420c4e6478 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-20-14-50-44.gh-issue-134584.3CJdAI.rst @@ -0,0 +1 @@ +Specialize ``POP_TOP`` and thus ``STORE_FAST`` in the JIT compiler by specializing for reference lifetime and type. This will also enable easier top of stack caching in the JIT compiler. 
diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 307844d38ccfcc..b1bb7d8de3c853 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -306,13 +306,14 @@ dummy_func( value = PyStackRef_FromPyObjectBorrow(obj); } - replicate(8) inst(STORE_FAST, (value --)) { - _PyStackRef tmp = GETLOCAL(oparg); + replicate(8) op(_SWAP_FAST, (value -- trash)) { + trash = GETLOCAL(oparg); GETLOCAL(oparg) = value; DEAD(value); - PyStackRef_XCLOSE(tmp); } + macro(STORE_FAST) = _SWAP_FAST + POP_TOP; + pseudo(STORE_FAST_MAYBE_NULL, (unused --)) = { STORE_FAST, }; @@ -344,6 +345,27 @@ dummy_func( PyStackRef_XCLOSE(value); } + op(_POP_TOP_NOP, (value --)) { + assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) || + _Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value)))); + DEAD(value); + } + + op(_POP_TOP_INT, (value --)) { + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + } + + op(_POP_TOP_FLOAT, (value --)) { + assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc); + } + + op(_POP_TOP_UNICODE, (value --)) { + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + } + tier2 op(_POP_TWO, (nos, tos --)) { PyStackRef_CLOSE(tos); PyStackRef_CLOSE(nos); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8f506172550afe..3c5d66774c2f98 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -394,137 +394,110 @@ break; } - case _STORE_FAST_0: { + case _SWAP_FAST_0: { _PyStackRef value; + _PyStackRef trash; oparg = 0; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - _PyStackRef tmp = GETLOCAL(oparg); + trash = GETLOCAL(oparg); GETLOCAL(oparg) = value; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_XCLOSE(tmp); - 
stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[-1] = trash; break; } - case _STORE_FAST_1: { + case _SWAP_FAST_1: { _PyStackRef value; + _PyStackRef trash; oparg = 1; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - _PyStackRef tmp = GETLOCAL(oparg); + trash = GETLOCAL(oparg); GETLOCAL(oparg) = value; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_XCLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[-1] = trash; break; } - case _STORE_FAST_2: { + case _SWAP_FAST_2: { _PyStackRef value; + _PyStackRef trash; oparg = 2; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - _PyStackRef tmp = GETLOCAL(oparg); + trash = GETLOCAL(oparg); GETLOCAL(oparg) = value; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_XCLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[-1] = trash; break; } - case _STORE_FAST_3: { + case _SWAP_FAST_3: { _PyStackRef value; + _PyStackRef trash; oparg = 3; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - _PyStackRef tmp = GETLOCAL(oparg); + trash = GETLOCAL(oparg); GETLOCAL(oparg) = value; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_XCLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[-1] = trash; break; } - case _STORE_FAST_4: { + case _SWAP_FAST_4: { _PyStackRef value; + _PyStackRef trash; oparg = 4; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - _PyStackRef tmp = GETLOCAL(oparg); + trash = GETLOCAL(oparg); GETLOCAL(oparg) = value; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_XCLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[-1] = trash; break; } - case _STORE_FAST_5: { + case 
_SWAP_FAST_5: { _PyStackRef value; + _PyStackRef trash; oparg = 5; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - _PyStackRef tmp = GETLOCAL(oparg); + trash = GETLOCAL(oparg); GETLOCAL(oparg) = value; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_XCLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[-1] = trash; break; } - case _STORE_FAST_6: { + case _SWAP_FAST_6: { _PyStackRef value; + _PyStackRef trash; oparg = 6; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - _PyStackRef tmp = GETLOCAL(oparg); + trash = GETLOCAL(oparg); GETLOCAL(oparg) = value; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_XCLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[-1] = trash; break; } - case _STORE_FAST_7: { + case _SWAP_FAST_7: { _PyStackRef value; + _PyStackRef trash; oparg = 7; assert(oparg == CURRENT_OPARG()); value = stack_pointer[-1]; - _PyStackRef tmp = GETLOCAL(oparg); + trash = GETLOCAL(oparg); GETLOCAL(oparg) = value; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_XCLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[-1] = trash; break; } - case _STORE_FAST: { + case _SWAP_FAST: { _PyStackRef value; + _PyStackRef trash; oparg = CURRENT_OPARG(); value = stack_pointer[-1]; - _PyStackRef tmp = GETLOCAL(oparg); + trash = GETLOCAL(oparg); GETLOCAL(oparg) = value; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_XCLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); + stack_pointer[-1] = trash; break; } @@ -539,6 +512,46 @@ break; } + case _POP_TOP_NOP: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyStackRef_IsNull(value) || (!PyStackRef_RefcountOnObject(value)) 
|| + _Py_IsImmortal((PyStackRef_AsPyObjectBorrow(value)))); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_INT: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyLong_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyLong_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_FLOAT: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyFloat_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyFloat_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_UNICODE: { + _PyStackRef value; + value = stack_pointer[-1]; + assert(PyUnicode_CheckExact(PyStackRef_AsPyObjectBorrow(value))); + PyStackRef_CLOSE_SPECIALIZED(value, _PyUnicode_ExactDealloc); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _POP_TWO: { _PyStackRef tos; _PyStackRef nos; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 8f7932f0033c6f..3e658bc27bb9f2 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -11148,14 +11148,22 @@ next_instr += 1; INSTRUCTION_STATS(STORE_FAST); _PyStackRef value; - value = stack_pointer[-1]; - _PyStackRef tmp = GETLOCAL(oparg); - GETLOCAL(oparg) = value; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - PyStackRef_XCLOSE(tmp); - stack_pointer = _PyFrame_GetStackPointer(frame); + _PyStackRef trash; + // _SWAP_FAST + { + value = stack_pointer[-1]; + trash = GETLOCAL(oparg); + GETLOCAL(oparg) = value; + } + // _POP_TOP + { + value = trash; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + PyStackRef_XCLOSE(value); + stack_pointer = _PyFrame_GetStackPointer(frame); + } DISPATCH(); } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 
8b1a63e3d2916f..145a8c118d3612 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -345,7 +345,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_new_tuple _Py_uop_sym_new_tuple #define sym_tuple_getitem _Py_uop_sym_tuple_getitem #define sym_tuple_length _Py_uop_sym_tuple_length -#define sym_is_immortal _Py_uop_sym_is_immortal +#define sym_is_immortal _Py_uop_symbol_is_immortal #define sym_is_compact_int _Py_uop_sym_is_compact_int #define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 2e240830013a46..2cf67c8482cace 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -34,7 +34,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_new_tuple _Py_uop_sym_new_tuple #define sym_tuple_getitem _Py_uop_sym_tuple_getitem #define sym_tuple_length _Py_uop_sym_tuple_length -#define sym_is_immortal _Py_uop_sym_is_immortal +#define sym_is_immortal _Py_uop_symbol_is_immortal #define sym_new_compact_int _Py_uop_sym_new_compact_int #define sym_is_compact_int _Py_uop_sym_is_compact_int #define sym_new_truthiness _Py_uop_sym_new_truthiness @@ -99,7 +99,8 @@ dummy_func(void) { GETLOCAL(oparg) = temp; } - op(_STORE_FAST, (value --)) { + op(_SWAP_FAST, (value -- trash)) { + trash = GETLOCAL(oparg); GETLOCAL(oparg) = value; } @@ -534,7 +535,7 @@ dummy_func(void) { } op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); } op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { @@ -542,7 +543,7 @@ dummy_func(void) { } op(_POP_TOP_LOAD_CONST_INLINE, (ptr/4, pop -- value)) { - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); } op(_POP_TOP_LOAD_CONST_INLINE_BORROW, (ptr/4, pop -- value)) { @@ -561,6 +562,24 @@ dummy_func(void) { value = 
PyJitRef_Borrow(sym_new_const(ctx, ptr)); } + op(_POP_TOP, (value -- )) { + PyTypeObject *typ = sym_get_type(value); + if (PyJitRef_IsBorrowed(value) || + sym_is_immortal(PyJitRef_Unwrap(value)) || + sym_is_null(value)) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + else if (typ == &PyLong_Type) { + REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0); + } + else if (typ == &PyFloat_Type) { + REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0); + } + else if (typ == &PyUnicode_Type) { + REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0); + } + } + op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { assert(oparg > 0); top = bottom; @@ -803,7 +822,9 @@ dummy_func(void) { } op(_RETURN_VALUE, (retval -- res)) { - JitOptRef temp = retval; + // We wrap and unwrap the value to mimic PyStackRef_MakeHeapSafe + // in bytecodes.c + JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval)); DEAD(retval); SAVE_STACK(); ctx->frame->stack_pointer = stack_pointer; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 3cabf619fe81e7..59f765c68d57b4 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -90,16 +90,58 @@ break; } - case _STORE_FAST: { + case _SWAP_FAST: { JitOptRef value; + JitOptRef trash; value = stack_pointer[-1]; + trash = GETLOCAL(oparg); GETLOCAL(oparg) = value; + stack_pointer[-1] = trash; + break; + } + + case _POP_TOP: { + JitOptRef value; + value = stack_pointer[-1]; + PyTypeObject *typ = sym_get_type(value); + if (PyJitRef_IsBorrowed(value) || + sym_is_immortal(PyJitRef_Unwrap(value)) || + sym_is_null(value)) { + REPLACE_OP(this_instr, _POP_TOP_NOP, 0, 0); + } + else if (typ == &PyLong_Type) { + REPLACE_OP(this_instr, _POP_TOP_INT, 0, 0); + } + else if (typ == &PyFloat_Type) { + REPLACE_OP(this_instr, _POP_TOP_FLOAT, 0, 0); + } + else if (typ == &PyUnicode_Type) { + REPLACE_OP(this_instr, _POP_TOP_UNICODE, 0, 0); + } stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; } - case _POP_TOP: { + case 
_POP_TOP_NOP: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_INT: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_FLOAT: { + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _POP_TOP_UNICODE: { stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); break; @@ -784,7 +826,7 @@ JitOptRef retval; JitOptRef res; retval = stack_pointer[-1]; - JitOptRef temp = retval; + JitOptRef temp = PyJitRef_Wrap(PyJitRef_Unwrap(retval)); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); ctx->frame->stack_pointer = stack_pointer; @@ -2641,7 +2683,7 @@ case _LOAD_CONST_INLINE: { JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); stack_pointer[0] = value; stack_pointer += 1; assert(WITHIN_STACK_BOUNDS()); @@ -2651,7 +2693,7 @@ case _POP_TOP_LOAD_CONST_INLINE: { JitOptRef value; PyObject *ptr = (PyObject *)this_instr->operand0; - value = PyJitRef_Borrow(sym_new_const(ctx, ptr)); + value = sym_new_const(ctx, ptr); stack_pointer[-1] = value; break; } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 64cc1b9074fcf0..c3d9e0e778bf55 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -668,9 +668,6 @@ _Py_uop_symbol_is_immortal(JitOptSymbol *sym) if (sym->tag == JIT_SYM_KNOWN_CLASS_TAG) { return sym->cls.type == &PyBool_Type; } - if (sym->tag == JIT_SYM_TRUTHINESS_TAG) { - return true; - } return false; } diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 6ff0223d2ef3e7..6466d2615cd14e 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -596,6 +596,7 @@ def has_error_without_pop(op: parser.CodeDef) -> bool: "PyStackRef_IsNull", "PyStackRef_MakeHeapSafe", "PyStackRef_None", + "PyStackRef_RefcountOnObject", "PyStackRef_TYPE", "PyStackRef_True", "PyTuple_GET_ITEM", 
diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 10567204dcc599..0bcdc5395dcd8e 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -242,14 +242,10 @@ def generate_expansion_table(analysis: Analysis, out: CWriter) -> None: assert name2 in analysis.instructions, f"{name2} doesn't match any instr" instr1 = analysis.instructions[name1] instr2 = analysis.instructions[name2] - assert ( - len(instr1.parts) == 1 - ), f"{name1} is not a good superinstruction part" - assert ( - len(instr2.parts) == 1 - ), f"{name2} is not a good superinstruction part" - expansions.append((instr1.parts[0].name, "OPARG_TOP", 0)) - expansions.append((instr2.parts[0].name, "OPARG_BOTTOM", 0)) + for part in instr1.parts: + expansions.append((part.name, "OPARG_TOP", 0)) + for part in instr2.parts: + expansions.append((part.name, "OPARG_BOTTOM", 0)) elif not is_viable_expansion(inst): continue else: