From 30962c47e04500f2df153298e6097fad934cd4aa Mon Sep 17 00:00:00 2001 From: Matt Page Date: Tue, 26 Nov 2024 10:41:00 -0800 Subject: [PATCH 1/8] Specialize LOAD_ATTR_MODULE in free-threaded builds --- Include/internal/pycore_opcode_metadata.h | 4 +- Include/internal/pycore_uop_ids.h | 141 +++++++++++----------- Include/internal/pycore_uop_metadata.h | 16 ++- Python/bytecodes.c | 56 ++++++--- Python/executor_cases.c.h | 72 +++++++++-- Python/generated_cases.c.h | 35 ++++-- Python/optimizer_analysis.c | 2 +- Python/optimizer_bytecodes.c | 8 +- Python/optimizer_cases.c.h | 45 +++++-- Python/specialize.c | 69 +++++++---- 10 files changed, 294 insertions(+), 154 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 81dde66a6f26c2..28aa1120414337 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1492,7 +1492,7 @@ int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect) { return 0; } case LOAD_ATTR_MODULE: { - *effect = Py_MAX(0, (oparg & 1)); + *effect = Py_MAX(1, (oparg & 1)); return 0; } case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { @@ -2271,7 +2271,7 @@ _PyOpcode_macro_expansion[256] = { [LOAD_ATTR_METHOD_LAZY_DICT] = { .nuops = 3, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _CHECK_ATTR_METHOD_LAZY_DICT, 1, 3 }, { _LOAD_ATTR_METHOD_LAZY_DICT, 4, 5 } } }, [LOAD_ATTR_METHOD_NO_DICT] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_METHOD_NO_DICT, 4, 5 } } }, [LOAD_ATTR_METHOD_WITH_VALUES] = { .nuops = 4, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, 0, 0 }, { _GUARD_KEYS_VERSION, 2, 3 }, { _LOAD_ATTR_METHOD_WITH_VALUES, 4, 5 } } }, - [LOAD_ATTR_MODULE] = { .nuops = 2, .uops = { { _CHECK_ATTR_MODULE, 2, 1 }, { _LOAD_ATTR_MODULE, 1, 3 } } }, + [LOAD_ATTR_MODULE] = { .nuops = 2, .uops = { { _CHECK_ATTR_MODULE_PUSH_KEYS, 2, 1 }, { _LOAD_ATTR_MODULE_FROM_KEYS, 1, 3 } } }, [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_NONDESCRIPTOR_NO_DICT, 4, 5 } } }, [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { .nuops = 4, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, 0, 0 }, { _GUARD_KEYS_VERSION, 2, 3 }, { _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES, 4, 5 } } }, [LOAD_ATTR_PROPERTY] = { .nuops = 5, .uops = { { _CHECK_PEP_523, 0, 0 }, { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_PROPERTY_FRAME, 4, 5 }, { _SAVE_RETURN_OFFSET, 7, 9 }, { _PUSH_FRAME, 0, 0 } } }, diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index fab4ce6a25b347..45563585dd5681 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -55,7 +55,7 @@ extern "C" { #define _CHECK_AND_ALLOCATE_OBJECT 327 #define _CHECK_ATTR_CLASS 328 #define _CHECK_ATTR_METHOD_LAZY_DICT 329 -#define _CHECK_ATTR_MODULE 330 +#define _CHECK_ATTR_MODULE_PUSH_KEYS 330 #define _CHECK_ATTR_WITH_HINT 331 #define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 332 #define _CHECK_EG_MATCH CHECK_EG_MATCH @@ -186,115 +186,116 @@ extern "C" { #define _LOAD_ATTR_METHOD_NO_DICT 416 #define _LOAD_ATTR_METHOD_WITH_VALUES 417 #define _LOAD_ATTR_MODULE 418 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 419 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 420 -#define _LOAD_ATTR_PROPERTY_FRAME 421 -#define _LOAD_ATTR_SLOT 422 -#define _LOAD_ATTR_SLOT_0 423 -#define _LOAD_ATTR_SLOT_1 424 -#define _LOAD_ATTR_WITH_HINT 425 +#define _LOAD_ATTR_MODULE_FROM_KEYS 419 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 420 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 421 +#define _LOAD_ATTR_PROPERTY_FRAME 422 +#define _LOAD_ATTR_SLOT 423 +#define _LOAD_ATTR_SLOT_0 424 +#define _LOAD_ATTR_SLOT_1 425 +#define _LOAD_ATTR_WITH_HINT 426 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS -#define _LOAD_BYTECODE 426 +#define _LOAD_BYTECODE 427 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST #define _LOAD_CONST_IMMORTAL LOAD_CONST_IMMORTAL -#define _LOAD_CONST_INLINE 427 -#define _LOAD_CONST_INLINE_BORROW 428 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 429 -#define _LOAD_CONST_INLINE_WITH_NULL 430 +#define _LOAD_CONST_INLINE 428 +#define _LOAD_CONST_INLINE_BORROW 429 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 430 +#define _LOAD_CONST_INLINE_WITH_NULL 431 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 431 -#define _LOAD_FAST_0 432 -#define _LOAD_FAST_1 433 -#define _LOAD_FAST_2 434 -#define _LOAD_FAST_3 435 -#define _LOAD_FAST_4 436 -#define _LOAD_FAST_5 437 -#define _LOAD_FAST_6 438 -#define _LOAD_FAST_7 439 +#define _LOAD_FAST 432 +#define _LOAD_FAST_0 433 +#define _LOAD_FAST_1 434 +#define _LOAD_FAST_2 435 +#define _LOAD_FAST_3 436 +#define _LOAD_FAST_4 437 +#define _LOAD_FAST_5 438 +#define _LOAD_FAST_6 439 +#define _LOAD_FAST_7 440 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 440 -#define _LOAD_GLOBAL_BUILTINS 441 -#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 442 -#define _LOAD_GLOBAL_MODULE 443 -#define _LOAD_GLOBAL_MODULE_FROM_KEYS 444 +#define _LOAD_GLOBAL 441 +#define _LOAD_GLOBAL_BUILTINS 442 +#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 443 +#define _LOAD_GLOBAL_MODULE 444 +#define _LOAD_GLOBAL_MODULE_FROM_KEYS 445 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 445 -#define _LOAD_SMALL_INT_0 446 -#define _LOAD_SMALL_INT_1 447 -#define _LOAD_SMALL_INT_2 448 -#define _LOAD_SMALL_INT_3 449 +#define _LOAD_SMALL_INT 446 +#define _LOAD_SMALL_INT_0 447 +#define _LOAD_SMALL_INT_1 448 +#define _LOAD_SMALL_INT_2 449 +#define _LOAD_SMALL_INT_3 450 #define _LOAD_SPECIAL LOAD_SPECIAL #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 450 +#define _MAKE_CALLARGS_A_TUPLE 451 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 451 +#define _MAKE_WARM 452 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 452 -#define _MAYBE_EXPAND_METHOD_KW 453 -#define _MONITOR_CALL 454 -#define _MONITOR_JUMP_BACKWARD 455 -#define _MONITOR_RESUME 456 +#define _MAYBE_EXPAND_METHOD 453 +#define _MAYBE_EXPAND_METHOD_KW 454 +#define _MONITOR_CALL 455 +#define _MONITOR_JUMP_BACKWARD 456 +#define _MONITOR_RESUME 457 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 457 -#define _POP_JUMP_IF_TRUE 458 +#define _POP_JUMP_IF_FALSE 458 +#define _POP_JUMP_IF_TRUE 459 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 459 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 460 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 460 +#define _PUSH_FRAME 461 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 461 -#define _PY_FRAME_KW 462 -#define _QUICKEN_RESUME 463 -#define _REPLACE_WITH_TRUE 464 +#define _PY_FRAME_GENERAL 462 +#define _PY_FRAME_KW 463 +#define _QUICKEN_RESUME 464 +#define _REPLACE_WITH_TRUE 465 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 465 -#define _SEND 466 -#define _SEND_GEN_FRAME 467 +#define _SAVE_RETURN_OFFSET 466 +#define _SEND 467 +#define _SEND_GEN_FRAME 468 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 468 -#define _STORE_ATTR 469 -#define _STORE_ATTR_INSTANCE_VALUE 470 -#define _STORE_ATTR_SLOT 471 -#define _STORE_ATTR_WITH_HINT 472 +#define _START_EXECUTOR 469 +#define _STORE_ATTR 470 +#define _STORE_ATTR_INSTANCE_VALUE 471 +#define _STORE_ATTR_SLOT 472 +#define _STORE_ATTR_WITH_HINT 473 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 473 -#define _STORE_FAST_0 474 -#define _STORE_FAST_1 475 -#define _STORE_FAST_2 476 -#define _STORE_FAST_3 477 -#define _STORE_FAST_4 478 -#define _STORE_FAST_5 479 -#define _STORE_FAST_6 480 -#define _STORE_FAST_7 481 +#define _STORE_FAST 474 +#define _STORE_FAST_0 475 +#define _STORE_FAST_1 476 +#define _STORE_FAST_2 477 +#define _STORE_FAST_3 478 +#define _STORE_FAST_4 479 +#define _STORE_FAST_5 480 +#define _STORE_FAST_6 481 +#define _STORE_FAST_7 482 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 482 -#define _STORE_SUBSCR 483 +#define _STORE_SLICE 483 +#define _STORE_SUBSCR 484 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 484 -#define _TO_BOOL 485 +#define _TIER2_RESUME_CHECK 485 +#define _TO_BOOL 486 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -304,13 +305,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 486 +#define _UNPACK_SEQUENCE 487 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 486 +#define MAX_UOP_ID 487 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 89fce193f40bd8..dd775d3f7d3cdd 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -152,8 +152,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_ATTR_INSTANCE_VALUE_0] = HAS_DEOPT_FLAG, [_LOAD_ATTR_INSTANCE_VALUE_1] = HAS_DEOPT_FLAG, [_LOAD_ATTR_INSTANCE_VALUE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_OPARG_AND_1_FLAG, - [_CHECK_ATTR_MODULE] = HAS_DEOPT_FLAG, - [_LOAD_ATTR_MODULE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_CHECK_ATTR_MODULE_PUSH_KEYS] = HAS_DEOPT_FLAG, + [_LOAD_ATTR_MODULE_FROM_KEYS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_CHECK_ATTR_WITH_HINT] = HAS_EXIT_FLAG, [_LOAD_ATTR_WITH_HINT] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG, [_LOAD_ATTR_SLOT_0] = HAS_DEOPT_FLAG, @@ -283,6 +283,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, [_LOAD_GLOBAL_MODULE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_LOAD_GLOBAL_BUILTINS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_LOAD_ATTR_MODULE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_DYNAMIC_EXIT] = HAS_ESCAPES_FLAG, [_START_EXECUTOR] = 0, @@ -346,7 +347,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_CHECK_AND_ALLOCATE_OBJECT] = "_CHECK_AND_ALLOCATE_OBJECT", [_CHECK_ATTR_CLASS] = "_CHECK_ATTR_CLASS", [_CHECK_ATTR_METHOD_LAZY_DICT] = "_CHECK_ATTR_METHOD_LAZY_DICT", - [_CHECK_ATTR_MODULE] = "_CHECK_ATTR_MODULE", + [_CHECK_ATTR_MODULE_PUSH_KEYS] = "_CHECK_ATTR_MODULE_PUSH_KEYS", [_CHECK_ATTR_WITH_HINT] = "_CHECK_ATTR_WITH_HINT", [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = "_CHECK_CALL_BOUND_METHOD_EXACT_ARGS", [_CHECK_EG_MATCH] = "_CHECK_EG_MATCH", @@ -459,6 +460,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_ATTR_METHOD_NO_DICT] = "_LOAD_ATTR_METHOD_NO_DICT", [_LOAD_ATTR_METHOD_WITH_VALUES] = "_LOAD_ATTR_METHOD_WITH_VALUES", [_LOAD_ATTR_MODULE] = "_LOAD_ATTR_MODULE", + [_LOAD_ATTR_MODULE_FROM_KEYS] = "_LOAD_ATTR_MODULE_FROM_KEYS", [_LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = "_LOAD_ATTR_NONDESCRIPTOR_NO_DICT", [_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = "_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", [_LOAD_ATTR_PROPERTY_FRAME] = "_LOAD_ATTR_PROPERTY_FRAME", @@ -845,10 +847,10 @@ int _PyUop_num_popped(int opcode, int oparg) return 1; case _LOAD_ATTR_INSTANCE_VALUE: return 1; - case _CHECK_ATTR_MODULE: + case _CHECK_ATTR_MODULE_PUSH_KEYS: return 0; - case _LOAD_ATTR_MODULE: - return 1; + case _LOAD_ATTR_MODULE_FROM_KEYS: + return 2; case _CHECK_ATTR_WITH_HINT: return 0; case _LOAD_ATTR_WITH_HINT: @@ -1107,6 +1109,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_GLOBAL_BUILTINS: return 0; + case _LOAD_ATTR_MODULE: + return 1; case _INTERNAL_INCREMENT_OPT_COUNTER: return 1; case _DYNAMIC_EXIT: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 3d280941b35244..1942eb3bb4039d 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2069,7 +2069,7 @@ dummy_func( }; specializing op(_SPECIALIZE_LOAD_ATTR, (counter/1, owner -- owner)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); next_instr = this_instr; @@ -2078,7 +2078,7 @@ dummy_func( } OPCODE_DEFERRED_INC(LOAD_ATTR); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } op(_LOAD_ATTR, (owner -- attr, self_or_null if (oparg & 1))) { @@ -2157,33 +2157,44 @@ dummy_func( _LOAD_ATTR_INSTANCE_VALUE + unused/5; // Skip over rest of cache - op(_CHECK_ATTR_MODULE, (dict_version/2, owner -- owner)) { + op(_CHECK_ATTR_MODULE_PUSH_KEYS, (dict_version/2, owner -- owner, mod_keys: PyDictKeysObject *)) { PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); DEOPT_IF(Py_TYPE(owner_o)->tp_getattro != PyModule_Type.tp_getattro); PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner_o)->md_dict; assert(dict != NULL); - DEOPT_IF(dict->ma_keys->dk_version != dict_version); + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != dict_version); + mod_keys = keys; } - op(_LOAD_ATTR_MODULE, (index/1, owner -- attr, null if (oparg & 1))) { - PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); - PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner_o)->md_dict; - assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); - assert(index < dict->ma_keys->dk_nentries); - PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + index; - PyObject *attr_o = ep->me_value; + op(_LOAD_ATTR_MODULE_FROM_KEYS, (index/1, owner, mod_keys: PyDictKeysObject* -- attr, null if (oparg & 1))) { + assert(mod_keys->dk_kind == DICT_KEYS_UNICODE); + assert(index < FT_ATOMIC_LOAD_SSIZE_RELAXED(mod_keys->dk_nentries)); + PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(mod_keys) + index; + PyObject *attr_o = FT_ATOMIC_LOAD_PTR_RELAXED(ep->me_value); + DEAD(mod_keys); + // Clear mod_keys from stack in case we need to deopt + SAVE_STACK(); + RELOAD_STACK(); DEOPT_IF(attr_o == NULL); - STAT_INC(LOAD_ATTR, hit); + #ifdef Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&ep->me_value, attr_o, &attr); + if (!increfed) { + DEOPT_IF(true); + } + #else Py_INCREF(attr_o); attr = PyStackRef_FromPyObjectSteal(attr_o); + #endif + STAT_INC(LOAD_ATTR, hit); null = PyStackRef_NULL; - DECREF_INPUTS(); + PyStackRef_CLOSE(owner); } macro(LOAD_ATTR_MODULE) = unused/1 + - _CHECK_ATTR_MODULE + - _LOAD_ATTR_MODULE + + _CHECK_ATTR_MODULE_PUSH_KEYS + + _LOAD_ATTR_MODULE_FROM_KEYS + unused/5; op(_CHECK_ATTR_WITH_HINT, (owner -- owner)) { @@ -4961,6 +4972,21 @@ dummy_func( null = PyStackRef_NULL; } + tier2 op(_LOAD_ATTR_MODULE, (index/1, owner -- attr, null if (oparg & 1))) { + PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); + PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner_o)->md_dict; + assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); + assert(index < dict->ma_keys->dk_nentries); + PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + index; + PyObject *attr_o = ep->me_value; + DEOPT_IF(attr_o == NULL); + STAT_INC(LOAD_ATTR, hit); + Py_INCREF(attr_o); + attr = PyStackRef_FromPyObjectSteal(attr_o); + null = PyStackRef_NULL; + DECREF_INPUTS(); + } + /* Internal -- for testing executors */ op(_INTERNAL_INCREMENT_OPT_COUNTER, (opt --)) { _PyCounterOptimizerObject *exe = (_PyCounterOptimizerObject *)PyStackRef_AsPyObjectBorrow(opt); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 987ff2e6419669..8f762c6c1c1d49 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2641,8 +2641,9 @@ /* _LOAD_ATTR_INSTANCE_VALUE is split on (oparg & 1) */ - case _CHECK_ATTR_MODULE: { + case _CHECK_ATTR_MODULE_PUSH_KEYS: { _PyStackRef owner; + PyDictKeysObject *mod_keys; owner = stack_pointer[-1]; uint32_t dict_version = (uint32_t)CURRENT_OPERAND0(); PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); @@ -2652,33 +2653,53 @@ } PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner_o)->md_dict; assert(dict != NULL); - if (dict->ma_keys->dk_version != dict_version) { + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + if (FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != dict_version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + mod_keys = keys; + stack_pointer[0].bits = (uintptr_t)mod_keys; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } - case _LOAD_ATTR_MODULE: { + case _LOAD_ATTR_MODULE_FROM_KEYS: { + PyDictKeysObject *mod_keys; _PyStackRef owner; _PyStackRef attr; _PyStackRef null = PyStackRef_NULL; oparg = CURRENT_OPARG(); - owner = stack_pointer[-1]; + mod_keys = (PyDictKeysObject *)stack_pointer[-1].bits; + owner = stack_pointer[-2]; uint16_t index = (uint16_t)CURRENT_OPERAND0(); - PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); - PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner_o)->md_dict; - assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); - assert(index < dict->ma_keys->dk_nentries); - PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + index; - PyObject *attr_o = ep->me_value; + assert(mod_keys->dk_kind == DICT_KEYS_UNICODE); + assert(index < FT_ATOMIC_LOAD_SSIZE_RELAXED(mod_keys->dk_nentries)); + PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(mod_keys) + index; + PyObject *attr_o = FT_ATOMIC_LOAD_PTR_RELAXED(ep->me_value); + // Clear mod_keys from stack in case we need to deopt + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + stack_pointer = _PyFrame_GetStackPointer(frame); if (attr_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - STAT_INC(LOAD_ATTR, hit); + #ifdef Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&ep->me_value, attr_o, &attr); + if (!increfed) { + if (true) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + } + #else Py_INCREF(attr_o); attr = PyStackRef_FromPyObjectSteal(attr_o); + #endif + STAT_INC(LOAD_ATTR, hit); null = PyStackRef_NULL; PyStackRef_CLOSE(owner); stack_pointer[-1] = attr; @@ -5925,6 +5946,35 @@ break; } + case _LOAD_ATTR_MODULE: { + _PyStackRef owner; + _PyStackRef attr; + _PyStackRef null = PyStackRef_NULL; + oparg = CURRENT_OPARG(); + owner = stack_pointer[-1]; + uint16_t index = (uint16_t)CURRENT_OPERAND0(); + PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); + PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner_o)->md_dict; + assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); + assert(index < dict->ma_keys->dk_nentries); + PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + index; + PyObject *attr_o = ep->me_value; + if (attr_o == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + STAT_INC(LOAD_ATTR, hit); + Py_INCREF(attr_o); + attr = PyStackRef_FromPyObjectSteal(attr_o); + null = PyStackRef_NULL; + PyStackRef_CLOSE(owner); + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _INTERNAL_INCREMENT_OPT_COUNTER: { _PyStackRef opt; opt = stack_pointer[-1]; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 33f32aba1e5145..6a2ce425f3c4c6 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5204,7 +5204,7 @@ owner = stack_pointer[-1]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); next_instr = this_instr; @@ -5215,7 +5215,7 @@ } OPCODE_DEFERRED_INC(LOAD_ATTR); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } /* Skip 8 cache entries */ // _LOAD_ATTR @@ -5557,10 +5557,11 @@ INSTRUCTION_STATS(LOAD_ATTR_MODULE); static_assert(INLINE_CACHE_ENTRIES_LOAD_ATTR == 9, "incorrect cache size"); _PyStackRef owner; + PyDictKeysObject *mod_keys; _PyStackRef attr; _PyStackRef null = PyStackRef_NULL; /* Skip 1 cache entry */ - // _CHECK_ATTR_MODULE + // _CHECK_ATTR_MODULE_PUSH_KEYS { owner = stack_pointer[-1]; uint32_t dict_version = read_u32(&this_instr[2].cache); @@ -5568,21 +5569,31 @@ DEOPT_IF(Py_TYPE(owner_o)->tp_getattro != PyModule_Type.tp_getattro, LOAD_ATTR); PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner_o)->md_dict; assert(dict != NULL); - DEOPT_IF(dict->ma_keys->dk_version != dict_version, LOAD_ATTR); + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != dict_version, LOAD_ATTR); + mod_keys = keys; } - // _LOAD_ATTR_MODULE + // _LOAD_ATTR_MODULE_FROM_KEYS { uint16_t index = read_u16(&this_instr[4].cache); - PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner); - PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner_o)->md_dict; - assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); - assert(index < dict->ma_keys->dk_nentries); - PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + index; - PyObject *attr_o = ep->me_value; + assert(mod_keys->dk_kind == DICT_KEYS_UNICODE); + assert(index < FT_ATOMIC_LOAD_SSIZE_RELAXED(mod_keys->dk_nentries)); + PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(mod_keys) + index; + PyObject *attr_o = FT_ATOMIC_LOAD_PTR_RELAXED(ep->me_value); + // Clear mod_keys from stack in case we need to deopt + _PyFrame_SetStackPointer(frame, stack_pointer); + stack_pointer = _PyFrame_GetStackPointer(frame); DEOPT_IF(attr_o == NULL, LOAD_ATTR); - STAT_INC(LOAD_ATTR, hit); + #ifdef Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&ep->me_value, attr_o, &attr); + if (!increfed) { + DEOPT_IF(true, LOAD_ATTR); + } + #else Py_INCREF(attr_o); attr = PyStackRef_FromPyObjectSteal(attr_o); + #endif + STAT_INC(LOAD_ATTR, hit); null = PyStackRef_NULL; PyStackRef_CLOSE(owner); } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index a4a0472b64e57c..0ef15c630e91db 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -95,7 +95,7 @@ type_watcher_callback(PyTypeObject* type) static PyObject * convert_global_to_const(_PyUOpInstruction *inst, PyObject *obj) { - assert(inst->opcode == _LOAD_GLOBAL_MODULE || inst->opcode == _LOAD_GLOBAL_BUILTINS || inst->opcode == _LOAD_ATTR_MODULE); + assert(inst->opcode == _LOAD_GLOBAL_MODULE || inst->opcode == _LOAD_GLOBAL_BUILTINS || inst->opcode == _LOAD_ATTR_MODULE_FROM_KEYS); assert(PyDict_CheckExact(obj)); PyDictObject *dict = (PyDictObject *)obj; assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 42bdbd9ca8d0cd..be52d3862d7c40 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -492,8 +492,9 @@ dummy_func(void) { (void)owner; } - op(_CHECK_ATTR_MODULE, (dict_version/2, owner -- owner)) { + op(_CHECK_ATTR_MODULE_PUSH_KEYS, (dict_version/2, owner -- owner, mod_keys)) { (void)dict_version; + mod_keys = sym_new_not_null(ctx); if (sym_is_const(owner)) { PyObject *cnst = sym_get_const(owner); if (PyModule_CheckExact(cnst)) { @@ -515,7 +516,7 @@ dummy_func(void) { self_or_null = sym_new_unknown(ctx); } - op(_LOAD_ATTR_MODULE, (index/1, owner -- attr, null if (oparg & 1))) { + op(_LOAD_ATTR_MODULE_FROM_KEYS, (index/1, owner, mod_keys -- attr, null if (oparg & 1))) { (void)index; null = sym_new_null(ctx); attr = NULL; @@ -530,6 +531,9 @@ dummy_func(void) { this_instr[-1].opcode = _POP_TOP; attr = sym_new_const(ctx, res); } + else { + this_instr->opcode = _LOAD_ATTR_MODULE; + } } if (attr == NULL) { /* No conversion made. We don't know what `attr` is. */ diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index f77a5aa35bdf82..43118b10a3a3c6 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1134,32 +1134,42 @@ break; } - case _CHECK_ATTR_MODULE: { + case _CHECK_ATTR_MODULE_PUSH_KEYS: { _Py_UopsSymbol *owner; + _Py_UopsSymbol *mod_keys; owner = stack_pointer[-1]; uint32_t dict_version = (uint32_t)this_instr->operand0; (void)dict_version; + mod_keys = sym_new_not_null(ctx); if (sym_is_const(owner)) { PyObject *cnst = sym_get_const(owner); if (PyModule_CheckExact(cnst)) { PyModuleObject *mod = (PyModuleObject *)cnst; PyObject *dict = mod->md_dict; + stack_pointer[0] = mod_keys; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); uint64_t watched_mutations = get_mutations(dict); if (watched_mutations < _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS) { PyDict_Watch(GLOBALS_WATCHER_ID, dict); _Py_BloomFilter_Add(dependencies, dict); this_instr->opcode = _NOP; } + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); } } + stack_pointer[0] = mod_keys; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } - case _LOAD_ATTR_MODULE: { + case _LOAD_ATTR_MODULE_FROM_KEYS: { _Py_UopsSymbol *owner; _Py_UopsSymbol *attr; _Py_UopsSymbol *null = NULL; - owner = stack_pointer[-1]; + owner = stack_pointer[-2]; uint16_t index = (uint16_t)this_instr->operand0; (void)index; null = sym_new_null(ctx); @@ -1170,25 +1180,28 @@ PyModuleObject *mod = (PyModuleObject *)sym_get_const(owner); assert(PyModule_CheckExact(mod)); PyObject *dict = mod->md_dict; - stack_pointer[-1] = attr; - if (oparg & 1) stack_pointer[0] = null; - stack_pointer += (oparg & 1); + stack_pointer[-2] = attr; + if (oparg & 1) stack_pointer[-1] = null; + stack_pointer += -1 + (oparg & 1); assert(WITHIN_STACK_BOUNDS()); PyObject *res = convert_global_to_const(this_instr, dict); if (res != NULL) { this_instr[-1].opcode = _POP_TOP; attr = sym_new_const(ctx, res); } - stack_pointer += -(oparg & 1); + else { + this_instr->opcode = _LOAD_ATTR_MODULE; + } + stack_pointer += 1 - (oparg & 1); assert(WITHIN_STACK_BOUNDS()); } if (attr == NULL) { /* No conversion made. We don't know what `attr` is. */ attr = sym_new_not_null(ctx); } - stack_pointer[-1] = attr; - if (oparg & 1) stack_pointer[0] = null; - stack_pointer += (oparg & 1); + stack_pointer[-2] = attr; + if (oparg & 1) stack_pointer[-1] = null; + stack_pointer += -1 + (oparg & 1); assert(WITHIN_STACK_BOUNDS()); break; } @@ -2528,6 +2541,18 @@ break; } + case _LOAD_ATTR_MODULE: { + _Py_UopsSymbol *attr; + _Py_UopsSymbol *null = NULL; + attr = sym_new_not_null(ctx); + null = sym_new_null(ctx); + stack_pointer[-1] = attr; + if (oparg & 1) stack_pointer[0] = null; + stack_pointer += (oparg & 1); + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _INTERNAL_INCREMENT_OPT_COUNTER: { stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/specialize.c b/Python/specialize.c index ec2cd7025e5054..df6b4047a9fcb1 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -738,22 +738,16 @@ unspecialize(_Py_CODEUNIT *instr) } static int function_kind(PyCodeObject *code); +#ifndef Py_GIL_DISABLED static bool function_check_args(PyObject *o, int expected_argcount, int opcode); static uint32_t function_get_version(PyObject *o, int opcode); +#endif static uint32_t type_get_version(PyTypeObject *t, int opcode); static int -specialize_module_load_attr( - PyObject *owner, _Py_CODEUNIT *instr, PyObject *name -) { +specialize_module_load_attr_lock_held(PyDictObject *dict, _Py_CODEUNIT *instr, PyObject *name) +{ _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); - PyModuleObject *m = (PyModuleObject *)owner; - assert((Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); - PyDictObject *dict = (PyDictObject *)m->md_dict; - if (dict == NULL) { - SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_NO_DICT); - return -1; - } if (dict->ma_keys->dk_kind != DICT_KEYS_UNICODE) { SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_NON_STRING); return -1; @@ -773,19 +767,35 @@ specialize_module_load_attr( SPEC_FAIL_OUT_OF_RANGE); return -1; } - uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState( - _PyInterpreterState_GET(), dict->ma_keys); + uint32_t keys_version = _PyDict_GetKeysVersionForCurrentState( + _PyInterpreterState_GET(), dict); if (keys_version == 0) { SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OUT_OF_VERSIONS); return -1; } write_u32(cache->version, keys_version); cache->index = (uint16_t)index; - instr->op.code = LOAD_ATTR_MODULE; + specialize(instr, LOAD_ATTR_MODULE); return 0; } - +static int +specialize_module_load_attr( + PyObject *owner, _Py_CODEUNIT *instr, PyObject *name) +{ + PyModuleObject *m = (PyModuleObject *)owner; + assert((Py_TYPE(owner)->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); + PyDictObject *dict = (PyDictObject *)m->md_dict; + if (dict == NULL) { + SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_NO_DICT); + return -1; + } + int result; + Py_BEGIN_CRITICAL_SECTION(dict); + result = specialize_module_load_attr_lock_held(dict, instr, name); + Py_END_CRITICAL_SECTION(); + return result; +} /* Attribute specialization */ @@ -994,6 +1004,7 @@ specialize_dict_access( return 1; } +#ifndef Py_GIL_DISABLED static int specialize_attr_loadclassattr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* name, PyObject* descr, DescriptorClassification kind, bool is_method); static int specialize_class_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* name); @@ -1194,14 +1205,14 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na } return -1; } +#endif // Py_GIL_DISABLED void _Py_Specialize_LoadAttr(_PyStackRef owner_st, _Py_CODEUNIT *instr, PyObject *name) { - _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); PyObject *owner = PyStackRef_AsPyObjectBorrow(owner_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[LOAD_ATTR] == INLINE_CACHE_ENTRIES_LOAD_ATTR); PyTypeObject *type = Py_TYPE(owner); bool fail; @@ -1216,22 +1227,24 @@ _Py_Specialize_LoadAttr(_PyStackRef owner_st, _Py_CODEUNIT *instr, PyObject *nam fail = specialize_module_load_attr(owner, instr, name); } else if (PyType_Check(owner)) { + #ifdef Py_GIL_DISABLED + SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_EXPECTED_ERROR); + fail = true; + #else fail = specialize_class_load_attr(owner, instr, name); + #endif } else { + #ifdef Py_GIL_DISABLED + SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_EXPECTED_ERROR); + fail = true; + #else fail = specialize_instance_load_attr(owner, instr, name); + #endif } if (fail) { - STAT_INC(LOAD_ATTR, failure); - assert(!PyErr_Occurred()); - instr->op.code = LOAD_ATTR; - cache->counter = adaptive_counter_backoff(cache->counter); - } - else { - STAT_INC(LOAD_ATTR, success); - assert(!PyErr_Occurred()); - cache->counter = adaptive_counter_cooldown(); + unspecialize(instr); } } @@ -1336,6 +1349,7 @@ _Py_Specialize_StoreAttr(_PyStackRef owner_st, _Py_CODEUNIT *instr, PyObject *na cache->counter = adaptive_counter_cooldown(); } +#ifndef Py_GIL_DISABLED #ifdef Py_STATS static int @@ -1513,6 +1527,9 @@ PyObject *descr, DescriptorClassification kind, bool is_method) return 1; } +#endif // Py_GIL_DISABLED + + static void specialize_load_global_lock_held( PyObject *globals, PyObject *builtins, @@ -1658,6 +1675,7 @@ function_kind(PyCodeObject *code) { return SIMPLE_FUNCTION; } +#ifndef Py_GIL_DISABLED /* Returning false indicates a failure. */ static bool function_check_args(PyObject *o, int expected_argcount, int opcode) @@ -1690,6 +1708,7 @@ function_get_version(PyObject *o, int opcode) } return version; } +#endif // Py_GIL_DISABLED /* Returning 0 indicates a failure. */ static uint32_t From 89cb71e0d741986902025ce83b6e7120259fa284 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Tue, 3 Dec 2024 17:01:40 -0800 Subject: [PATCH 2/8] Add POP_DEAD_INPUTS --- Lib/test/test_generated_cases.py | 74 ++++++++++++++++++++++ Python/bytecodes.c | 3 +- Python/executor_cases.c.h | 2 - Python/generated_cases.c.h | 2 - Tools/cases_generator/generators_common.py | 14 ++++ Tools/cases_generator/stack.py | 4 ++ 6 files changed, 93 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index 66862ec17cca98..9c65e81dfe4be1 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -1639,6 +1639,80 @@ def test_escaping_call_next_to_cmacro(self): """ self.run_cases_test(input, output) + def test_pop_dead_inputs_all_live(self): + input = """ + inst(OP, (a, b --)) { + POP_DEAD_INPUTS(); + HAM(a, b); + INPUTS_DEAD(); + } + """ + output = """ + TARGET(OP) { + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(OP); + _PyStackRef a; + _PyStackRef b; + b = stack_pointer[-1]; + a = stack_pointer[-2]; + HAM(a, b); + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + DISPATCH(); + } + """ + self.run_cases_test(input, output) + + def test_pop_dead_inputs_some_live(self): + input = """ + inst(OP, (a, b, c --)) { + POP_DEAD_INPUTS(); + HAM(a); + INPUTS_DEAD(); + } + """ + output = """ + TARGET(OP) { + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(OP); + _PyStackRef a; + a = stack_pointer[-3]; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + HAM(a); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + DISPATCH(); + } + """ + self.run_cases_test(input, output) + + def test_pop_dead_inputs_with_output(self): + input = """ + inst(OP, (a, b -- c)) { + POP_DEAD_INPUTS(); + c = SPAM(); + } + """ + output = """ + TARGET(OP) { + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(OP); + _PyStackRef c; + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + c = SPAM(); + stack_pointer[0] = c; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + DISPATCH(); + } + """ + self.run_cases_test(input, output) + class TestGeneratedAbstractCases(unittest.TestCase): def setUp(self) -> None: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 1942eb3bb4039d..d523340c5d485b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2174,8 +2174,7 @@ dummy_func( PyObject *attr_o = FT_ATOMIC_LOAD_PTR_RELAXED(ep->me_value); DEAD(mod_keys); // Clear mod_keys from stack in case we need to deopt - SAVE_STACK(); - RELOAD_STACK(); + POP_DEAD_INPUTS(); DEOPT_IF(attr_o == NULL); #ifdef Py_GIL_DISABLED int increfed = _Py_TryIncrefCompareStackRef(&ep->me_value, attr_o, &attr); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8f762c6c1c1d49..daf0f37c0bf967 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2681,8 +2681,6 @@ // Clear mod_keys from stack in case we need to deopt stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); - _PyFrame_SetStackPointer(frame, stack_pointer); - stack_pointer = _PyFrame_GetStackPointer(frame); if (attr_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 6a2ce425f3c4c6..74a488a271bbbb 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5581,8 +5581,6 @@ PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(mod_keys) + index; PyObject *attr_o = FT_ATOMIC_LOAD_PTR_RELAXED(ep->me_value); // Clear mod_keys from stack in case we need to deopt - _PyFrame_SetStackPointer(frame, stack_pointer); - stack_pointer = _PyFrame_GetStackPointer(frame); DEOPT_IF(attr_o == NULL, LOAD_ATTR); #ifdef Py_GIL_DISABLED int increfed = _Py_TryIncrefCompareStackRef(&ep->me_value, attr_o, &attr); diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index dad2557e97a948..41d3152800b675 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -120,6 +120,7 @@ def __init__(self, out: CWriter): "PyStackRef_AsPyObjectSteal": self.stackref_steal, "DISPATCH": self.dispatch, "INSTRUCTION_SIZE": self.instruction_size, + "POP_DEAD_INPUTS": self.pop_dead_inputs, } self.out = out @@ -348,6 +349,19 @@ def save_stack( self.emit_save(storage) return True + def pop_dead_inputs( + self, + tkn: Token, + tkn_iter: TokenIterator, + uop: Uop, + storage: Storage, + inst: Instruction | None, + ) -> None: + next(tkn_iter) + next(tkn_iter) + next(tkn_iter) + storage.pop_dead_inputs(self.out) + def emit_reload(self, storage: Storage) -> None: storage.reload(self.out) self._print_storage(storage) diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index 286f47d0cfb11b..9471fe0e56f7d8 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -512,6 +512,10 @@ def flush(self, out: CWriter, cast_type: str = "uintptr_t", extract_bits: bool = self._push_defined_outputs() self.stack.flush(out, cast_type, extract_bits) + def pop_dead_inputs(self, out: CWriter, cast_type: str = "uintptr_t", extract_bits: bool = True) -> None: + self.clear_dead_inputs() + self.stack.flush(out, cast_type, extract_bits) + def save(self, out: CWriter) -> None: assert self.spilled >= 0 if self.spilled == 0: From 13a05a0949fee9a58e53df1526c3e85701baecc9 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Wed, 4 Dec 2024 16:34:18 -0800 Subject: [PATCH 3/8] Fix comment --- Python/optimizer_bytecodes.c | 2 +- Python/optimizer_cases.c.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index be52d3862d7c40..0b8aff02367e31 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -521,7 +521,7 @@ dummy_func(void) { null = sym_new_null(ctx); attr = NULL; if (this_instr[-1].opcode == _NOP) { - // Preceding _CHECK_ATTR_MODULE was removed: mod is const and dict is watched. + // Preceding _CHECK_ATTR_MODULE_PUSH_KEYS was removed: mod is const and dict is watched. assert(sym_is_const(owner)); PyModuleObject *mod = (PyModuleObject *)sym_get_const(owner); assert(PyModule_CheckExact(mod)); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 43118b10a3a3c6..f4fbe8c8aa0480 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1175,7 +1175,7 @@ null = sym_new_null(ctx); attr = NULL; if (this_instr[-1].opcode == _NOP) { - // Preceding _CHECK_ATTR_MODULE was removed: mod is const and dict is watched. + // Preceding _CHECK_ATTR_MODULE_PUSH_KEYS was removed: mod is const and dict is watched. assert(sym_is_const(owner)); PyModuleObject *mod = (PyModuleObject *)sym_get_const(owner); assert(PyModule_CheckExact(mod)); From eb8ce39bc74c6ed7af32e0bf06fe2b4551632431 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 5 Dec 2024 21:53:47 -0800 Subject: [PATCH 4/8] Refactor --- Python/specialize.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index df6b4047a9fcb1..8b03fee6d6494e 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -975,7 +975,7 @@ specialize_dict_access( } write_u32(cache->version, type->tp_version_tag); cache->index = (uint16_t)offset; - instr->op.code = values_op; + specialize(instr, values_op); } else { PyDictObject *dict = _PyObject_GetManagedDict(owner); @@ -999,7 +999,7 @@ specialize_dict_access( } cache->index = (uint16_t)index; write_u32(cache->version, type->tp_version_tag); - instr->op.code = hint_op; + specialize(instr, hint_op); } return 1; } @@ -1101,7 +1101,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na write_u32(lm_cache->type_version, type->tp_version_tag); /* borrowed */ write_obj(lm_cache->descr, fget); - instr->op.code = LOAD_ATTR_PROPERTY; + specialize(instr, LOAD_ATTR_PROPERTY); return 0; } case OBJECT_SLOT: @@ -1125,7 +1125,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na assert(offset > 0); cache->index = (uint16_t)offset; write_u32(cache->version, type->tp_version_tag); - instr->op.code = LOAD_ATTR_SLOT; + specialize(instr, LOAD_ATTR_SLOT); return 0; } case DUNDER_CLASS: @@ -1134,7 +1134,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na assert(offset == (uint16_t)offset); cache->index = (uint16_t)offset; write_u32(cache->version, type->tp_version_tag); - instr->op.code = LOAD_ATTR_SLOT; + specialize(instr, LOAD_ATTR_SLOT); return 0; } case OTHER_SLOT: @@ -1170,7 +1170,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na /* borrowed */ write_obj(lm_cache->descr, descr); write_u32(lm_cache->type_version, type->tp_version_tag); - instr->op.code = LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN; + specialize(instr, LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN); return 0; } case BUILTIN_CLASSMETHOD: @@ -1194,6 +1194,7 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na if (shadow) { goto try_instance; } + set_counter((_Py_BackoffCounter*)instr + 1, adaptive_counter_cooldown()); return 0; } Py_UNREACHABLE(); @@ -1433,10 +1434,10 @@ specialize_class_load_attr(PyObject *owner, _Py_CODEUNIT *instr, write_obj(cache->descr, descr); if (metaclass_check) { write_u32(cache->keys_version, Py_TYPE(cls)->tp_version_tag); - instr->op.code = LOAD_ATTR_CLASS_WITH_METACLASS_CHECK; + specialize(instr, LOAD_ATTR_CLASS_WITH_METACLASS_CHECK); } else { - instr->op.code = LOAD_ATTR_CLASS; + specialize(instr, LOAD_ATTR_CLASS); } return 0; #ifdef Py_STATS @@ -1472,7 +1473,7 @@ PyObject *descr, DescriptorClassification kind, bool is_method) return 0; } write_u32(cache->keys_version, keys_version); - instr->op.code = is_method ? LOAD_ATTR_METHOD_WITH_VALUES : LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES; + specialize(instr, is_method ? LOAD_ATTR_METHOD_WITH_VALUES : LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES); } else { Py_ssize_t dictoffset; @@ -1487,7 +1488,7 @@ PyObject *descr, DescriptorClassification kind, bool is_method) } } if (dictoffset == 0) { - instr->op.code = is_method ? LOAD_ATTR_METHOD_NO_DICT : LOAD_ATTR_NONDESCRIPTOR_NO_DICT; + specialize(instr, is_method ? LOAD_ATTR_METHOD_NO_DICT : LOAD_ATTR_NONDESCRIPTOR_NO_DICT); } else if (is_method) { PyObject *dict = *(PyObject **) ((char *)owner + dictoffset); @@ -1501,7 +1502,7 @@ PyObject *descr, DescriptorClassification kind, bool is_method) dictoffset -= MANAGED_DICT_OFFSET; assert(((uint16_t)dictoffset) == dictoffset); cache->dict_offset = (uint16_t)dictoffset; - instr->op.code = LOAD_ATTR_METHOD_LAZY_DICT; + specialize(instr, LOAD_ATTR_METHOD_LAZY_DICT); } else { SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_ATTR_CLASS_ATTR_SIMPLE); From 7032a5c842bd5d7e4f309f5d00ff15cfb5cb0809 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 6 Dec 2024 14:41:07 -0800 Subject: [PATCH 5/8] Enable test --- Lib/test/test_opcache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 50b5f365165921..0a7557adc4763b 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -892,7 +892,7 @@ def write(items): opname = "LOAD_ATTR_METHOD_WITH_VALUES" self.assert_races_do_not_crash(opname, get_items, read, write) - @requires_specialization + @requires_specialization_ft def test_load_attr_module(self): def get_items(): items = [] From 38140d960885710f8b285e0eb45ed8b7ee1db8a2 Mon Sep 17 00:00:00 2001 From: Matt Page Date: Fri, 6 Dec 2024 15:05:43 -0800 Subject: [PATCH 6/8] Appease mypy --- Tools/cases_generator/generators_common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 41d3152800b675..d17617cab0266b 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -356,11 +356,12 @@ def pop_dead_inputs( uop: Uop, storage: Storage, inst: Instruction | None, - ) -> None: + ) -> bool: next(tkn_iter) next(tkn_iter) next(tkn_iter) storage.pop_dead_inputs(self.out) + return True def emit_reload(self, storage: Storage) -> None: storage.reload(self.out) From c6ddd69d19d52c65eba3e86d5304699f9c7e35ca Mon Sep 17 00:00:00 2001 From: Matt Page Date: Sun, 8 Dec 2024 21:14:52 -0800 Subject: [PATCH 7/8] Fix test_c_subclass_of_heap_ctype_with_del_modifying_dunder_class_only_decrefs_once Refactor the test so that the specialized and unspecialized implementation of loading the argument to sys.getrefcount use the same refcounting approach. Previously, the argument would be evaluated by loading an attribute from a module. This specializes to LOAD_ATTR_MODULE. In free-threaded builds the unspecialized form of LOAD_ATTR always creates a new reference for its result, while the specialized form does not create a reference if the result uses deferred refcounting. This causes a difference in the result returned from sys.getrefcount, depending on whether or not the bytecode has been specialized (e.g. on runs > 1 in refleak tests). The refactored version uses LOAD_GLOBAL, whose specialized and unspecialized forms both do not create references when the result uses deferred refcounting. Also refactor the test to handle the difference in the result returned from sys.getrefcount in default builds (includes the temporary reference on the operand stack) and free-threaded builds (no temporary reference is created for deferred values). --- Lib/test/test_capi/test_misc.py | 47 ++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 8e0271919cc8a5..e6511926eb6b4c 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -48,6 +48,8 @@ # Skip this test if the _testcapi module isn't available. _testcapi = import_helper.import_module('_testcapi') +from _testcapi import HeapCTypeSubclass, HeapCTypeSubclassWithFinalizer + import _testlimitedcapi import _testinternalcapi @@ -646,9 +648,9 @@ def test_c_subclass_of_heap_ctype_with_tpdealloc_decrefs_once(self): self.assertEqual(type_refcnt - 1, sys.getrefcount(_testcapi.HeapCTypeSubclass)) def test_c_subclass_of_heap_ctype_with_del_modifying_dunder_class_only_decrefs_once(self): - subclass_instance = _testcapi.HeapCTypeSubclassWithFinalizer() - type_refcnt = sys.getrefcount(_testcapi.HeapCTypeSubclassWithFinalizer) - new_type_refcnt = sys.getrefcount(_testcapi.HeapCTypeSubclass) + subclass_instance = HeapCTypeSubclassWithFinalizer() + type_refcnt = sys.getrefcount(HeapCTypeSubclassWithFinalizer) + new_type_refcnt = sys.getrefcount(HeapCTypeSubclass) # Test that subclass instance was fully created self.assertEqual(subclass_instance.value, 10) @@ -658,19 +660,46 @@ def test_c_subclass_of_heap_ctype_with_del_modifying_dunder_class_only_decrefs_o del subclass_instance # Test that setting __class__ modified the reference counts of the types + # + # This is highly sensitive to implementation details and may break in the future. + # + # We expect the refcount on the old type, HeapCTypeSubclassWithFinalizer, to + # remain the same: the finalizer gets a strong reference (+1) when it gets the + # type from the module and setting __class__ decrements the refcount (-1). + # + # We expect the refcount on the new type, HeapCTypeSubclass, to increase by 2: + # the finalizer get a strong reference (+1) when it gets the type from the + # module and setting __class__ increments the refcount (+1). + expected_type_refcnt = type_refcnt + expected_new_type_refcnt = new_type_refcnt + 2 + + if not Py_GIL_DISABLED: + # In default builds the result returned from sys.getrefcount + # includes a temporary reference that is created by the interpreter + # when it pushes its argument on the operand stack. This temporary + # reference is not included in the result returned by Py_REFCNT, which + # is used in the finalizer. + # + # In free-threaded builds the result returned from sys.getrefcount + # does not include the temporary reference. Types use deferred + # refcounting and the interpreter will not create a new reference + # for deferred values on the operand stack. + expected_type_refcnt -= 1 + expected_new_type_refcnt -= 1 + if support.Py_DEBUG: # gh-89373: In debug mode, _Py_Dealloc() keeps a strong reference # to the type while calling tp_dealloc() - self.assertEqual(type_refcnt, _testcapi.HeapCTypeSubclassWithFinalizer.refcnt_in_del) - else: - self.assertEqual(type_refcnt - 1, _testcapi.HeapCTypeSubclassWithFinalizer.refcnt_in_del) - self.assertEqual(new_type_refcnt + 1, _testcapi.HeapCTypeSubclass.refcnt_in_del) + expected_type_refcnt += 1 + + self.assertEqual(expected_type_refcnt, HeapCTypeSubclassWithFinalizer.refcnt_in_del) + self.assertEqual(expected_new_type_refcnt, HeapCTypeSubclass.refcnt_in_del) # Test that the original type already has decreased its refcnt - self.assertEqual(type_refcnt - 1, sys.getrefcount(_testcapi.HeapCTypeSubclassWithFinalizer)) + self.assertEqual(type_refcnt - 1, sys.getrefcount(HeapCTypeSubclassWithFinalizer)) # Test that subtype_dealloc decref the newly assigned __class__ only once - self.assertEqual(new_type_refcnt, sys.getrefcount(_testcapi.HeapCTypeSubclass)) + self.assertEqual(new_type_refcnt, sys.getrefcount(HeapCTypeSubclass)) def test_heaptype_with_setattro(self): obj = _testcapi.HeapCTypeSetattr() From a483958df88215f9163e903a655c4bb940605a1b Mon Sep 17 00:00:00 2001 From: Matt Page Date: Thu, 12 Dec 2024 09:26:48 -0800 Subject: [PATCH 8/8] Fix formatting --- Python/bytecodes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 768b29b950df24..9d6b781c611c78 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2167,7 +2167,7 @@ dummy_func( mod_keys = keys; } - op(_LOAD_ATTR_MODULE_FROM_KEYS, (index/1, owner, mod_keys: PyDictKeysObject* -- attr, null if (oparg & 1))) { + op(_LOAD_ATTR_MODULE_FROM_KEYS, (index/1, owner, mod_keys: PyDictKeysObject * -- attr, null if (oparg & 1))) { assert(mod_keys->dk_kind == DICT_KEYS_UNICODE); assert(index < FT_ATOMIC_LOAD_SSIZE_RELAXED(mod_keys->dk_nentries)); PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(mod_keys) + index;