From 18c78b1e7dbd8b2bc3987b0a6886c064ecc16d02 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 16 Jun 2025 14:42:12 +0100 Subject: [PATCH 1/3] Support limited scalar replacement for replicated uops in the code generator. Use it to support efficient specializations of COPY and SWAP in the JIT. --- Include/internal/pycore_uop_ids.h | 347 +++++++++--------- Include/internal/pycore_uop_metadata.h | 37 +- Python/bytecodes.c | 6 +- Python/executor_cases.c.h | 61 ++- Python/generated_cases.c.h | 2 - Python/optimizer.c | 4 +- Tools/cases_generator/analyzer.py | 38 +- Tools/cases_generator/parsing.py | 8 +- .../cases_generator/uop_metadata_generator.py | 8 +- 9 files changed, 313 insertions(+), 198 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 2b845527cf2ed5..8211c5d056535e 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -86,86 +86,89 @@ extern "C" { #define _CONTAINS_OP_DICT 362 #define _CONTAINS_OP_SET 363 #define _CONVERT_VALUE CONVERT_VALUE -#define _COPY COPY +#define _COPY 364 +#define _COPY_1 365 +#define _COPY_2 366 +#define _COPY_3 367 #define _COPY_FREE_VARS COPY_FREE_VARS -#define _CREATE_INIT_FRAME 364 +#define _CREATE_INIT_FRAME 368 #define _DELETE_ATTR DELETE_ATTR #define _DELETE_DEREF DELETE_DEREF #define _DELETE_FAST DELETE_FAST #define _DELETE_GLOBAL DELETE_GLOBAL #define _DELETE_NAME DELETE_NAME #define _DELETE_SUBSCR DELETE_SUBSCR -#define _DEOPT 365 +#define _DEOPT 369 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE -#define _DO_CALL 366 -#define _DO_CALL_FUNCTION_EX 367 -#define _DO_CALL_KW 368 +#define _DO_CALL 370 +#define _DO_CALL_FUNCTION_EX 371 +#define _DO_CALL_KW 372 #define _END_FOR END_FOR #define _END_SEND END_SEND -#define _ERROR_POP_N 369 +#define _ERROR_POP_N 373 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _EXPAND_METHOD 370 -#define _EXPAND_METHOD_KW 371 -#define _FATAL_ERROR 372 +#define _EXPAND_METHOD 374 +#define _EXPAND_METHOD_KW 375 +#define _FATAL_ERROR 376 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 373 -#define _FOR_ITER_GEN_FRAME 374 -#define _FOR_ITER_TIER_TWO 375 +#define _FOR_ITER 377 +#define _FOR_ITER_GEN_FRAME 378 +#define _FOR_ITER_TIER_TWO 379 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BINARY_OP_EXTEND 376 -#define _GUARD_CALLABLE_ISINSTANCE 377 -#define _GUARD_CALLABLE_LEN 378 -#define _GUARD_CALLABLE_LIST_APPEND 379 -#define _GUARD_CALLABLE_STR_1 380 -#define _GUARD_CALLABLE_TUPLE_1 381 -#define _GUARD_CALLABLE_TYPE_1 382 -#define _GUARD_DORV_NO_DICT 383 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 384 -#define _GUARD_GLOBALS_VERSION 385 -#define _GUARD_IS_FALSE_POP 386 -#define _GUARD_IS_NONE_POP 387 -#define _GUARD_IS_NOT_NONE_POP 388 -#define _GUARD_IS_TRUE_POP 389 -#define _GUARD_KEYS_VERSION 390 -#define _GUARD_NOS_DICT 391 -#define _GUARD_NOS_FLOAT 392 -#define _GUARD_NOS_INT 393 -#define _GUARD_NOS_LIST 394 -#define _GUARD_NOS_NOT_NULL 395 -#define _GUARD_NOS_NULL 396 -#define _GUARD_NOS_TUPLE 397 -#define _GUARD_NOS_UNICODE 398 -#define _GUARD_NOT_EXHAUSTED_LIST 399 -#define _GUARD_NOT_EXHAUSTED_RANGE 400 -#define _GUARD_NOT_EXHAUSTED_TUPLE 401 -#define _GUARD_THIRD_NULL 402 -#define _GUARD_TOS_ANY_SET 403 -#define _GUARD_TOS_DICT 404 -#define _GUARD_TOS_FLOAT 405 -#define _GUARD_TOS_INT 406 -#define _GUARD_TOS_LIST 407 -#define _GUARD_TOS_SLICE 408 -#define _GUARD_TOS_TUPLE 409 -#define _GUARD_TOS_UNICODE 410 -#define _GUARD_TYPE_VERSION 411 -#define _GUARD_TYPE_VERSION_AND_LOCK 412 +#define _GUARD_BINARY_OP_EXTEND 380 +#define _GUARD_CALLABLE_ISINSTANCE 381 +#define _GUARD_CALLABLE_LEN 382 +#define _GUARD_CALLABLE_LIST_APPEND 383 +#define _GUARD_CALLABLE_STR_1 384 +#define _GUARD_CALLABLE_TUPLE_1 385 +#define _GUARD_CALLABLE_TYPE_1 386 +#define _GUARD_DORV_NO_DICT 387 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 388 +#define _GUARD_GLOBALS_VERSION 389 +#define _GUARD_IS_FALSE_POP 390 +#define _GUARD_IS_NONE_POP 391 +#define _GUARD_IS_NOT_NONE_POP 392 +#define _GUARD_IS_TRUE_POP 393 +#define _GUARD_KEYS_VERSION 394 +#define _GUARD_NOS_DICT 395 +#define _GUARD_NOS_FLOAT 396 +#define _GUARD_NOS_INT 397 +#define _GUARD_NOS_LIST 398 +#define _GUARD_NOS_NOT_NULL 399 +#define _GUARD_NOS_NULL 400 +#define _GUARD_NOS_TUPLE 401 +#define _GUARD_NOS_UNICODE 402 +#define _GUARD_NOT_EXHAUSTED_LIST 403 +#define _GUARD_NOT_EXHAUSTED_RANGE 404 +#define _GUARD_NOT_EXHAUSTED_TUPLE 405 +#define _GUARD_THIRD_NULL 406 +#define _GUARD_TOS_ANY_SET 407 +#define _GUARD_TOS_DICT 408 +#define _GUARD_TOS_FLOAT 409 +#define _GUARD_TOS_INT 410 +#define _GUARD_TOS_LIST 411 +#define _GUARD_TOS_SLICE 412 +#define _GUARD_TOS_TUPLE 413 +#define _GUARD_TOS_UNICODE 414 +#define _GUARD_TYPE_VERSION 415 +#define _GUARD_TYPE_VERSION_AND_LOCK 416 #define _IMPORT_FROM IMPORT_FROM #define _IMPORT_NAME IMPORT_NAME -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 413 -#define _INIT_CALL_PY_EXACT_ARGS 414 -#define _INIT_CALL_PY_EXACT_ARGS_0 415 -#define _INIT_CALL_PY_EXACT_ARGS_1 416 -#define _INIT_CALL_PY_EXACT_ARGS_2 417 -#define _INIT_CALL_PY_EXACT_ARGS_3 418 -#define _INIT_CALL_PY_EXACT_ARGS_4 419 -#define _INSERT_NULL 420 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 417 +#define _INIT_CALL_PY_EXACT_ARGS 418 +#define _INIT_CALL_PY_EXACT_ARGS_0 419 +#define _INIT_CALL_PY_EXACT_ARGS_1 420 +#define _INIT_CALL_PY_EXACT_ARGS_2 421 +#define _INIT_CALL_PY_EXACT_ARGS_3 422 +#define _INIT_CALL_PY_EXACT_ARGS_4 423 +#define _INSERT_NULL 424 #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER #define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION #define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD @@ -175,171 +178,173 @@ extern "C" { #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE -#define _IS_NONE 421 +#define _IS_NONE 425 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 422 -#define _ITER_CHECK_RANGE 423 -#define _ITER_CHECK_TUPLE 424 -#define _ITER_JUMP_LIST 425 -#define _ITER_JUMP_RANGE 426 -#define _ITER_JUMP_TUPLE 427 -#define _ITER_NEXT_LIST 428 -#define _ITER_NEXT_LIST_TIER_TWO 429 -#define _ITER_NEXT_RANGE 430 -#define _ITER_NEXT_TUPLE 431 -#define _JUMP_TO_TOP 432 +#define _ITER_CHECK_LIST 426 +#define _ITER_CHECK_RANGE 427 +#define _ITER_CHECK_TUPLE 428 +#define _ITER_JUMP_LIST 429 +#define _ITER_JUMP_RANGE 430 +#define _ITER_JUMP_TUPLE 431 +#define _ITER_NEXT_LIST 432 +#define _ITER_NEXT_LIST_TIER_TWO 433 +#define _ITER_NEXT_RANGE 434 +#define _ITER_NEXT_TUPLE 435 +#define _JUMP_TO_TOP 436 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND -#define _LOAD_ATTR 433 -#define _LOAD_ATTR_CLASS 434 +#define _LOAD_ATTR 437 +#define _LOAD_ATTR_CLASS 438 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 435 -#define _LOAD_ATTR_METHOD_LAZY_DICT 436 -#define _LOAD_ATTR_METHOD_NO_DICT 437 -#define _LOAD_ATTR_METHOD_WITH_VALUES 438 -#define _LOAD_ATTR_MODULE 439 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 440 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 441 -#define _LOAD_ATTR_PROPERTY_FRAME 442 -#define _LOAD_ATTR_SLOT 443 -#define _LOAD_ATTR_WITH_HINT 444 +#define _LOAD_ATTR_INSTANCE_VALUE 439 +#define _LOAD_ATTR_METHOD_LAZY_DICT 440 +#define _LOAD_ATTR_METHOD_NO_DICT 441 +#define _LOAD_ATTR_METHOD_WITH_VALUES 442 +#define _LOAD_ATTR_MODULE 443 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 444 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 445 +#define _LOAD_ATTR_PROPERTY_FRAME 446 +#define _LOAD_ATTR_SLOT 447 +#define _LOAD_ATTR_WITH_HINT 448 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS -#define _LOAD_BYTECODE 445 +#define _LOAD_BYTECODE 449 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 446 -#define _LOAD_CONST_INLINE_BORROW 447 -#define _LOAD_CONST_UNDER_INLINE 448 -#define _LOAD_CONST_UNDER_INLINE_BORROW 449 +#define _LOAD_CONST_INLINE 450 +#define _LOAD_CONST_INLINE_BORROW 451 +#define _LOAD_CONST_UNDER_INLINE 452 +#define _LOAD_CONST_UNDER_INLINE_BORROW 453 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 450 -#define _LOAD_FAST_0 451 -#define _LOAD_FAST_1 452 -#define _LOAD_FAST_2 453 -#define _LOAD_FAST_3 454 -#define _LOAD_FAST_4 455 -#define _LOAD_FAST_5 456 -#define _LOAD_FAST_6 457 -#define _LOAD_FAST_7 458 +#define _LOAD_FAST 454 +#define _LOAD_FAST_0 455 +#define _LOAD_FAST_1 456 +#define _LOAD_FAST_2 457 +#define _LOAD_FAST_3 458 +#define _LOAD_FAST_4 459 +#define _LOAD_FAST_5 460 +#define _LOAD_FAST_6 461 +#define _LOAD_FAST_7 462 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR -#define _LOAD_FAST_BORROW 459 -#define _LOAD_FAST_BORROW_0 460 -#define _LOAD_FAST_BORROW_1 461 -#define _LOAD_FAST_BORROW_2 462 -#define _LOAD_FAST_BORROW_3 463 -#define _LOAD_FAST_BORROW_4 464 -#define _LOAD_FAST_BORROW_5 465 -#define _LOAD_FAST_BORROW_6 466 -#define _LOAD_FAST_BORROW_7 467 +#define _LOAD_FAST_BORROW 463 +#define _LOAD_FAST_BORROW_0 464 +#define _LOAD_FAST_BORROW_1 465 +#define _LOAD_FAST_BORROW_2 466 +#define _LOAD_FAST_BORROW_3 467 +#define _LOAD_FAST_BORROW_4 468 +#define _LOAD_FAST_BORROW_5 469 +#define _LOAD_FAST_BORROW_6 470 +#define _LOAD_FAST_BORROW_7 471 #define _LOAD_FAST_BORROW_LOAD_FAST_BORROW LOAD_FAST_BORROW_LOAD_FAST_BORROW #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 468 -#define _LOAD_GLOBAL_BUILTINS 469 -#define _LOAD_GLOBAL_MODULE 470 +#define _LOAD_GLOBAL 472 +#define _LOAD_GLOBAL_BUILTINS 473 +#define _LOAD_GLOBAL_MODULE 474 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 471 -#define _LOAD_SMALL_INT_0 472 -#define _LOAD_SMALL_INT_1 473 -#define _LOAD_SMALL_INT_2 474 -#define _LOAD_SMALL_INT_3 475 -#define _LOAD_SPECIAL 476 +#define _LOAD_SMALL_INT 475 +#define _LOAD_SMALL_INT_0 476 +#define _LOAD_SMALL_INT_1 477 +#define _LOAD_SMALL_INT_2 478 +#define _LOAD_SMALL_INT_3 479 +#define _LOAD_SPECIAL 480 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 477 +#define _MAKE_CALLARGS_A_TUPLE 481 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 478 +#define _MAKE_WARM 482 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 479 -#define _MAYBE_EXPAND_METHOD_KW 480 -#define _MONITOR_CALL 481 -#define _MONITOR_CALL_KW 482 -#define _MONITOR_JUMP_BACKWARD 483 -#define _MONITOR_RESUME 484 +#define _MAYBE_EXPAND_METHOD 483 +#define _MAYBE_EXPAND_METHOD_KW 484 +#define _MONITOR_CALL 485 +#define _MONITOR_CALL_KW 486 +#define _MONITOR_JUMP_BACKWARD 487 +#define _MONITOR_RESUME 488 #define _NOP NOP -#define _POP_CALL 485 -#define _POP_CALL_LOAD_CONST_INLINE_BORROW 486 -#define _POP_CALL_ONE 487 -#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 488 -#define _POP_CALL_TWO 489 -#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 490 +#define _POP_CALL 489 +#define _POP_CALL_LOAD_CONST_INLINE_BORROW 490 +#define _POP_CALL_ONE 491 +#define _POP_CALL_ONE_LOAD_CONST_INLINE_BORROW 492 +#define _POP_CALL_TWO 493 +#define _POP_CALL_TWO_LOAD_CONST_INLINE_BORROW 494 #define _POP_EXCEPT POP_EXCEPT #define _POP_ITER POP_ITER -#define _POP_JUMP_IF_FALSE 491 -#define _POP_JUMP_IF_TRUE 492 +#define _POP_JUMP_IF_FALSE 495 +#define _POP_JUMP_IF_TRUE 496 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE 493 -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 494 -#define _POP_TWO 495 -#define _POP_TWO_LOAD_CONST_INLINE_BORROW 496 +#define _POP_TOP_LOAD_CONST_INLINE 497 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 498 +#define _POP_TWO 499 +#define _POP_TWO_LOAD_CONST_INLINE_BORROW 500 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 497 +#define _PUSH_FRAME 501 #define _PUSH_NULL PUSH_NULL -#define _PUSH_NULL_CONDITIONAL 498 -#define _PY_FRAME_GENERAL 499 -#define _PY_FRAME_KW 500 -#define _QUICKEN_RESUME 501 -#define _REPLACE_WITH_TRUE 502 +#define _PUSH_NULL_CONDITIONAL 502 +#define _PY_FRAME_GENERAL 503 +#define _PY_FRAME_KW 504 +#define _QUICKEN_RESUME 505 +#define _REPLACE_WITH_TRUE 506 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 503 -#define _SEND 504 -#define _SEND_GEN_FRAME 505 +#define _SAVE_RETURN_OFFSET 507 +#define _SEND 508 +#define _SEND_GEN_FRAME 509 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 506 -#define _STORE_ATTR 507 -#define _STORE_ATTR_INSTANCE_VALUE 508 -#define _STORE_ATTR_SLOT 509 -#define _STORE_ATTR_WITH_HINT 510 +#define _START_EXECUTOR 510 +#define _STORE_ATTR 511 +#define _STORE_ATTR_INSTANCE_VALUE 512 +#define _STORE_ATTR_SLOT 513 +#define _STORE_ATTR_WITH_HINT 514 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 511 -#define _STORE_FAST_0 512 -#define _STORE_FAST_1 513 -#define _STORE_FAST_2 514 -#define _STORE_FAST_3 515 -#define _STORE_FAST_4 516 -#define _STORE_FAST_5 517 -#define _STORE_FAST_6 518 -#define _STORE_FAST_7 519 +#define _STORE_FAST 515 +#define _STORE_FAST_0 516 +#define _STORE_FAST_1 517 +#define _STORE_FAST_2 518 +#define _STORE_FAST_3 519 +#define _STORE_FAST_4 520 +#define _STORE_FAST_5 521 +#define _STORE_FAST_6 522 +#define _STORE_FAST_7 523 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 520 -#define _STORE_SUBSCR 521 -#define _STORE_SUBSCR_DICT 522 -#define _STORE_SUBSCR_LIST_INT 523 -#define _SWAP SWAP -#define _TIER2_RESUME_CHECK 524 -#define _TO_BOOL 525 +#define _STORE_SLICE 524 +#define _STORE_SUBSCR 525 +#define _STORE_SUBSCR_DICT 526 +#define _STORE_SUBSCR_LIST_INT 527 +#define _SWAP 528 +#define _SWAP_2 529 +#define _SWAP_3 530 +#define _TIER2_RESUME_CHECK 531 +#define _TO_BOOL 532 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT -#define _TO_BOOL_LIST 526 +#define _TO_BOOL_LIST 533 #define _TO_BOOL_NONE TO_BOOL_NONE -#define _TO_BOOL_STR 527 +#define _TO_BOOL_STR 534 #define _UNARY_INVERT UNARY_INVERT #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 528 -#define _UNPACK_SEQUENCE_LIST 529 -#define _UNPACK_SEQUENCE_TUPLE 530 -#define _UNPACK_SEQUENCE_TWO_TUPLE 531 +#define _UNPACK_SEQUENCE 535 +#define _UNPACK_SEQUENCE_LIST 536 +#define _UNPACK_SEQUENCE_TUPLE 537 +#define _UNPACK_SEQUENCE_TWO_TUPLE 538 #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 531 +#define MAX_UOP_ID 538 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index cd36023c25cbb4..fad87d4b586e64 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -12,7 +12,8 @@ extern "C" { #include #include "pycore_uop_ids.h" extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1]; -extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1]; +typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange; +extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1]; extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1]; extern int _PyUop_num_popped(int opcode, int oparg); @@ -288,8 +289,13 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_FORMAT_WITH_SPEC] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_COPY_1] = HAS_PURE_FLAG, + [_COPY_2] = HAS_PURE_FLAG, + [_COPY_3] = HAS_PURE_FLAG, [_COPY] = HAS_ARG_FLAG | HAS_PURE_FLAG, [_BINARY_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_SWAP_2] = HAS_PURE_FLAG, + [_SWAP_3] = HAS_PURE_FLAG, [_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG, [_GUARD_IS_TRUE_POP] = HAS_EXIT_FLAG, [_GUARD_IS_FALSE_POP] = HAS_EXIT_FLAG, @@ -323,12 +329,14 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_TIER2_RESUME_CHECK] = HAS_DEOPT_FLAG, }; -const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = { - [_LOAD_FAST] = 8, - [_LOAD_FAST_BORROW] = 8, - [_LOAD_SMALL_INT] = 4, - [_STORE_FAST] = 8, - [_INIT_CALL_PY_EXACT_ARGS] = 5, +const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = { + [_LOAD_FAST] = { 0, 8 }, + [_LOAD_FAST_BORROW] = { 0, 8 }, + [_LOAD_SMALL_INT] = { 0, 4 }, + [_STORE_FAST] = { 0, 8 }, + [_INIT_CALL_PY_EXACT_ARGS] = { 0, 5 }, + [_COPY] = { 1, 4 }, + [_SWAP] = { 2, 4 }, }; const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { @@ -408,6 +416,9 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_CONTAINS_OP_SET] = "_CONTAINS_OP_SET", [_CONVERT_VALUE] = "_CONVERT_VALUE", [_COPY] = "_COPY", + [_COPY_1] = "_COPY_1", + [_COPY_2] = "_COPY_2", + [_COPY_3] = "_COPY_3", [_COPY_FREE_VARS] = "_COPY_FREE_VARS", [_CREATE_INIT_FRAME] = "_CREATE_INIT_FRAME", [_DELETE_ATTR] = "_DELETE_ATTR", @@ -617,6 +628,8 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_STORE_SUBSCR_DICT] = "_STORE_SUBSCR_DICT", [_STORE_SUBSCR_LIST_INT] = "_STORE_SUBSCR_LIST_INT", [_SWAP] = "_SWAP", + [_SWAP_2] = "_SWAP_2", + [_SWAP_3] = "_SWAP_3", [_TIER2_RESUME_CHECK] = "_TIER2_RESUME_CHECK", [_TO_BOOL] = "_TO_BOOL", [_TO_BOOL_BOOL] = "_TO_BOOL_BOOL", @@ -1176,10 +1189,20 @@ int _PyUop_num_popped(int opcode, int oparg) return 1; case _FORMAT_WITH_SPEC: return 2; + case _COPY_1: + return 0; + case _COPY_2: + return 0; + case _COPY_3: + return 0; case _COPY: return 0; case _BINARY_OP: return 2; + case _SWAP_2: + return 0; + case _SWAP_3: + return 0; case _SWAP: return 0; case _GUARD_IS_TRUE_POP: diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 971e97a5784692..27a04766cc8dd8 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4946,8 +4946,7 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - pure inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { - assert(oparg > 0); + pure replicate(1:4) inst(COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { top = PyStackRef_DUP(bottom); } @@ -4980,12 +4979,11 @@ dummy_func( macro(BINARY_OP) = _SPECIALIZE_BINARY_OP + unused/4 + _BINARY_OP; - pure inst(SWAP, (bottom, unused[oparg-2], top -- + pure replicate(2:4) inst(SWAP, (bottom, unused[oparg-2], top -- bottom, unused[oparg-2], top)) { _PyStackRef temp = bottom; bottom = top; top = temp; - assert(oparg >= 2); } inst(INSTRUMENTED_LINE, ( -- )) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index dbfb2391bf0623..74c78e4d1f5a69 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -6763,12 +6763,44 @@ break; } + case _COPY_1: { + _PyStackRef bottom; + _PyStackRef top; + bottom = stack_pointer[-1]; + top = PyStackRef_DUP(bottom); + stack_pointer[0] = top; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _COPY_2: { + _PyStackRef bottom; + _PyStackRef top; + bottom = stack_pointer[-2]; + top = PyStackRef_DUP(bottom); + stack_pointer[0] = top; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _COPY_3: { + _PyStackRef bottom; + _PyStackRef top; + bottom = stack_pointer[-3]; + top = PyStackRef_DUP(bottom); + stack_pointer[0] = top; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + case _COPY: { _PyStackRef bottom; _PyStackRef top; oparg = CURRENT_OPARG(); bottom = stack_pointer[-1 - (oparg-1)]; - assert(oparg > 0); top = PyStackRef_DUP(bottom); stack_pointer[0] = top; stack_pointer += 1; @@ -6808,6 +6840,32 @@ break; } + case _SWAP_2: { + _PyStackRef top; + _PyStackRef bottom; + top = stack_pointer[-1]; + bottom = stack_pointer[-2]; + _PyStackRef temp = bottom; + bottom = top; + top = temp; + stack_pointer[-2] = bottom; + stack_pointer[-1] = top; + break; + } + + case _SWAP_3: { + _PyStackRef top; + _PyStackRef bottom; + top = stack_pointer[-1]; + bottom = stack_pointer[-3]; + _PyStackRef temp = bottom; + bottom = top; + top = temp; + stack_pointer[-3] = bottom; + stack_pointer[-1] = top; + break; + } + case _SWAP: { _PyStackRef top; _PyStackRef bottom; @@ -6817,7 +6875,6 @@ _PyStackRef temp = bottom; bottom = top; top = temp; - assert(oparg >= 2); stack_pointer[-2 - (oparg-2)] = bottom; stack_pointer[-1] = top; break; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 2cf027c539b992..4fc1d5266d0a87 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -5228,7 +5228,6 @@ _PyStackRef bottom; _PyStackRef top; bottom = stack_pointer[-1 - (oparg-1)]; - assert(oparg > 0); top = PyStackRef_DUP(bottom); stack_pointer[0] = top; stack_pointer += 1; @@ -11568,7 +11567,6 @@ _PyStackRef temp = bottom; bottom = top; top = temp; - assert(oparg >= 2); stack_pointer[-2 - (oparg-2)] = bottom; stack_pointer[-1] = top; DISPATCH(); diff --git a/Python/optimizer.c b/Python/optimizer.c index dde3dd8ebe745a..8d01d605ef4a2a 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1292,8 +1292,8 @@ uop_optimize( for (int pc = 0; pc < length; pc++) { int opcode = buffer[pc].opcode; int oparg = buffer[pc].oparg; - if (oparg < _PyUop_Replication[opcode]) { - buffer[pc].opcode = opcode + oparg + 1; + if (oparg < _PyUop_Replication[opcode].stop && oparg >= _PyUop_Replication[opcode].start) { + buffer[pc].opcode = opcode + oparg + 1 - _PyUop_Replication[opcode].start; assert(strncmp(_PyOpcode_uop_name[buffer[pc].opcode], _PyOpcode_uop_name[opcode], strlen(_PyOpcode_uop_name[opcode])) == 0); } else if (is_terminator(&buffer[pc])) { diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index ca6d0301f3572d..6397a7e6029eaa 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -180,7 +180,7 @@ class Uop: properties: Properties _size: int = -1 implicitly_created: bool = False - replicated = 0 + replicated = range(0) replicates: "Uop | None" = None # Size of the instruction(s), only set for uops containing the INSTRUCTION_SIZE macro instruction_size: int | None = None @@ -868,6 +868,28 @@ def compute_properties(op: parser.CodeDef) -> Properties: needs_prev=variable_used(op, "prev_instr"), ) +def expand(items: list[StackItem], oparg: int) -> list[StackItem]: + index = -1 + for i, item in enumerate(items): + if "oparg" in item.size: + if index >= 0: + return items + index = i + if index < 0: + return items + try: + count = int(eval(items[index].size.replace("oparg", str(oparg)))) + except ValueError: + return items + return items[:index] + [ + StackItem(items[index].name + f"_{i}", "", items[index].peek, items[index].used) for i in range(count) + ] + items[index+1:] + +def scalarize_stack(stack: StackEffect, oparg: int) -> StackEffect: + # Only scalarize if no more than one input or output is array + stack.inputs = expand(stack.inputs, oparg) + stack.outputs = expand(stack.outputs, oparg) + return stack def make_uop( name: str, @@ -887,20 +909,26 @@ def make_uop( ) for anno in op.annotations: if anno.startswith("replicate"): - result.replicated = int(anno[10:-1]) + text = anno[10:-1] + start, stop = text.split(":") + result.replicated = range(int(start), int(stop)) break else: return result - for oparg in range(result.replicated): + for oparg in result.replicated: name_x = name + "_" + str(oparg) properties = compute_properties(op) properties.oparg = False - properties.const_oparg = oparg + stack = analyze_stack(op) + if not variable_used(op, "oparg"): + stack = scalarize_stack(stack, oparg) + else: + properties.const_oparg = oparg rep = Uop( name=name_x, context=op.context, annotations=op.annotations, - stack=analyze_stack(op), + stack=stack, caches=analyze_caches(inputs), local_stores=find_variable_stores(op), body=op.block, diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index a6dac48187525d..c7fe0d162ac6e4 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -379,9 +379,13 @@ def inst_header(self) -> InstHeader | None: while anno := self.expect(lx.ANNOTATION): if anno.text == "replicate": self.require(lx.LPAREN) - times = self.require(lx.NUMBER) + stop = self.require(lx.NUMBER) + start_text = "0" + if self.expect(lx.COLON): + start_text = stop.text + stop = self.require(lx.NUMBER) self.require(lx.RPAREN) - annotations.append(f"replicate({times.text})") + annotations.append(f"replicate({start_text}:{stop.text})") else: annotations.append(anno.text) tkn = self.expect(lx.INST) diff --git a/Tools/cases_generator/uop_metadata_generator.py b/Tools/cases_generator/uop_metadata_generator.py index 6f995e5c46bfcf..1cc23837a72dea 100644 --- a/Tools/cases_generator/uop_metadata_generator.py +++ b/Tools/cases_generator/uop_metadata_generator.py @@ -24,7 +24,8 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None: out.emit("extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1];\n") - out.emit("extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1];\n") + out.emit("typedef struct _rep_range { uint8_t start; uint8_t stop; } ReplicationRange;\n") + out.emit("extern const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1];\n") out.emit("extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1];\n\n") out.emit("extern int _PyUop_num_popped(int opcode, int oparg);\n\n") out.emit("#ifdef NEED_OPCODE_METADATA\n") @@ -34,10 +35,11 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None: out.emit(f"[{uop.name}] = {cflags(uop.properties)},\n") out.emit("};\n\n") - out.emit("const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = {\n") + out.emit("const ReplicationRange _PyUop_Replication[MAX_UOP_ID+1] = {\n") for uop in analysis.uops.values(): if uop.replicated: - out.emit(f"[{uop.name}] = {uop.replicated},\n") + assert(uop.replicated.step == 1) + out.emit(f"[{uop.name}] = {{ {uop.replicated.start}, {uop.replicated.stop} }},\n") out.emit("};\n\n") out.emit("const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {\n") From ce421adcb637b6d41448f02f76a422d9584cc074 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 16 Jun 2025 15:08:41 +0100 Subject: [PATCH 2/3] Update comment --- Tools/cases_generator/analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 6397a7e6029eaa..17a4d48d6005c7 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -869,6 +869,7 @@ def compute_properties(op: parser.CodeDef) -> Properties: ) def expand(items: list[StackItem], oparg: int) -> list[StackItem]: + # Only replace array item with scalar if no more than item is an array index = -1 for i, item in enumerate(items): if "oparg" in item.size: @@ -886,7 +887,6 @@ def expand(items: list[StackItem], oparg: int) -> list[StackItem]: ] + items[index+1:] def scalarize_stack(stack: StackEffect, oparg: int) -> StackEffect: - # Only scalarize if no more than one input or output is array stack.inputs = expand(stack.inputs, oparg) stack.outputs = expand(stack.outputs, oparg) return stack From 72aa4a6f510876a3fc7dcdc0610ac58777c81e9c Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 16 Jun 2025 15:24:58 +0100 Subject: [PATCH 3/3] Update Tools/cases_generator/analyzer.py Co-authored-by: Ken Jin --- Tools/cases_generator/analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 17a4d48d6005c7..c6a9fbcad8891f 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -869,7 +869,7 @@ def compute_properties(op: parser.CodeDef) -> Properties: ) def expand(items: list[StackItem], oparg: int) -> list[StackItem]: - # Only replace array item with scalar if no more than item is an array + # Only replace array item with scalar if no more than one item is an array index = -1 for i, item in enumerate(items): if "oparg" in item.size: