From b993c480fd246bd6c7a4e69913e6e6ee9ac2b355 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 31 Mar 2024 07:33:09 +0800 Subject: [PATCH 01/11] Optimizer constant propagation for 64-bit ints --- Include/internal/pycore_uop_ids.h | 54 ++++++++++---------- Include/internal/pycore_uop_metadata.h | 8 +++ Lib/test/test_capi/test_opt.py | 33 +++++++++++- Python/bytecodes.c | 12 +++++ Python/executor_cases.c.h | 26 ++++++++++ Python/optimizer_analysis.c | 70 ++++++++++++++++++++++++++ Python/optimizer_bytecodes.c | 24 ++++++--- Python/optimizer_cases.c.h | 42 +++++++++++++--- 8 files changed, 230 insertions(+), 39 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index bcb10ab723ecba..b337fab7b447a4 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -203,6 +203,7 @@ extern "C" { #define _LOAD_GLOBAL 411 #define _LOAD_GLOBAL_BUILTINS 412 #define _LOAD_GLOBAL_MODULE 413 +#define _LOAD_INT 414 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR @@ -216,49 +217,50 @@ extern "C" { #define _MATCH_SEQUENCE MATCH_SEQUENCE #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_FRAME 414 -#define _POP_JUMP_IF_FALSE 415 -#define _POP_JUMP_IF_TRUE 416 +#define _POP_FRAME 415 +#define _POP_JUMP_IF_FALSE 416 +#define _POP_JUMP_IF_TRUE 417 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 417 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 418 +#define _POP_TWO_LOAD_INT 419 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 418 +#define _PUSH_FRAME 420 #define _PUSH_NULL PUSH_NULL -#define _REPLACE_WITH_TRUE 419 +#define _REPLACE_WITH_TRUE 421 #define _RESUME_CHECK RESUME_CHECK -#define _SAVE_RETURN_OFFSET 420 -#define _SEND 421 +#define _SAVE_RETURN_OFFSET 422 +#define _SEND 423 #define _SEND_GEN SEND_GEN #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _SIDE_EXIT 422 -#define _START_EXECUTOR 423 -#define _STORE_ATTR 424 -#define _STORE_ATTR_INSTANCE_VALUE 425 -#define _STORE_ATTR_SLOT 426 +#define _SIDE_EXIT 424 +#define _START_EXECUTOR 425 +#define _STORE_ATTR 426 +#define _STORE_ATTR_INSTANCE_VALUE 427 +#define _STORE_ATTR_SLOT 428 #define _STORE_ATTR_WITH_HINT STORE_ATTR_WITH_HINT #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 427 -#define _STORE_FAST_0 428 -#define _STORE_FAST_1 429 -#define _STORE_FAST_2 430 -#define _STORE_FAST_3 431 -#define _STORE_FAST_4 432 -#define _STORE_FAST_5 433 -#define _STORE_FAST_6 434 -#define _STORE_FAST_7 435 +#define _STORE_FAST 429 +#define _STORE_FAST_0 430 +#define _STORE_FAST_1 431 +#define _STORE_FAST_2 432 +#define _STORE_FAST_3 433 +#define _STORE_FAST_4 434 +#define _STORE_FAST_5 435 +#define _STORE_FAST_6 436 +#define _STORE_FAST_7 437 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME #define _STORE_SLICE STORE_SLICE -#define _STORE_SUBSCR 436 +#define _STORE_SUBSCR 438 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TO_BOOL 437 +#define _TO_BOOL 439 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -268,12 +270,12 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 438 +#define _UNPACK_SEQUENCE 440 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START -#define MAX_UOP_ID 438 +#define MAX_UOP_ID 440 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 51206cd4ca2fdf..5a74945ffb6828 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -236,6 +236,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, [_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG, [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG, + [_LOAD_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_POP_TWO_LOAD_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_COLD_EXIT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, @@ -415,6 +417,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_GLOBAL] = "_LOAD_GLOBAL", [_LOAD_GLOBAL_BUILTINS] = "_LOAD_GLOBAL_BUILTINS", [_LOAD_GLOBAL_MODULE] = "_LOAD_GLOBAL_MODULE", + [_LOAD_INT] = "_LOAD_INT", [_LOAD_LOCALS] = "_LOAD_LOCALS", [_LOAD_SUPER_ATTR_ATTR] = "_LOAD_SUPER_ATTR_ATTR", [_LOAD_SUPER_ATTR_METHOD] = "_LOAD_SUPER_ATTR_METHOD", @@ -430,6 +433,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_POP_FRAME] = "_POP_FRAME", [_POP_TOP] = "_POP_TOP", [_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW", + [_POP_TWO_LOAD_INT] = "_POP_TWO_LOAD_INT", [_PUSH_EXC_INFO] = "_PUSH_EXC_INFO", [_PUSH_FRAME] = "_PUSH_FRAME", [_PUSH_NULL] = "_PUSH_NULL", @@ -918,6 +922,10 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_CONST_INLINE_BORROW_WITH_NULL: return 0; + case _LOAD_INT: + return 0; + case _POP_TWO_LOAD_INT: + return 2; case _CHECK_FUNCTION: return 0; case _INTERNAL_INCREMENT_OPT_COUNTER: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index b59f4b74a8593e..a8637af28c4cc0 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -939,7 +939,8 @@ def testfunc(n): self.assertIsNotNone(ex) uops = get_opnames(ex) self.assertNotIn("_GUARD_BOTH_INT", uops) - self.assertIn("_BINARY_OP_ADD_INT", uops) + # Constant folded + self.assertIn("_LOAD_INT", uops) # Try again, but between the runs, set the global to a float. # This should result in no executor the second time. ns = {} @@ -978,6 +979,36 @@ def testfunc(n): self._run_with_optimizer(testfunc, 32) + def test_int_constant_propagation(self): + def testfunc(n): + for _ in range(n): + a = 1 + x = a + a - a * a + return x + + res, ex = self._run_with_optimizer(testfunc, 32) + self.assertTrue(res) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + self.assertNotIn("_BINARY_OP_MULTIPLY_INT", uops) + self.assertNotIn("_BINARY_OP_SUBTRACT_INT", uops) + + def test_no_bigint_constant_propagation(self): + # We don't want to hold strong references in the trace. + def testfunc(n): + for _ in range(n): + a = 100000000000000000000000000000000000000 + x = a + a - a * a + return x + + res, ex = self._run_with_optimizer(testfunc, 32) + self.assertTrue(res) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_BINARY_OP_ADD_INT", uops) + self.assertIn("_BINARY_OP_MULTIPLY_INT", uops) + self.assertIn("_BINARY_OP_SUBTRACT_INT", uops) if __name__ == "__main__": unittest.main() diff --git a/Python/bytecodes.c b/Python/bytecodes.c index bfb378c4a41500..d4f0ba357099a0 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4138,6 +4138,18 @@ dummy_func( null = NULL; } + tier2 pure op(_LOAD_INT, (cached/4 -- value)) { + value = PyLong_FromLong((int64_t)cached); + ERROR_IF(value == NULL, error); + } + + tier2 pure op(_POP_TWO_LOAD_INT, (cached/4, pop1, pop2 -- value)) { + Py_DECREF(pop1); + Py_DECREF(pop2); + value = PyLong_FromLong((int64_t)cached); + ERROR_IF(value == NULL, error); + } + tier2 op(_CHECK_FUNCTION, (func_version/2 -- )) { assert(PyFunction_Check(frame->f_funcobj)); DEOPT_IF(((PyFunctionObject *)frame->f_funcobj)->func_version != func_version); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ce0dc235c54fcf..934f0ea194067f 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3670,6 +3670,32 @@ break; } + case _LOAD_INT: { + PyObject *value; + PyObject *cached = (PyObject *)CURRENT_OPERAND(); + value = PyLong_FromLong((int64_t)cached); + if (value == NULL) JUMP_TO_ERROR(); + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _POP_TWO_LOAD_INT: { + PyObject *pop2; + PyObject *pop1; + PyObject *value; + pop2 = stack_pointer[-1]; + pop1 = stack_pointer[-2]; + PyObject *cached = (PyObject *)CURRENT_OPERAND(); + Py_DECREF(pop1); + Py_DECREF(pop2); + value = PyLong_FromLong((int64_t)cached); + if (value == NULL) JUMP_TO_ERROR(); + stack_pointer[-2] = value; + stack_pointer += -1; + break; + } + case _CHECK_FUNCTION: { uint32_t func_version = (uint32_t)CURRENT_OPERAND(); assert(PyFunction_Check(frame->f_funcobj)); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 6f553f8ab8ad2e..501c4d3d3f3cef 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -449,7 +449,67 @@ optimize_uops( return trace_len; } +static bool +op_is_simple_load(int opcode) { + switch (opcode) { + case _LOAD_CONST_INLINE_BORROW: + case _LOAD_CONST_INLINE: + case _LOAD_FAST: + case _LOAD_INT: + return true; + default: + return false; + } +} +static bool +remove_simple_pops(int num_popped, _PyUOpInstruction *curr, _PyUOpInstruction *limit){ + int remaining = num_popped; + _PyUOpInstruction *original_curr = curr; + while (curr > limit && remaining != 0) { + int opcode = curr->opcode; + switch (opcode) { + case _NOP: + case _CHECK_VALIDITY_AND_SET_IP: + case _CHECK_VALIDITY: + case _SET_IP: + break; + default: + if (op_is_simple_load(opcode)) { + remaining--; + } + // Hit a non-simple instruction. Just bail early, + // so we don't end up with quadratic time. + else { + return false; + } + } + curr--; + } + if (remaining != 0) { + return false; + } + // Can eliminate. + remaining = num_popped; + curr = original_curr; + while (remaining != 0) { + int opcode = curr->opcode; + switch (opcode) { + case _NOP: + case _CHECK_VALIDITY_AND_SET_IP: + case _CHECK_VALIDITY: + case _SET_IP: + break; + default: + assert (op_is_simple_load(opcode)); + curr->opcode = _NOP; + remaining--; + break; + } + curr--; + } + return true; +} static int remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) { @@ -462,6 +522,16 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) for (int pc = 0; pc < buffer_size; pc++) { int opcode = buffer[pc].opcode; switch (opcode) { + case _POP_TOP_LOAD_CONST_INLINE_BORROW: + if (remove_simple_pops(1, &buffer[pc-1], buffer)) { + buffer[pc].opcode = _LOAD_CONST_INLINE_BORROW; + } + break; + case _POP_TWO_LOAD_INT: + if (remove_simple_pops(2, &buffer[pc-1], buffer)) { + buffer[pc].opcode = _LOAD_INT; + } + break; case _SET_IP: buffer[pc].opcode = _NOP; last_set_ip = pc; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index e38428af108893..e2903601a7db23 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -144,8 +144,12 @@ dummy_func(void) { res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! + if (_PyLong_IsCompact((PyLongObject *)temp)) { + Py_ssize_t val = _PyLong_CompactValue((PyLongObject *)temp); + if (val == (int64_t)val) { + REPLACE_OP(this_instr, _POP_TWO_LOAD_INT, 0, (int64_t)val); + } + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyLong_Type)); @@ -166,8 +170,12 @@ dummy_func(void) { res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! + if (_PyLong_IsCompact((PyLongObject *)temp)) { + Py_ssize_t val = _PyLong_CompactValue((PyLongObject *)temp); + if (val == (int64_t)val) { + REPLACE_OP(this_instr, _POP_TWO_LOAD_INT, 0, (int64_t)val); + } + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyLong_Type)); @@ -188,8 +196,12 @@ dummy_func(void) { res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! + if (_PyLong_IsCompact((PyLongObject *)temp)) { + Py_ssize_t val = _PyLong_CompactValue((PyLongObject *)temp); + if (val == (int64_t)val) { + REPLACE_OP(this_instr, _POP_TWO_LOAD_INT, 0, (int64_t)val); + } + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyLong_Type)); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index df73cc091dea26..a4490d879ef8aa 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -256,8 +256,12 @@ res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! + if (_PyLong_IsCompact((PyLongObject *)temp)) { + Py_ssize_t val = _PyLong_CompactValue((PyLongObject *)temp); + if (val == (int64_t)val) { + REPLACE_OP(this_instr, _POP_TWO_LOAD_INT, 0, (int64_t)val); + } + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyLong_Type)); @@ -286,8 +290,12 @@ res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! + if (_PyLong_IsCompact((PyLongObject *)temp)) { + Py_ssize_t val = _PyLong_CompactValue((PyLongObject *)temp); + if (val == (int64_t)val) { + REPLACE_OP(this_instr, _POP_TWO_LOAD_INT, 0, (int64_t)val); + } + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyLong_Type)); @@ -316,8 +324,12 @@ res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! + if (_PyLong_IsCompact((PyLongObject *)temp)) { + Py_ssize_t val = _PyLong_CompactValue((PyLongObject *)temp); + if (val == (int64_t)val) { + REPLACE_OP(this_instr, _POP_TWO_LOAD_INT, 0, (int64_t)val); + } + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyLong_Type)); @@ -1968,6 +1980,24 @@ break; } + case _LOAD_INT: { + _Py_UopsSymbol *value; + value = sym_new_not_null(ctx); + if (value == NULL) goto out_of_space; + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _POP_TWO_LOAD_INT: { + _Py_UopsSymbol *value; + value = sym_new_not_null(ctx); + if (value == NULL) goto out_of_space; + stack_pointer[-2] = value; + stack_pointer += -1; + break; + } + case _CHECK_FUNCTION: { break; } From 97c3c04e9661cfd27a66dec83de8933ea264f11d Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Sun, 31 Mar 2024 08:21:17 +0800 Subject: [PATCH 02/11] Add double constant propagation --- Include/internal/pycore_uop_ids.h | 64 +++++++++++++------------- Include/internal/pycore_uop_metadata.h | 8 ++++ Lib/test/test_capi/test_opt.py | 27 +++++------ Python/bytecodes.c | 16 +++++++ Python/executor_cases.c.h | 30 ++++++++++++ Python/optimizer_analysis.c | 16 ++++++- Python/optimizer_bytecodes.c | 12 ++--- Python/optimizer_cases.c.h | 30 +++++++++--- 8 files changed, 142 insertions(+), 61 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index b337fab7b447a4..d81df79db01ce5 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -198,12 +198,13 @@ extern "C" { #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST +#define _LOAD_FLOAT 411 #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 411 -#define _LOAD_GLOBAL_BUILTINS 412 -#define _LOAD_GLOBAL_MODULE 413 -#define _LOAD_INT 414 +#define _LOAD_GLOBAL 412 +#define _LOAD_GLOBAL_BUILTINS 413 +#define _LOAD_GLOBAL_MODULE 414 +#define _LOAD_INT 415 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR @@ -217,50 +218,51 @@ extern "C" { #define _MATCH_SEQUENCE MATCH_SEQUENCE #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_FRAME 415 -#define _POP_JUMP_IF_FALSE 416 -#define _POP_JUMP_IF_TRUE 417 +#define _POP_FRAME 416 +#define _POP_JUMP_IF_FALSE 417 +#define _POP_JUMP_IF_TRUE 418 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 418 -#define _POP_TWO_LOAD_INT 419 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 419 +#define _POP_TWO_LOAD_FLOAT 420 +#define _POP_TWO_LOAD_INT 421 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 420 +#define _PUSH_FRAME 422 #define _PUSH_NULL PUSH_NULL -#define _REPLACE_WITH_TRUE 421 +#define _REPLACE_WITH_TRUE 423 #define _RESUME_CHECK RESUME_CHECK -#define _SAVE_RETURN_OFFSET 422 -#define _SEND 423 +#define _SAVE_RETURN_OFFSET 424 +#define _SEND 425 #define _SEND_GEN SEND_GEN #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _SIDE_EXIT 424 -#define _START_EXECUTOR 425 -#define _STORE_ATTR 426 -#define _STORE_ATTR_INSTANCE_VALUE 427 -#define _STORE_ATTR_SLOT 428 +#define _SIDE_EXIT 426 +#define _START_EXECUTOR 427 +#define _STORE_ATTR 428 +#define _STORE_ATTR_INSTANCE_VALUE 429 +#define _STORE_ATTR_SLOT 430 #define _STORE_ATTR_WITH_HINT STORE_ATTR_WITH_HINT #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 429 -#define _STORE_FAST_0 430 -#define _STORE_FAST_1 431 -#define _STORE_FAST_2 432 -#define _STORE_FAST_3 433 -#define _STORE_FAST_4 434 -#define _STORE_FAST_5 435 -#define _STORE_FAST_6 436 -#define _STORE_FAST_7 437 +#define _STORE_FAST 431 +#define _STORE_FAST_0 432 +#define _STORE_FAST_1 433 +#define _STORE_FAST_2 434 +#define _STORE_FAST_3 435 +#define _STORE_FAST_4 436 +#define _STORE_FAST_5 437 +#define _STORE_FAST_6 438 +#define _STORE_FAST_7 439 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME #define _STORE_SLICE STORE_SLICE -#define _STORE_SUBSCR 438 +#define _STORE_SUBSCR 440 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TO_BOOL 439 +#define _TO_BOOL 441 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -270,12 +272,12 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 440 +#define _UNPACK_SEQUENCE 442 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START -#define MAX_UOP_ID 440 +#define MAX_UOP_ID 442 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 5a74945ffb6828..66b594331b9463 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -238,6 +238,8 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG, [_LOAD_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_POP_TWO_LOAD_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_LOAD_FLOAT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_POP_TWO_LOAD_FLOAT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_COLD_EXIT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, @@ -412,6 +414,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_FAST_AND_CLEAR] = "_LOAD_FAST_AND_CLEAR", [_LOAD_FAST_CHECK] = "_LOAD_FAST_CHECK", [_LOAD_FAST_LOAD_FAST] = "_LOAD_FAST_LOAD_FAST", + [_LOAD_FLOAT] = "_LOAD_FLOAT", [_LOAD_FROM_DICT_OR_DEREF] = "_LOAD_FROM_DICT_OR_DEREF", [_LOAD_FROM_DICT_OR_GLOBALS] = "_LOAD_FROM_DICT_OR_GLOBALS", [_LOAD_GLOBAL] = "_LOAD_GLOBAL", @@ -433,6 +436,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_POP_FRAME] = "_POP_FRAME", [_POP_TOP] = "_POP_TOP", [_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW", + [_POP_TWO_LOAD_FLOAT] = "_POP_TWO_LOAD_FLOAT", [_POP_TWO_LOAD_INT] = "_POP_TWO_LOAD_INT", [_PUSH_EXC_INFO] = "_PUSH_EXC_INFO", [_PUSH_FRAME] = "_PUSH_FRAME", @@ -926,6 +930,10 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _POP_TWO_LOAD_INT: return 2; + case _LOAD_FLOAT: + return 0; + case _POP_TWO_LOAD_FLOAT: + return 2; case _CHECK_FUNCTION: return 0; case _INTERNAL_INCREMENT_OPT_COUNTER: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index a8637af28c4cc0..b3b4110079bf04 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -792,8 +792,8 @@ def testfunc(n): def test_float_add_constant_propagation(self): def testfunc(n): - a = 1.0 for _ in range(n): + a = 1.0 a = a + 0.25 a = a + 0.25 a = a + 0.25 @@ -801,19 +801,16 @@ def testfunc(n): return a res, ex = self._run_with_optimizer(testfunc, 32) - self.assertAlmostEqual(res, 33.0) + self.assertAlmostEqual(res, 2.0) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] - self.assertLessEqual(len(guard_both_float_count), 1) - # TODO gh-115506: this assertion may change after propagating constants. - # We'll also need to verify that propagation actually occurs. - self.assertIn("_BINARY_OP_ADD_FLOAT", uops) + self.assertNotIn("_BINARY_OP_ADD_FLOAT", uops) + self.assertIn("_LOAD_FLOAT", uops) def test_float_subtract_constant_propagation(self): def testfunc(n): - a = 1.0 for _ in range(n): + a = 1.0 a = a - 0.25 a = a - 0.25 a = a - 0.25 @@ -821,19 +818,18 @@ def testfunc(n): return a res, ex = self._run_with_optimizer(testfunc, 32) - self.assertAlmostEqual(res, -31.0) + self.assertAlmostEqual(res, 0.0) self.assertIsNotNone(ex) uops = get_opnames(ex) guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] self.assertLessEqual(len(guard_both_float_count), 1) - # TODO gh-115506: this assertion may change after propagating constants. - # We'll also need to verify that propagation actually occurs. - self.assertIn("_BINARY_OP_SUBTRACT_FLOAT", uops) + self.assertNotIn("_BINARY_OP_SUBTRACT_FLOAT", uops) + self.assertIn("_LOAD_FLOAT", uops) def test_float_multiply_constant_propagation(self): def testfunc(n): - a = 1.0 for _ in range(n): + a = 1.0 a = a * 1.0 a = a * 1.0 a = a * 1.0 @@ -846,9 +842,8 @@ def testfunc(n): uops = get_opnames(ex) guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] self.assertLessEqual(len(guard_both_float_count), 1) - # TODO gh-115506: this assertion may change after propagating constants. - # We'll also need to verify that propagation actually occurs. - self.assertIn("_BINARY_OP_MULTIPLY_FLOAT", uops) + self.assertNotIn("_BINARY_OP_MULTIPLY_FLOAT", uops) + self.assertIn("_LOAD_FLOAT", uops) def test_add_unicode_propagation(self): def testfunc(n): diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d4f0ba357099a0..5460b8d1c41ed7 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4150,6 +4150,22 @@ dummy_func( ERROR_IF(value == NULL, error); } + tier2 pure op(_LOAD_FLOAT, (cached/4 -- value)) { + double dst; + memcpy(&dst, &cached, sizeof(PyObject *)); + value = PyFloat_FromDouble(dst); + ERROR_IF(value == NULL, error); + } + + tier2 pure op(_POP_TWO_LOAD_FLOAT, (cached/4, pop1, pop2 -- value)) { + Py_DECREF(pop1); + Py_DECREF(pop2); + double dst; + memcpy(&dst, &cached, sizeof(PyObject *)); + value = PyFloat_FromDouble(dst); + ERROR_IF(value == NULL, error); + } + tier2 op(_CHECK_FUNCTION, (func_version/2 -- )) { assert(PyFunction_Check(frame->f_funcobj)); DEOPT_IF(((PyFunctionObject *)frame->f_funcobj)->func_version != func_version); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 934f0ea194067f..12d7f5809576c8 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3696,6 +3696,36 @@ break; } + case _LOAD_FLOAT: { + PyObject *value; + PyObject *cached = (PyObject *)CURRENT_OPERAND(); + double dst; + memcpy(&dst, &cached, sizeof(PyObject *)); + value = PyFloat_FromDouble(dst); + if (value == NULL) JUMP_TO_ERROR(); + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _POP_TWO_LOAD_FLOAT: { + PyObject *pop2; + PyObject *pop1; + PyObject *value; + pop2 = stack_pointer[-1]; + pop1 = stack_pointer[-2]; + PyObject *cached = (PyObject *)CURRENT_OPERAND(); + Py_DECREF(pop1); + Py_DECREF(pop2); + double dst; + memcpy(&dst, &cached, sizeof(PyObject *)); + value = PyFloat_FromDouble(dst); + if (value == NULL) JUMP_TO_ERROR(); + stack_pointer[-2] = value; + stack_pointer += -1; + break; + } + case _CHECK_FUNCTION: { uint32_t func_version = (uint32_t)CURRENT_OPERAND(); assert(PyFunction_Check(frame->f_funcobj)); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 501c4d3d3f3cef..a9f89b4f3f5424 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -284,7 +284,13 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, return 0; } - +static int64_t +double_as_int64_t(double in) { + double temp = in; + int64_t result; + memcpy(&result, &temp, sizeof(double)); + return result; +} #define STACK_LEVEL() ((int)(stack_pointer - ctx->frame->stack)) @@ -449,13 +455,14 @@ optimize_uops( return trace_len; } -static bool +static inline bool op_is_simple_load(int opcode) { switch (opcode) { case _LOAD_CONST_INLINE_BORROW: case _LOAD_CONST_INLINE: case _LOAD_FAST: case _LOAD_INT: + case _LOAD_FLOAT: return true; default: return false; @@ -532,6 +539,11 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) buffer[pc].opcode = _LOAD_INT; } break; + case _POP_TWO_LOAD_FLOAT: + if (remove_simple_pops(2, &buffer[pc-1], buffer)) { + buffer[pc].opcode = _LOAD_FLOAT; + } + break; case _SET_IP: buffer[pc].opcode = _NOP; last_set_ip = pc; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index e2903601a7db23..a459527223b343 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -223,8 +223,8 @@ dummy_func(void) { res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); @@ -246,8 +246,8 @@ dummy_func(void) { res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); @@ -269,8 +269,8 @@ dummy_func(void) { res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index a4490d879ef8aa..ccbdcbc51dba2f 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -377,8 +377,8 @@ res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); @@ -408,8 +408,8 @@ res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); @@ -439,8 +439,8 @@ res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); @@ -1998,6 +1998,24 @@ break; } + case _LOAD_FLOAT: { + _Py_UopsSymbol *value; + value = sym_new_not_null(ctx); + if (value == NULL) goto out_of_space; + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _POP_TWO_LOAD_FLOAT: { + _Py_UopsSymbol *value; + value = sym_new_not_null(ctx); + if (value == NULL) goto out_of_space; + stack_pointer[-2] = value; + stack_pointer += -1; + break; + } + case _CHECK_FUNCTION: { break; } From ffea734b6467796916f4c5e010c2625157378a3a Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 1 Apr 2024 21:28:15 +0800 Subject: [PATCH 03/11] include string.h --- Tools/jit/template.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Tools/jit/template.c b/Tools/jit/template.c index 54160084cda460..bee97fac2092ca 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -15,6 +15,7 @@ #include "pycore_setobject.h" #include "pycore_sliceobject.h" #include "pycore_descrobject.h" +#include "string.h" #include "ceval_macros.h" From a01c2f1dbcd568528f3e140f0ffa79747c1429d5 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Mon, 1 Apr 2024 21:29:45 +0800 Subject: [PATCH 04/11] minor fixups --- Python/optimizer_analysis.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index a9f89b4f3f5424..7b30858bb989c9 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -482,14 +482,13 @@ remove_simple_pops(int num_popped, _PyUOpInstruction *curr, _PyUOpInstruction *l case _SET_IP: break; default: - if (op_is_simple_load(opcode)) { - remaining--; - } // Hit a non-simple instruction. Just bail early, // so we don't end up with quadratic time. - else { + if (!op_is_simple_load(opcode)) { return false; } + remaining--; + break; } curr--; } @@ -508,7 +507,7 @@ remove_simple_pops(int num_popped, _PyUOpInstruction *curr, _PyUOpInstruction *l case _SET_IP: break; default: - assert (op_is_simple_load(opcode)); + assert(op_is_simple_load(opcode)); curr->opcode = _NOP; remaining--; break; From c5b3d54ea78ad2177c3bd85c093ae283b1f768d4 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 2 Apr 2024 00:00:06 +0800 Subject: [PATCH 05/11] fix windows build --- Include/internal/pycore_optimizer.h | 3 +++ Python/bytecodes.c | 8 ++------ Python/executor_cases.c.h | 8 ++------ Python/optimizer.c | 9 +++++++++ Python/optimizer_analysis.c | 4 ++++ Tools/jit/template.c | 1 - 6 files changed, 20 insertions(+), 13 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 44cafe61b75596..69ecf05c0a4cc9 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -115,6 +115,9 @@ PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored); PyAPI_FUNC(int) _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr); +PyAPI_FUNC(PyObject *) _Py_64_bits_as_double(int64_t); + + #ifdef __cplusplus } #endif diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 5460b8d1c41ed7..3eb583462c3ec8 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4151,18 +4151,14 @@ dummy_func( } tier2 pure op(_LOAD_FLOAT, (cached/4 -- value)) { - double dst; - memcpy(&dst, &cached, sizeof(PyObject *)); - value = PyFloat_FromDouble(dst); + value = _Py_64_bits_as_double((int64_t)cached); ERROR_IF(value == NULL, error); } tier2 pure op(_POP_TWO_LOAD_FLOAT, (cached/4, pop1, pop2 -- value)) { Py_DECREF(pop1); Py_DECREF(pop2); - double dst; - memcpy(&dst, &cached, sizeof(PyObject *)); - value = PyFloat_FromDouble(dst); + value = _Py_64_bits_as_double((int64_t)cached); ERROR_IF(value == NULL, error); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 12d7f5809576c8..70c334daaa404a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3699,9 +3699,7 @@ case _LOAD_FLOAT: { PyObject *value; PyObject *cached = (PyObject *)CURRENT_OPERAND(); - double dst; - memcpy(&dst, &cached, sizeof(PyObject *)); - value = PyFloat_FromDouble(dst); + value = _Py_64_bits_as_double((int64_t)cached); if (value == NULL) JUMP_TO_ERROR(); stack_pointer[0] = value; stack_pointer += 1; @@ -3717,9 +3715,7 @@ PyObject *cached = (PyObject *)CURRENT_OPERAND(); Py_DECREF(pop1); Py_DECREF(pop2); - double dst; - memcpy(&dst, &cached, sizeof(PyObject *)); - value = PyFloat_FromDouble(dst); + value = _Py_64_bits_as_double((int64_t)cached); if (value == NULL) JUMP_TO_ERROR(); stack_pointer[-2] = value; stack_pointer += -1; diff --git a/Python/optimizer.c b/Python/optimizer.c index 38ab6d3cf61c72..815369fb9bb302 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1615,3 +1615,12 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation) } } } + +PyObject* +_Py_64_bits_as_double(int64_t val) +{ + assert(sizeof(double) == sizeof(int64_t)); + double dst; + memcpy(&dst, &val, sizeof(int64_t)); + return PyFloat_FromDouble(dst); +} \ No newline at end of file diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 7b30858bb989c9..ca0c106345bea3 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -286,6 +286,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, static int64_t double_as_int64_t(double in) { + assert(sizeof(double) == sizeof(int64_t)); double temp = in; int64_t result; memcpy(&result, &temp, sizeof(double)); @@ -538,11 +539,14 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) buffer[pc].opcode = _LOAD_INT; } break; +// This guarantees that in C99, doubles are 64-bit. +#ifdef __STDC_IEC_559__ case _POP_TWO_LOAD_FLOAT: if (remove_simple_pops(2, &buffer[pc-1], buffer)) { buffer[pc].opcode = _LOAD_FLOAT; } break; +#endif case _SET_IP: buffer[pc].opcode = _NOP; last_set_ip = pc; diff --git a/Tools/jit/template.c b/Tools/jit/template.c index bee97fac2092ca..54160084cda460 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -15,7 +15,6 @@ #include "pycore_setobject.h" #include "pycore_sliceobject.h" #include "pycore_descrobject.h" -#include "string.h" #include "ceval_macros.h" From c84dd91ffcb3e9f34890085ca678177526348af4 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 2 Apr 2024 00:03:43 +0800 Subject: [PATCH 06/11] move to better name and place --- Include/internal/pycore_floatobject.h | 1 + Include/internal/pycore_optimizer.h | 3 --- Objects/floatobject.c | 9 +++++++++ Python/bytecodes.c | 4 ++-- Python/executor_cases.c.h | 4 ++-- Python/optimizer.c | 9 --------- 6 files changed, 14 insertions(+), 16 deletions(-) diff --git a/Include/internal/pycore_floatobject.h b/Include/internal/pycore_floatobject.h index f984df695696c3..07da8f238df65d 100644 --- a/Include/internal/pycore_floatobject.h +++ b/Include/internal/pycore_floatobject.h @@ -55,6 +55,7 @@ extern PyObject* _Py_string_to_number_with_underscores( extern double _Py_parse_inf_or_nan(const char *p, char **endptr); +PyAPI_FUNC(PyObject *) _PyFloat_From64Bits(int64_t); #ifdef __cplusplus } diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 69ecf05c0a4cc9..44cafe61b75596 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -115,9 +115,6 @@ PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored); PyAPI_FUNC(int) _PyOptimizer_Optimize(_PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr); -PyAPI_FUNC(PyObject *) _Py_64_bits_as_double(int64_t); - - #ifdef __cplusplus } #endif diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 96227f2cf7d76f..1f1fd119e92de8 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -2614,3 +2614,12 @@ PyFloat_Unpack8(const char *data, int le) return x; } } + +PyObject* +_PyFloat_From64Bits(int64_t val) +{ + assert(sizeof(double) == sizeof(int64_t)); + double dst; + memcpy(&dst, &val, sizeof(int64_t)); + return PyFloat_FromDouble(dst); +} diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 3eb583462c3ec8..471ea56399691b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4151,14 +4151,14 @@ dummy_func( } tier2 pure op(_LOAD_FLOAT, (cached/4 -- value)) { - value = _Py_64_bits_as_double((int64_t)cached); + value = _PyFloat_From64Bits((int64_t)cached); ERROR_IF(value == NULL, error); } tier2 pure op(_POP_TWO_LOAD_FLOAT, (cached/4, pop1, pop2 -- value)) { Py_DECREF(pop1); Py_DECREF(pop2); - value = _Py_64_bits_as_double((int64_t)cached); + value = _PyFloat_From64Bits((int64_t)cached); ERROR_IF(value == NULL, error); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 70c334daaa404a..8db65c15638f6c 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3699,7 +3699,7 @@ case _LOAD_FLOAT: { PyObject *value; PyObject *cached = (PyObject *)CURRENT_OPERAND(); - value = _Py_64_bits_as_double((int64_t)cached); + value = _PyFloat_From64Bits((int64_t)cached); if (value == NULL) JUMP_TO_ERROR(); stack_pointer[0] = value; stack_pointer += 1; @@ -3715,7 +3715,7 @@ PyObject *cached = (PyObject *)CURRENT_OPERAND(); Py_DECREF(pop1); Py_DECREF(pop2); - value = _Py_64_bits_as_double((int64_t)cached); + value = _PyFloat_From64Bits((int64_t)cached); if (value == NULL) JUMP_TO_ERROR(); stack_pointer[-2] = value; stack_pointer += -1; diff --git a/Python/optimizer.c b/Python/optimizer.c index 815369fb9bb302..38ab6d3cf61c72 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1615,12 +1615,3 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation) } } } - -PyObject* -_Py_64_bits_as_double(int64_t val) -{ - assert(sizeof(double) == sizeof(int64_t)); - double dst; - memcpy(&dst, &val, sizeof(int64_t)); - return PyFloat_FromDouble(dst); -} \ No newline at end of file From dc7c0d9e861c10366c4536efa1542834ba3d5118 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 2 Apr 2024 00:22:18 +0800 Subject: [PATCH 07/11] fix check --- Python/optimizer_analysis.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index ca0c106345bea3..a032098bdf2bb6 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -539,14 +539,12 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) buffer[pc].opcode = _LOAD_INT; } break; -// This guarantees that in C99, doubles are 64-bit. -#ifdef __STDC_IEC_559__ case _POP_TWO_LOAD_FLOAT: - if (remove_simple_pops(2, &buffer[pc-1], buffer)) { + if ((sizeof(double) == sizeof(int64_t)) && + remove_simple_pops(2, &buffer[pc-1], buffer)) { buffer[pc].opcode = _LOAD_FLOAT; } break; -#endif case _SET_IP: buffer[pc].opcode = _NOP; last_set_ip = pc; From 8b10b50e906cf846a2ab8462319f5ce1f4c95773 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 2 Apr 2024 00:33:27 +0800 Subject: [PATCH 08/11] more tests --- Lib/test/test_capi/test_opt.py | 14 ++++++++++++++ Python/optimizer_analysis.c | 3 +-- Python/optimizer_bytecodes.c | 18 ++++++++++++------ Python/optimizer_cases.c.h | 18 ++++++++++++------ 4 files changed, 39 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index b3b4110079bf04..817ee7ee40efae 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -845,6 +845,20 @@ def testfunc(n): self.assertNotIn("_BINARY_OP_MULTIPLY_FLOAT", uops) self.assertIn("_LOAD_FLOAT", uops) + def test_int_add_constant_propagation_peepholer_advanced(self): + def testfunc(n): + for _ in range(n): + a = 1 + a = (a + a) + (a + a + (a + a)) + return a + + res, ex = self._run_with_optimizer(testfunc, 32) + self.assertAlmostEqual(res, 6) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + self.assertIn("_LOAD_INT", uops) + def test_add_unicode_propagation(self): def testfunc(n): a = "" diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index a032098bdf2bb6..6a76136e718cab 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -540,8 +540,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } break; case _POP_TWO_LOAD_FLOAT: - if ((sizeof(double) == sizeof(int64_t)) && - remove_simple_pops(2, &buffer[pc-1], buffer)) { + if (remove_simple_pops(2, &buffer[pc-1], buffer)) { buffer[pc].opcode = _LOAD_FLOAT; } break; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index a459527223b343..d4249d79799481 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -223,8 +223,10 @@ dummy_func(void) { res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - double f = PyFloat_AS_DOUBLE(temp); - REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + if (sizeof(double) == sizeof(int64_t)) { + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); @@ -246,8 +248,10 @@ dummy_func(void) { res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - double f = PyFloat_AS_DOUBLE(temp); - REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + if (sizeof(double) == sizeof(int64_t)) { + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); @@ -269,8 +273,10 @@ dummy_func(void) { res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - double f = PyFloat_AS_DOUBLE(temp); - REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + if (sizeof(double) == sizeof(int64_t)) { + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index ccbdcbc51dba2f..8d0315100199b9 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -377,8 +377,10 @@ res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - double f = PyFloat_AS_DOUBLE(temp); - REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + if (sizeof(double) == sizeof(int64_t)) { + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); @@ -408,8 +410,10 @@ res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - double f = PyFloat_AS_DOUBLE(temp); - REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + if (sizeof(double) == sizeof(int64_t)) { + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); @@ -439,8 +443,10 @@ res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - double f = PyFloat_AS_DOUBLE(temp); - REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + if (sizeof(double) == sizeof(int64_t)) { + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); From 6de675cb5e3b0a5cf306456a5b8c21bb5cea5cfe Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 2 Apr 2024 01:05:31 +0800 Subject: [PATCH 09/11] alternative forms of black magic --- Objects/floatobject.c | 9 ++++++--- Python/optimizer_analysis.c | 10 ++++++---- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 1f1fd119e92de8..82b66e3764d695 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -2618,8 +2618,11 @@ PyFloat_Unpack8(const char *data, int le) PyObject* _PyFloat_From64Bits(int64_t val) { + union caster { + int64_t from; + double to; + }; assert(sizeof(double) == sizeof(int64_t)); - double dst; - memcpy(&dst, &val, sizeof(int64_t)); - return PyFloat_FromDouble(dst); + union caster temp = {.from = val}; + return PyFloat_FromDouble(temp.to); } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 6a76136e718cab..0465ca8d17c66a 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -286,11 +286,13 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, static int64_t double_as_int64_t(double in) { + union caster { + double from; + int64_t to; + }; assert(sizeof(double) == sizeof(int64_t)); - double temp = in; - int64_t result; - memcpy(&result, &temp, sizeof(double)); - return result; + union caster temp = {.from = in}; + return temp.to; } #define STACK_LEVEL() ((int)(stack_pointer - ctx->frame->stack)) From e66c0dd7d0c6a42fe7bb574d6a898003005b2a83 Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 2 Apr 2024 01:06:31 +0800 Subject: [PATCH 10/11] Revert "alternative forms of black magic" This reverts commit 6de675cb5e3b0a5cf306456a5b8c21bb5cea5cfe. --- Objects/floatobject.c | 9 +++------ Python/optimizer_analysis.c | 10 ++++------ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 82b66e3764d695..1f1fd119e92de8 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -2618,11 +2618,8 @@ PyFloat_Unpack8(const char *data, int le) PyObject* _PyFloat_From64Bits(int64_t val) { - union caster { - int64_t from; - double to; - }; assert(sizeof(double) == sizeof(int64_t)); - union caster temp = {.from = val}; - return PyFloat_FromDouble(temp.to); + double dst; + memcpy(&dst, &val, sizeof(int64_t)); + return PyFloat_FromDouble(dst); } diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 0465ca8d17c66a..6a76136e718cab 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -286,13 +286,11 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, static int64_t double_as_int64_t(double in) { - union caster { - double from; - int64_t to; - }; assert(sizeof(double) == sizeof(int64_t)); - union caster temp = {.from = in}; - return temp.to; + double temp = in; + int64_t result; + memcpy(&result, &temp, sizeof(double)); + return result; } #define STACK_LEVEL() ((int)(stack_pointer - ctx->frame->stack)) From ab0836a622203547ac19ef1f582e26881045001b Mon Sep 17 00:00:00 2001 From: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> Date: Tue, 2 Apr 2024 01:13:19 +0800 Subject: [PATCH 11/11] fix 32-bit pointer issues --- Python/bytecodes.c | 4 ++-- Python/executor_cases.c.h | 4 ++-- Tools/cases_generator/analyzer.py | 5 ++++- Tools/cases_generator/parsing.py | 23 ++++++++++++++++++----- Tools/cases_generator/tier2_generator.py | 2 +- 5 files changed, 27 insertions(+), 11 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 471ea56399691b..affb6ae22d9ae1 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4150,12 +4150,12 @@ dummy_func( ERROR_IF(value == NULL, error); } - tier2 pure op(_LOAD_FLOAT, (cached/4 -- value)) { + tier2 pure op(_LOAD_FLOAT, (cached/4: int64_t -- value)) { value = _PyFloat_From64Bits((int64_t)cached); ERROR_IF(value == NULL, error); } - tier2 pure op(_POP_TWO_LOAD_FLOAT, (cached/4, pop1, pop2 -- value)) { + tier2 pure op(_POP_TWO_LOAD_FLOAT, (cached/4: int64_t, pop1, pop2 -- value)) { Py_DECREF(pop1); Py_DECREF(pop2); value = _PyFloat_From64Bits((int64_t)cached); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 8db65c15638f6c..64ce8ec97836a2 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3698,7 +3698,7 @@ case _LOAD_FLOAT: { PyObject *value; - PyObject *cached = (PyObject *)CURRENT_OPERAND(); + int64_t cached = (int64_t )CURRENT_OPERAND(); value = _PyFloat_From64Bits((int64_t)cached); if (value == NULL) JUMP_TO_ERROR(); stack_pointer[0] = value; @@ -3712,7 +3712,7 @@ PyObject *value; pop2 = stack_pointer[-1]; pop1 = stack_pointer[-2]; - PyObject *cached = (PyObject *)CURRENT_OPERAND(); + int64_t cached = (int64_t )CURRENT_OPERAND(); Py_DECREF(pop1); Py_DECREF(pop2); value = _PyFloat_From64Bits((int64_t)cached); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index ddafcf99ca1e37..6036f88806edd3 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -133,8 +133,11 @@ def __str__(self) -> str: class CacheEntry: name: str size: int + typ: str def __str__(self) -> str: + if self.typ: + return f"{self.name}/{self.size}: {self.typ}" return f"{self.name}/{self.size}" @@ -321,7 +324,7 @@ def analyze_caches(inputs: list[parser.InputEffect]) -> list[CacheEntry]: raise analysis_error( "Unused cache entry in op. Move to enclosing macro.", cache.tokens[0] ) - return [CacheEntry(i.name, int(i.size)) for i in caches] + return [CacheEntry(i.name, int(i.size), i.typ) for i in caches] def variable_used(node: parser.InstDef, name: str) -> bool: diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index 0d54820e4e71fb..2161cf53e3150a 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -91,6 +91,7 @@ class Expression(Node): class CacheEffect(Node): name: str size: int + typ: str @dataclass @@ -247,7 +248,7 @@ def output(self) -> OutputEffect | None: @contextual def cache_effect(self) -> CacheEffect | None: - # IDENTIFIER '/' NUMBER + # IDENTIFIER '/' NUMBER [: IDENTIFIER] if tkn := self.expect(lx.IDENTIFIER): if self.expect(lx.DIVIDE): num = self.require(lx.NUMBER).text @@ -255,8 +256,14 @@ def cache_effect(self) -> CacheEffect | None: size = int(num) except ValueError: raise self.make_syntax_error(f"Expected integer, got {num!r}") - else: - return CacheEffect(tkn.text, size) + type_text = "" + if self.expect(lx.COLON): + type_text = self.require(lx.IDENTIFIER).text.strip() + if self.expect(lx.TIMES): + type_text += " *" + else: + type_text += " " + return CacheEffect(tkn.text, size, type_text) return None @contextual @@ -356,8 +363,14 @@ def uop(self) -> UOp | None: raise self.make_syntax_error( f"Expected integer, got {num.text!r}" ) - else: - return CacheEffect(tkn.text, size) + type_text = "" + if self.expect(lx.COLON): + type_text = self.require(lx.IDENTIFIER).text.strip() + if self.expect(lx.TIMES): + type_text += " *" + else: + type_text += " " + return CacheEffect(tkn.text, size, type_text) raise self.make_syntax_error("Expected integer") else: return OpName(tkn.text) diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index 114d28ee745632..e1f2e431c674d6 100644 --- a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -164,7 +164,7 @@ def write_uop(uop: Uop, out: CWriter, stack: Stack) -> None: for cache in uop.caches: if cache.name != "unused": if cache.size == 4: - type = cast = "PyObject *" + type = cast = cache.typ or "PyObject *" else: type = f"uint{cache.size*16}_t " cast = f"uint{cache.size*16}_t"