diff --git a/Include/internal/pycore_floatobject.h b/Include/internal/pycore_floatobject.h index f984df695696c3..07da8f238df65d 100644 --- a/Include/internal/pycore_floatobject.h +++ b/Include/internal/pycore_floatobject.h @@ -55,6 +55,7 @@ extern PyObject* _Py_string_to_number_with_underscores( extern double _Py_parse_inf_or_nan(const char *p, char **endptr); +PyAPI_FUNC(PyObject *) _PyFloat_From64Bits(int64_t); #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 3e4dd8b4009cd4..e76ba2dafe6b22 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -199,11 +199,13 @@ extern "C" { #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST +#define _LOAD_FLOAT 412 #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 412 -#define _LOAD_GLOBAL_BUILTINS 413 -#define _LOAD_GLOBAL_MODULE 414 +#define _LOAD_GLOBAL 413 +#define _LOAD_GLOBAL_BUILTINS 414 +#define _LOAD_GLOBAL_MODULE 415 +#define _LOAD_INT 416 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR @@ -217,49 +219,51 @@ extern "C" { #define _MATCH_SEQUENCE MATCH_SEQUENCE #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_FRAME 415 -#define _POP_JUMP_IF_FALSE 416 -#define _POP_JUMP_IF_TRUE 417 +#define _POP_FRAME 417 +#define _POP_JUMP_IF_FALSE 418 +#define _POP_JUMP_IF_TRUE 419 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 418 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 420 +#define _POP_TWO_LOAD_FLOAT 421 +#define _POP_TWO_LOAD_INT 422 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 419 +#define _PUSH_FRAME 423 #define _PUSH_NULL PUSH_NULL -#define _REPLACE_WITH_TRUE 420 +#define _REPLACE_WITH_TRUE 424 #define _RESUME_CHECK RESUME_CHECK -#define _SAVE_RETURN_OFFSET 421 -#define _SEND 422 +#define _SAVE_RETURN_OFFSET 425 +#define _SEND 426 #define _SEND_GEN SEND_GEN #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _SIDE_EXIT 423 -#define _START_EXECUTOR 424 -#define _STORE_ATTR 425 -#define _STORE_ATTR_INSTANCE_VALUE 426 -#define _STORE_ATTR_SLOT 427 +#define _SIDE_EXIT 427 +#define _START_EXECUTOR 428 +#define _STORE_ATTR 429 +#define _STORE_ATTR_INSTANCE_VALUE 430 +#define _STORE_ATTR_SLOT 431 #define _STORE_ATTR_WITH_HINT STORE_ATTR_WITH_HINT #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 428 -#define _STORE_FAST_0 429 -#define _STORE_FAST_1 430 -#define _STORE_FAST_2 431 -#define _STORE_FAST_3 432 -#define _STORE_FAST_4 433 -#define _STORE_FAST_5 434 -#define _STORE_FAST_6 435 -#define _STORE_FAST_7 436 +#define _STORE_FAST 432 +#define _STORE_FAST_0 433 +#define _STORE_FAST_1 434 +#define _STORE_FAST_2 435 +#define _STORE_FAST_3 436 +#define _STORE_FAST_4 437 +#define _STORE_FAST_5 438 +#define _STORE_FAST_6 439 +#define _STORE_FAST_7 440 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME #define _STORE_SLICE STORE_SLICE -#define _STORE_SUBSCR 437 +#define _STORE_SUBSCR 441 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TO_BOOL 438 +#define _TO_BOOL 442 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -269,12 +273,12 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 439 +#define _UNPACK_SEQUENCE 443 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START -#define MAX_UOP_ID 439 +#define MAX_UOP_ID 443 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 111824a938f6cc..e4594da8f2ce31 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -237,6 +237,10 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, [_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG, [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG, + [_LOAD_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_POP_TWO_LOAD_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_LOAD_FLOAT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, + [_POP_TWO_LOAD_FLOAT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG | HAS_PURE_FLAG, [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_COLD_EXIT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, @@ -412,11 +416,13 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_FAST_AND_CLEAR] = "_LOAD_FAST_AND_CLEAR", [_LOAD_FAST_CHECK] = "_LOAD_FAST_CHECK", [_LOAD_FAST_LOAD_FAST] = "_LOAD_FAST_LOAD_FAST", + [_LOAD_FLOAT] = "_LOAD_FLOAT", [_LOAD_FROM_DICT_OR_DEREF] = "_LOAD_FROM_DICT_OR_DEREF", [_LOAD_FROM_DICT_OR_GLOBALS] = "_LOAD_FROM_DICT_OR_GLOBALS", [_LOAD_GLOBAL] = "_LOAD_GLOBAL", [_LOAD_GLOBAL_BUILTINS] = "_LOAD_GLOBAL_BUILTINS", [_LOAD_GLOBAL_MODULE] = "_LOAD_GLOBAL_MODULE", + [_LOAD_INT] = "_LOAD_INT", [_LOAD_LOCALS] = "_LOAD_LOCALS", [_LOAD_SUPER_ATTR_ATTR] = "_LOAD_SUPER_ATTR_ATTR", [_LOAD_SUPER_ATTR_METHOD] = "_LOAD_SUPER_ATTR_METHOD", @@ -432,6 +438,8 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_POP_FRAME] = "_POP_FRAME", [_POP_TOP] = "_POP_TOP", [_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW", + [_POP_TWO_LOAD_FLOAT] = "_POP_TWO_LOAD_FLOAT", + [_POP_TWO_LOAD_INT] = "_POP_TWO_LOAD_INT", [_PUSH_EXC_INFO] = "_PUSH_EXC_INFO", [_PUSH_FRAME] = "_PUSH_FRAME", [_PUSH_NULL] = "_PUSH_NULL", @@ -922,6 +930,14 @@ int _PyUop_num_popped(int opcode, int oparg) return 0; case _LOAD_CONST_INLINE_BORROW_WITH_NULL: return 0; + case _LOAD_INT: + return 0; + case _POP_TWO_LOAD_INT: + return 2; + case _LOAD_FLOAT: + return 0; + case _POP_TWO_LOAD_FLOAT: + return 2; case _CHECK_FUNCTION: return 0; case _INTERNAL_INCREMENT_OPT_COUNTER: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 7ca0f6927fe4a1..b44ed8ad48239f 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -794,8 +794,8 @@ def testfunc(n): def test_float_add_constant_propagation(self): def testfunc(n): - a = 1.0 for _ in range(n): + a = 1.0 a = a + 0.25 a = a + 0.25 a = a + 0.25 @@ -803,19 +803,16 @@ def testfunc(n): return a res, ex = self._run_with_optimizer(testfunc, 32) - self.assertAlmostEqual(res, 33.0) + self.assertAlmostEqual(res, 2.0) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] - self.assertLessEqual(len(guard_both_float_count), 1) - # TODO gh-115506: this assertion may change after propagating constants. - # We'll also need to verify that propagation actually occurs. - self.assertIn("_BINARY_OP_ADD_FLOAT", uops) + self.assertNotIn("_BINARY_OP_ADD_FLOAT", uops) + self.assertIn("_LOAD_FLOAT", uops) def test_float_subtract_constant_propagation(self): def testfunc(n): - a = 1.0 for _ in range(n): + a = 1.0 a = a - 0.25 a = a - 0.25 a = a - 0.25 @@ -823,19 +820,18 @@ def testfunc(n): return a res, ex = self._run_with_optimizer(testfunc, 32) - self.assertAlmostEqual(res, -31.0) + self.assertAlmostEqual(res, 0.0) self.assertIsNotNone(ex) uops = get_opnames(ex) guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] self.assertLessEqual(len(guard_both_float_count), 1) - # TODO gh-115506: this assertion may change after propagating constants. - # We'll also need to verify that propagation actually occurs. - self.assertIn("_BINARY_OP_SUBTRACT_FLOAT", uops) + self.assertNotIn("_BINARY_OP_SUBTRACT_FLOAT", uops) + self.assertIn("_LOAD_FLOAT", uops) def test_float_multiply_constant_propagation(self): def testfunc(n): - a = 1.0 for _ in range(n): + a = 1.0 a = a * 1.0 a = a * 1.0 a = a * 1.0 @@ -848,9 +844,22 @@ def testfunc(n): uops = get_opnames(ex) guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] self.assertLessEqual(len(guard_both_float_count), 1) - # TODO gh-115506: this assertion may change after propagating constants. - # We'll also need to verify that propagation actually occurs. - self.assertIn("_BINARY_OP_MULTIPLY_FLOAT", uops) + self.assertNotIn("_BINARY_OP_MULTIPLY_FLOAT", uops) + self.assertIn("_LOAD_FLOAT", uops) + + def test_int_add_constant_propagation_peepholer_advanced(self): + def testfunc(n): + for _ in range(n): + a = 1 + a = (a + a) + (a + a + (a + a)) + return a + + res, ex = self._run_with_optimizer(testfunc, 32) + self.assertAlmostEqual(res, 6) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + self.assertIn("_LOAD_INT", uops) def test_add_unicode_propagation(self): def testfunc(n): @@ -941,7 +950,8 @@ def testfunc(n): self.assertIsNotNone(ex) uops = get_opnames(ex) self.assertNotIn("_GUARD_BOTH_INT", uops) - self.assertIn("_BINARY_OP_ADD_INT", uops) + # Constant folded + self.assertIn("_LOAD_INT", uops) # Try again, but between the runs, set the global to a float. # This should result in no executor the second time. ns = {} @@ -1245,5 +1255,36 @@ def testfunc(n): self.assertEqual(res, 32 * 32) self.assertIsNone(ex) + def test_int_constant_propagation(self): + def testfunc(n): + for _ in range(n): + a = 1 + x = a + a - a * a + return x + + res, ex = self._run_with_optimizer(testfunc, 32) + self.assertTrue(res) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertNotIn("_BINARY_OP_ADD_INT", uops) + self.assertNotIn("_BINARY_OP_MULTIPLY_INT", uops) + self.assertNotIn("_BINARY_OP_SUBTRACT_INT", uops) + + def test_no_bigint_constant_propagation(self): + # We don't want to hold strong references in the trace. + def testfunc(n): + for _ in range(n): + a = 100000000000000000000000000000000000000 + x = a + a - a * a + return x + + res, ex = self._run_with_optimizer(testfunc, 32) + self.assertTrue(res) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_BINARY_OP_ADD_INT", uops) + self.assertIn("_BINARY_OP_MULTIPLY_INT", uops) + self.assertIn("_BINARY_OP_SUBTRACT_INT", uops) + if __name__ == "__main__": unittest.main() diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 96227f2cf7d76f..1f1fd119e92de8 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -2614,3 +2614,12 @@ PyFloat_Unpack8(const char *data, int le) return x; } } + +PyObject* +_PyFloat_From64Bits(int64_t val) +{ + assert(sizeof(double) == sizeof(int64_t)); + double dst; + memcpy(&dst, &val, sizeof(int64_t)); + return PyFloat_FromDouble(dst); +} diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d6fb66a7be34ac..4cb82e12ad963c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4127,6 +4127,30 @@ dummy_func( null = NULL; } + tier2 pure op(_LOAD_INT, (cached/4 -- value)) { + value = PyLong_FromLong((int64_t)cached); + ERROR_IF(value == NULL, error); + } + + tier2 pure op(_POP_TWO_LOAD_INT, (cached/4, pop1, pop2 -- value)) { + Py_DECREF(pop1); + Py_DECREF(pop2); + value = PyLong_FromLong((int64_t)cached); + ERROR_IF(value == NULL, error); + } + + tier2 pure op(_LOAD_FLOAT, (cached/4: int64_t -- value)) { + value = _PyFloat_From64Bits((int64_t)cached); + ERROR_IF(value == NULL, error); + } + + tier2 pure op(_POP_TWO_LOAD_FLOAT, (cached/4: int64_t, pop1, pop2 -- value)) { + Py_DECREF(pop1); + Py_DECREF(pop2); + value = _PyFloat_From64Bits((int64_t)cached); + ERROR_IF(value == NULL, error); + } + tier2 op(_CHECK_FUNCTION, (func_version/2 -- )) { assert(PyFunction_Check(frame->f_funcobj)); DEOPT_IF(((PyFunctionObject *)frame->f_funcobj)->func_version != func_version); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index a3447da00477ca..1358a50fbce69b 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4075,6 +4075,58 @@ break; } + case _LOAD_INT: { + PyObject *value; + PyObject *cached = (PyObject *)CURRENT_OPERAND(); + value = PyLong_FromLong((int64_t)cached); + if (value == NULL) JUMP_TO_ERROR(); + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _POP_TWO_LOAD_INT: { + PyObject *pop2; + PyObject *pop1; + PyObject *value; + pop2 = stack_pointer[-1]; + pop1 = stack_pointer[-2]; + PyObject *cached = (PyObject *)CURRENT_OPERAND(); + Py_DECREF(pop1); + Py_DECREF(pop2); + value = PyLong_FromLong((int64_t)cached); + if (value == NULL) JUMP_TO_ERROR(); + stack_pointer[-2] = value; + stack_pointer += -1; + break; + } + + case _LOAD_FLOAT: { + PyObject *value; + int64_t cached = (int64_t )CURRENT_OPERAND(); + value = _PyFloat_From64Bits((int64_t)cached); + if (value == NULL) JUMP_TO_ERROR(); + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _POP_TWO_LOAD_FLOAT: { + PyObject *pop2; + PyObject *pop1; + PyObject *value; + pop2 = stack_pointer[-1]; + pop1 = stack_pointer[-2]; + int64_t cached = (int64_t )CURRENT_OPERAND(); + Py_DECREF(pop1); + Py_DECREF(pop2); + value = _PyFloat_From64Bits((int64_t)cached); + if (value == NULL) JUMP_TO_ERROR(); + stack_pointer[-2] = value; + stack_pointer += -1; + break; + } + case _CHECK_FUNCTION: { uint32_t func_version = (uint32_t)CURRENT_OPERAND(); assert(PyFunction_Check(frame->f_funcobj)); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index a21679f366a74e..92372f97546955 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -284,7 +284,14 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, return 0; } - +static int64_t +double_as_int64_t(double in) { + assert(sizeof(double) == sizeof(int64_t)); + double temp = in; + int64_t result; + memcpy(&result, &temp, sizeof(double)); + return result; +} #define STACK_LEVEL() ((int)(stack_pointer - ctx->frame->stack)) @@ -449,7 +456,67 @@ optimize_uops( return trace_len; } +static inline bool +op_is_simple_load(int opcode) { + switch (opcode) { + case _LOAD_CONST_INLINE_BORROW: + case _LOAD_CONST_INLINE: + case _LOAD_FAST: + case _LOAD_INT: + case _LOAD_FLOAT: + return true; + default: + return false; + } +} +static bool +remove_simple_pops(int num_popped, _PyUOpInstruction *curr, _PyUOpInstruction *limit){ + int remaining = num_popped; + _PyUOpInstruction *original_curr = curr; + while (curr > limit && remaining != 0) { + int opcode = curr->opcode; + switch (opcode) { + case _NOP: + case _CHECK_VALIDITY_AND_SET_IP: + case _CHECK_VALIDITY: + case _SET_IP: + break; + default: + // Hit a non-simple instruction. Just bail early, + // so we don't end up with quadratic time. + if (!op_is_simple_load(opcode)) { + return false; + } + remaining--; + break; + } + curr--; + } + if (remaining != 0) { + return false; + } + // Can eliminate. + remaining = num_popped; + curr = original_curr; + while (remaining != 0) { + int opcode = curr->opcode; + switch (opcode) { + case _NOP: + case _CHECK_VALIDITY_AND_SET_IP: + case _CHECK_VALIDITY: + case _SET_IP: + break; + default: + assert(op_is_simple_load(opcode)); + curr->opcode = _NOP; + remaining--; + break; + } + curr--; + } + return true; +} static int remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) { @@ -462,6 +529,21 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) for (int pc = 0; pc < buffer_size; pc++) { int opcode = buffer[pc].opcode; switch (opcode) { + case _POP_TOP_LOAD_CONST_INLINE_BORROW: + if (remove_simple_pops(1, &buffer[pc-1], buffer)) { + buffer[pc].opcode = _LOAD_CONST_INLINE_BORROW; + } + break; + case _POP_TWO_LOAD_INT: + if (remove_simple_pops(2, &buffer[pc-1], buffer)) { + buffer[pc].opcode = _LOAD_INT; + } + break; + case _POP_TWO_LOAD_FLOAT: + if (remove_simple_pops(2, &buffer[pc-1], buffer)) { + buffer[pc].opcode = _LOAD_FLOAT; + } + break; case _SET_IP: buffer[pc].opcode = _NOP; last_set_ip = pc; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index e38428af108893..d4249d79799481 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -144,8 +144,12 @@ dummy_func(void) { res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! + if (_PyLong_IsCompact((PyLongObject *)temp)) { + Py_ssize_t val = _PyLong_CompactValue((PyLongObject *)temp); + if (val == (int64_t)val) { + REPLACE_OP(this_instr, _POP_TWO_LOAD_INT, 0, (int64_t)val); + } + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyLong_Type)); @@ -166,8 +170,12 @@ dummy_func(void) { res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! + if (_PyLong_IsCompact((PyLongObject *)temp)) { + Py_ssize_t val = _PyLong_CompactValue((PyLongObject *)temp); + if (val == (int64_t)val) { + REPLACE_OP(this_instr, _POP_TWO_LOAD_INT, 0, (int64_t)val); + } + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyLong_Type)); @@ -188,8 +196,12 @@ dummy_func(void) { res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! + if (_PyLong_IsCompact((PyLongObject *)temp)) { + Py_ssize_t val = _PyLong_CompactValue((PyLongObject *)temp); + if (val == (int64_t)val) { + REPLACE_OP(this_instr, _POP_TWO_LOAD_INT, 0, (int64_t)val); + } + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyLong_Type)); @@ -211,8 +223,10 @@ dummy_func(void) { res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! + if (sizeof(double) == sizeof(int64_t)) { + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); @@ -234,8 +248,10 @@ dummy_func(void) { res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! + if (sizeof(double) == sizeof(int64_t)) { + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); @@ -257,8 +273,10 @@ dummy_func(void) { res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! + if (sizeof(double) == sizeof(int64_t)) { + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 209be370c4aa38..54fbdf8cd800e1 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -256,8 +256,12 @@ res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! + if (_PyLong_IsCompact((PyLongObject *)temp)) { + Py_ssize_t val = _PyLong_CompactValue((PyLongObject *)temp); + if (val == (int64_t)val) { + REPLACE_OP(this_instr, _POP_TWO_LOAD_INT, 0, (int64_t)val); + } + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyLong_Type)); @@ -286,8 +290,12 @@ res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! + if (_PyLong_IsCompact((PyLongObject *)temp)) { + Py_ssize_t val = _PyLong_CompactValue((PyLongObject *)temp); + if (val == (int64_t)val) { + REPLACE_OP(this_instr, _POP_TWO_LOAD_INT, 0, (int64_t)val); + } + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyLong_Type)); @@ -316,8 +324,12 @@ res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and add tests! + if (_PyLong_IsCompact((PyLongObject *)temp)) { + Py_ssize_t val = _PyLong_CompactValue((PyLongObject *)temp); + if (val == (int64_t)val) { + REPLACE_OP(this_instr, _POP_TWO_LOAD_INT, 0, (int64_t)val); + } + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyLong_Type)); @@ -365,8 +377,10 @@ res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! + if (sizeof(double) == sizeof(int64_t)) { + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); @@ -396,8 +410,10 @@ res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! + if (sizeof(double) == sizeof(int64_t)) { + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); @@ -427,8 +443,10 @@ res = sym_new_const(ctx, temp); Py_DECREF(temp); OUT_OF_SPACE_IF_NULL(res); - // TODO gh-115506: - // replace opcode with constant propagated one and update tests! + if (sizeof(double) == sizeof(int64_t)) { + double f = PyFloat_AS_DOUBLE(temp); + REPLACE_OP(this_instr, _POP_TWO_LOAD_FLOAT, 0, double_as_int64_t(f)); + } } else { OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); @@ -1972,6 +1990,42 @@ break; } + case _LOAD_INT: { + _Py_UopsSymbol *value; + value = sym_new_not_null(ctx); + if (value == NULL) goto out_of_space; + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _POP_TWO_LOAD_INT: { + _Py_UopsSymbol *value; + value = sym_new_not_null(ctx); + if (value == NULL) goto out_of_space; + stack_pointer[-2] = value; + stack_pointer += -1; + break; + } + + case _LOAD_FLOAT: { + _Py_UopsSymbol *value; + value = sym_new_not_null(ctx); + if (value == NULL) goto out_of_space; + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + + case _POP_TWO_LOAD_FLOAT: { + _Py_UopsSymbol *value; + value = sym_new_not_null(ctx); + if (value == NULL) goto out_of_space; + stack_pointer[-2] = value; + stack_pointer += -1; + break; + } + case _CHECK_FUNCTION: { break; } diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 4261378d459107..092a3258e0a20c 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -133,8 +133,11 @@ def __str__(self) -> str: class CacheEntry: name: str size: int + typ: str def __str__(self) -> str: + if self.typ: + return f"{self.name}/{self.size}: {self.typ}" return f"{self.name}/{self.size}" @@ -321,7 +324,7 @@ def analyze_caches(inputs: list[parser.InputEffect]) -> list[CacheEntry]: raise analysis_error( "Unused cache entry in op. Move to enclosing macro.", cache.tokens[0] ) - return [CacheEntry(i.name, int(i.size)) for i in caches] + return [CacheEntry(i.name, int(i.size), i.typ) for i in caches] def variable_used(node: parser.InstDef, name: str) -> bool: diff --git a/Tools/cases_generator/parsing.py b/Tools/cases_generator/parsing.py index 0d54820e4e71fb..2161cf53e3150a 100644 --- a/Tools/cases_generator/parsing.py +++ b/Tools/cases_generator/parsing.py @@ -91,6 +91,7 @@ class Expression(Node): class CacheEffect(Node): name: str size: int + typ: str @dataclass @@ -247,7 +248,7 @@ def output(self) -> OutputEffect | None: @contextual def cache_effect(self) -> CacheEffect | None: - # IDENTIFIER '/' NUMBER + # IDENTIFIER '/' NUMBER [: IDENTIFIER] if tkn := self.expect(lx.IDENTIFIER): if self.expect(lx.DIVIDE): num = self.require(lx.NUMBER).text @@ -255,8 +256,14 @@ def cache_effect(self) -> CacheEffect | None: size = int(num) except ValueError: raise self.make_syntax_error(f"Expected integer, got {num!r}") - else: - return CacheEffect(tkn.text, size) + type_text = "" + if self.expect(lx.COLON): + type_text = self.require(lx.IDENTIFIER).text.strip() + if self.expect(lx.TIMES): + type_text += " *" + else: + type_text += " " + return CacheEffect(tkn.text, size, type_text) return None @contextual @@ -356,8 +363,14 @@ def uop(self) -> UOp | None: raise self.make_syntax_error( f"Expected integer, got {num.text!r}" ) - else: - return CacheEffect(tkn.text, size) + type_text = "" + if self.expect(lx.COLON): + type_text = self.require(lx.IDENTIFIER).text.strip() + if self.expect(lx.TIMES): + type_text += " *" + else: + type_text += " " + return CacheEffect(tkn.text, size, type_text) raise self.make_syntax_error("Expected integer") else: return OpName(tkn.text) diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index 944d134f12a18e..785ce6c1956ab6 100644 --- a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -170,7 +170,7 @@ def write_uop(uop: Uop, out: CWriter, stack: Stack) -> None: for cache in uop.caches: if cache.name != "unused": if cache.size == 4: - type = cast = "PyObject *" + type = cast = cache.typ or "PyObject *" else: type = f"uint{cache.size*16}_t " cast = f"uint{cache.size*16}_t"