From 996be64b4d5d85ad0d92f03f8f7de87e737ebde6 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 10 Jun 2022 09:33:48 +0100 Subject: [PATCH 01/14] move line/noline to arg and remove _noline version of functions --- Python/compile.c | 110 ++++++++++++++++++++--------------------------- 1 file changed, 47 insertions(+), 63 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index bbd71936cf3468..37a890d905cb50 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -432,10 +432,9 @@ static int basicblock_next_instr(basicblock *); static int compiler_enter_scope(struct compiler *, identifier, int, void *, int); static void compiler_free(struct compiler *); static basicblock *compiler_new_block(struct compiler *); -static int compiler_addop(struct compiler *, int); -static int compiler_addop_i(struct compiler *, int, Py_ssize_t); -static int compiler_addop_j(struct compiler *, int, basicblock *); -static int compiler_addop_j_noline(struct compiler *, int, basicblock *); +static int compiler_addop(struct compiler *, int, bool); +static int compiler_addop_i(struct compiler *, int, Py_ssize_t, bool); +static int compiler_addop_j(struct compiler *, int, basicblock *, bool); static int compiler_error(struct compiler *, const char *, ...); static int compiler_warn(struct compiler *, const char *, ...); static int compiler_nameop(struct compiler *, identifier, expr_context_ty); @@ -1269,8 +1268,8 @@ compiler_use_new_implicit_block_if_needed(struct compiler *c) */ static int -basicblock_addop_line(basicblock *b, int opcode, int line, - int end_line, int col_offset, int end_col_offset) +basicblock_addop(basicblock *b, int opcode, int lineno, + int end_lineno, int col_offset, int end_col_offset) { assert(IS_WITHIN_OPCODE_RANGE(opcode)); assert(!IS_ASSEMBLER_OPCODE(opcode)); @@ -1283,8 +1282,8 @@ basicblock_addop_line(basicblock *b, int opcode, int line, struct instr *i = &b->b_instr[off]; i->i_opcode = opcode; i->i_oparg = 0; - i->i_lineno = line; - i->i_end_lineno 
= end_line; + i->i_lineno = lineno; + i->i_end_lineno = end_lineno; i->i_col_offset = col_offset; i->i_end_col_offset = end_col_offset; @@ -1292,25 +1291,20 @@ basicblock_addop_line(basicblock *b, int opcode, int line, } static int -compiler_addop(struct compiler *c, int opcode) +compiler_addop(struct compiler *c, int opcode, bool line) { if (compiler_use_new_implicit_block_if_needed(c) < 0) { return -1; } - return basicblock_addop_line(c->u->u_curblock, opcode, c->u->u_lineno, c->u->u_end_lineno, - c->u->u_col_offset, c->u->u_end_col_offset); -} + int lineno = line ? c->u->u_lineno : -1; + int end_lineno = line ? c->u->u_end_lineno : 0; + int col_offset = line ? c->u->u_col_offset : 0; + int end_col_offset = line ? c->u->u_end_col_offset : 0; -static int -compiler_addop_noline(struct compiler *c, int opcode) -{ - if (compiler_use_new_implicit_block_if_needed(c) < 0) { - return -1; - } - return basicblock_addop_line(c->u->u_curblock, opcode, -1, 0, 0, 0); + return basicblock_addop(c->u->u_curblock, opcode, + lineno, end_lineno, col_offset, end_col_offset); } - static Py_ssize_t compiler_add_o(PyObject *dict, PyObject *o) { @@ -1467,7 +1461,7 @@ compiler_addop_load_const(struct compiler *c, PyObject *o) Py_ssize_t arg = compiler_add_const(c, o); if (arg < 0) return 0; - return compiler_addop_i(c, LOAD_CONST, arg); + return compiler_addop_i(c, LOAD_CONST, arg, true); } static int @@ -1477,7 +1471,7 @@ compiler_addop_o(struct compiler *c, int opcode, PyObject *dict, Py_ssize_t arg = compiler_add_o(dict, o); if (arg < 0) return 0; - return compiler_addop_i(c, opcode, arg); + return compiler_addop_i(c, opcode, arg, true); } static int @@ -1493,7 +1487,7 @@ compiler_addop_name(struct compiler *c, int opcode, PyObject *dict, Py_DECREF(mangled); if (arg < 0) return 0; - return compiler_addop_i(c, opcode, arg); + return compiler_addop_i(c, opcode, arg, true); } /* Add an opcode with an integer argument. 
@@ -1501,9 +1495,9 @@ compiler_addop_name(struct compiler *c, int opcode, PyObject *dict, */ static int -basicblock_addop_i_line(basicblock *b, int opcode, Py_ssize_t oparg, - int lineno, int end_lineno, - int col_offset, int end_col_offset) +basicblock_addop_i(basicblock *b, int opcode, Py_ssize_t oparg, + int lineno, int end_lineno, + int col_offset, int end_col_offset) { /* oparg value is unsigned, but a signed C int is usually used to store it in the C code (like Python/ceval.c). @@ -1515,7 +1509,6 @@ basicblock_addop_i_line(basicblock *b, int opcode, Py_ssize_t oparg, assert(IS_WITHIN_OPCODE_RANGE(opcode)); assert(!IS_ASSEMBLER_OPCODE(opcode)); - assert(HAS_ARG(opcode)); assert(0 <= oparg && oparg <= 2147483647); int off = basicblock_next_instr(b); @@ -1534,23 +1527,18 @@ basicblock_addop_i_line(basicblock *b, int opcode, Py_ssize_t oparg, } static int -compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg) +compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg, bool line) { if (compiler_use_new_implicit_block_if_needed(c) < 0) { return -1; } - return basicblock_addop_i_line(c->u->u_curblock, opcode, oparg, - c->u->u_lineno, c->u->u_end_lineno, - c->u->u_col_offset, c->u->u_end_col_offset); -} + int lineno = line ? c->u->u_lineno : -1; + int end_lineno = line ? c->u->u_end_lineno : 0; + int col_offset = line ? c->u->u_col_offset : 0; + int end_col_offset = line ? 
c->u->u_end_col_offset : 0; -static int -compiler_addop_i_noline(struct compiler *c, int opcode, Py_ssize_t oparg) -{ - if (compiler_use_new_implicit_block_if_needed(c) < 0) { - return -1; - } - return basicblock_addop_i_line(c->u->u_curblock, opcode, oparg, -1, 0, 0, 0); + return basicblock_addop_i(c->u->u_curblock, opcode, oparg, + lineno, end_lineno, col_offset, end_col_offset); } static int @@ -1580,37 +1568,33 @@ basicblock_add_jump(basicblock *b, int opcode, } static int -compiler_addop_j(struct compiler *c, int opcode, basicblock *b) +compiler_addop_j(struct compiler *c, int opcode, basicblock *target, bool line) { if (compiler_use_new_implicit_block_if_needed(c) < 0) { return -1; } - return basicblock_add_jump(c->u->u_curblock, opcode, c->u->u_lineno, - c->u->u_end_lineno, c->u->u_col_offset, - c->u->u_end_col_offset, b); -} + int lineno = line ? c->u->u_lineno : -1; + int end_lineno = line ? c->u->u_end_lineno : 0; + int col_offset = line ? c->u->u_col_offset : 0; + int end_col_offset = line ? 
c->u->u_end_col_offset : 0; -static int -compiler_addop_j_noline(struct compiler *c, int opcode, basicblock *b) -{ - if (compiler_use_new_implicit_block_if_needed(c) < 0) { - return -1; - } - return basicblock_add_jump(c->u->u_curblock, opcode, -1, 0, 0, 0, b); + return basicblock_add_jump(c->u->u_curblock, opcode, + lineno, end_lineno, col_offset, end_col_offset, + target); } #define ADDOP(C, OP) { \ - if (!compiler_addop((C), (OP))) \ + if (!compiler_addop((C), (OP), true)) \ return 0; \ } #define ADDOP_NOLINE(C, OP) { \ - if (!compiler_addop_noline((C), (OP))) \ + if (!compiler_addop((C), (OP), false)) \ return 0; \ } #define ADDOP_IN_SCOPE(C, OP) { \ - if (!compiler_addop((C), (OP))) { \ + if (!compiler_addop((C), (OP), true)) { \ compiler_exit_scope(c); \ return 0; \ } \ @@ -1649,17 +1633,17 @@ compiler_addop_j_noline(struct compiler *c, int opcode, basicblock *b) } #define ADDOP_I(C, OP, O) { \ - if (!compiler_addop_i((C), (OP), (O))) \ + if (!compiler_addop_i((C), (OP), (O), true)) \ return 0; \ } #define ADDOP_I_NOLINE(C, OP, O) { \ - if (!compiler_addop_i_noline((C), (OP), (O))) \ + if (!compiler_addop_i((C), (OP), (O), false)) \ return 0; \ } #define ADDOP_JUMP(C, OP, O) { \ - if (!compiler_addop_j((C), (OP), (O))) \ + if (!compiler_addop_j((C), (OP), (O), true)) \ return 0; \ } @@ -1667,7 +1651,7 @@ compiler_addop_j_noline(struct compiler *c, int opcode, basicblock *b) * Used for artificial jumps that have no corresponding * token in the source code. 
*/ #define ADDOP_JUMP_NOLINE(C, OP, O) { \ - if (!compiler_addop_j_noline((C), (OP), (O))) \ + if (!compiler_addop_j((C), (OP), (O), false)) \ return 0; \ } @@ -4320,7 +4304,7 @@ compiler_nameop(struct compiler *c, identifier name, expr_context_ty ctx) if (op == LOAD_GLOBAL) { arg <<= 1; } - return compiler_addop_i(c, op, arg); + return compiler_addop_i(c, op, arg, true); } static int @@ -6299,7 +6283,7 @@ emit_and_reset_fail_pop(struct compiler *c, pattern_context *pc) } while (--pc->fail_pop_size) { compiler_use_next_block(c, pc->fail_pop[pc->fail_pop_size]); - if (!compiler_addop(c, POP_TOP)) { + if (!compiler_addop(c, POP_TOP, true)) { pc->fail_pop_size = 0; PyObject_Free(pc->fail_pop); pc->fail_pop = NULL; @@ -6733,7 +6717,7 @@ compiler_pattern_or(struct compiler *c, pattern_ty p, pattern_context *pc) pc->fail_pop = NULL; pc->fail_pop_size = 0; pc->on_top = 0; - if (!compiler_addop_i(c, COPY, 1) || !compiler_pattern(c, alt, pc)) { + if (!compiler_addop_i(c, COPY, 1, true) || !compiler_pattern(c, alt, pc)) { goto error; } // Success! @@ -6796,7 +6780,7 @@ compiler_pattern_or(struct compiler *c, pattern_ty p, pattern_context *pc) } } assert(control); - if (!compiler_addop_j(c, JUMP, end) || + if (!compiler_addop_j(c, JUMP, end, true) || !emit_and_reset_fail_pop(c, pc)) { goto error; @@ -6808,7 +6792,7 @@ compiler_pattern_or(struct compiler *c, pattern_ty p, pattern_context *pc) // Need to NULL this for the PyObject_Free call in the error block. old_pc.fail_pop = NULL; // No match. 
Pop the remaining copy of the subject and fail: - if (!compiler_addop(c, POP_TOP) || !jump_to_fail_pop(c, pc, JUMP)) { + if (!compiler_addop(c, POP_TOP, true) || !jump_to_fail_pop(c, pc, JUMP)) { goto error; } compiler_use_next_block(c, end); From af31e92ce2855ac6a3289145883690918e7af0af Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 10 Jun 2022 10:35:27 +0100 Subject: [PATCH 02/14] add struct location to reduce boilerplate --- Python/compile.c | 82 ++++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 37a890d905cb50..7cfc2b411bd249 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -966,6 +966,25 @@ basicblock_next_instr(basicblock *b) (new).i_end_lineno = (old).i_end_lineno; \ (new).i_end_col_offset = (old).i_end_col_offset; + +struct location { + int lineno; + int end_lineno; + int col_offset; + int end_col_offset; +}; + +#define NO_LOCATION ((struct location){-1, 0, 0, 0}) + +/* current compiler unit's location */ +#define CU_LOCATION(CU) \ + ((struct location){ \ + (CU)->u_lineno, \ + (CU)->u_end_lineno, \ + (CU)->u_col_offset, \ + (CU)->u_end_col_offset, \ + }) + /* Return the stack effect of opcode with argument oparg. 
Some opcodes have different stack effect when jump to the target and @@ -1268,8 +1287,7 @@ compiler_use_new_implicit_block_if_needed(struct compiler *c) */ static int -basicblock_addop(basicblock *b, int opcode, int lineno, - int end_lineno, int col_offset, int end_col_offset) +basicblock_addop(basicblock *b, int opcode, struct location loc) { assert(IS_WITHIN_OPCODE_RANGE(opcode)); assert(!IS_ASSEMBLER_OPCODE(opcode)); @@ -1282,10 +1300,10 @@ basicblock_addop(basicblock *b, int opcode, int lineno, struct instr *i = &b->b_instr[off]; i->i_opcode = opcode; i->i_oparg = 0; - i->i_lineno = lineno; - i->i_end_lineno = end_lineno; - i->i_col_offset = col_offset; - i->i_end_col_offset = end_col_offset; + i->i_lineno = loc.lineno; + i->i_end_lineno = loc.end_lineno; + i->i_col_offset = loc.col_offset; + i->i_end_col_offset = loc.end_col_offset; return 1; } @@ -1296,13 +1314,9 @@ compiler_addop(struct compiler *c, int opcode, bool line) if (compiler_use_new_implicit_block_if_needed(c) < 0) { return -1; } - int lineno = line ? c->u->u_lineno : -1; - int end_lineno = line ? c->u->u_end_lineno : 0; - int col_offset = line ? c->u->u_col_offset : 0; - int end_col_offset = line ? c->u->u_end_col_offset : 0; - return basicblock_addop(c->u->u_curblock, opcode, - lineno, end_lineno, col_offset, end_col_offset); + struct location loc = line ? CU_LOCATION(c->u) : NO_LOCATION; + return basicblock_addop(c->u->u_curblock, opcode, loc); } static Py_ssize_t @@ -1496,8 +1510,7 @@ compiler_addop_name(struct compiler *c, int opcode, PyObject *dict, static int basicblock_addop_i(basicblock *b, int opcode, Py_ssize_t oparg, - int lineno, int end_lineno, - int col_offset, int end_col_offset) + struct location loc) { /* oparg value is unsigned, but a signed C int is usually used to store it in the C code (like Python/ceval.c). 
@@ -1518,10 +1531,10 @@ basicblock_addop_i(basicblock *b, int opcode, Py_ssize_t oparg, struct instr *i = &b->b_instr[off]; i->i_opcode = opcode; i->i_oparg = Py_SAFE_DOWNCAST(oparg, Py_ssize_t, int); - i->i_lineno = lineno; - i->i_end_lineno = end_lineno; - i->i_col_offset = col_offset; - i->i_end_col_offset = end_col_offset; + i->i_lineno = loc.lineno; + i->i_end_lineno = loc.end_lineno; + i->i_col_offset = loc.col_offset; + i->i_end_col_offset = loc.end_col_offset; return 1; } @@ -1532,20 +1545,13 @@ compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg, bool line) if (compiler_use_new_implicit_block_if_needed(c) < 0) { return -1; } - int lineno = line ? c->u->u_lineno : -1; - int end_lineno = line ? c->u->u_end_lineno : 0; - int col_offset = line ? c->u->u_col_offset : 0; - int end_col_offset = line ? c->u->u_end_col_offset : 0; - - return basicblock_addop_i(c->u->u_curblock, opcode, oparg, - lineno, end_lineno, col_offset, end_col_offset); + struct location loc = line ? CU_LOCATION(c->u) : NO_LOCATION; + return basicblock_addop_i(c->u->u_curblock, opcode, oparg, loc); } static int basicblock_add_jump(basicblock *b, int opcode, - int lineno, int end_lineno, - int col_offset, int end_col_offset, - basicblock *target) + struct location loc, basicblock *target) { assert(IS_WITHIN_OPCODE_RANGE(opcode)); assert(!IS_ASSEMBLER_OPCODE(opcode)); @@ -1559,10 +1565,10 @@ basicblock_add_jump(basicblock *b, int opcode, } i->i_opcode = opcode; i->i_target = target; - i->i_lineno = lineno; - i->i_end_lineno = end_lineno; - i->i_col_offset = col_offset; - i->i_end_col_offset = end_col_offset; + i->i_lineno = loc.lineno; + i->i_end_lineno = loc.end_lineno; + i->i_col_offset = loc.col_offset; + i->i_end_col_offset = loc.end_col_offset; return 1; } @@ -1573,14 +1579,8 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *target, bool line) if (compiler_use_new_implicit_block_if_needed(c) < 0) { return -1; } - int lineno = line ? 
c->u->u_lineno : -1; - int end_lineno = line ? c->u->u_end_lineno : 0; - int col_offset = line ? c->u->u_col_offset : 0; - int end_col_offset = line ? c->u->u_end_col_offset : 0; - - return basicblock_add_jump(c->u->u_curblock, opcode, - lineno, end_lineno, col_offset, end_col_offset, - target); + struct location loc = line ? CU_LOCATION(c->u) : NO_LOCATION; + return basicblock_add_jump(c->u->u_curblock, opcode, loc, target); } #define ADDOP(C, OP) { \ @@ -7455,7 +7455,7 @@ push_cold_blocks_to_end(struct compiler *c, basicblock *entry, int code_flags) { if (explicit_jump == NULL) { return -1; } - basicblock_add_jump(explicit_jump, JUMP, -1, 0, 0, 0, b->b_next); + basicblock_add_jump(explicit_jump, JUMP, NO_LOCATION, b->b_next); explicit_jump->b_cold = 1; explicit_jump->b_next = b->b_next; From f8a076783d5bbfdaca5a909dfafd184dde47204d Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 10 Jun 2022 11:41:35 +0100 Subject: [PATCH 03/14] merge basicblock_addop, basicblock_addop_i and basicblock_add_jump into one functions to reduce unnecessary repetition --- Python/compile.c | 87 ++++++++++++++++-------------------------------- 1 file changed, 28 insertions(+), 59 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 7cfc2b411bd249..975a903c2649ff 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -105,6 +105,11 @@ (IS_VIRTUAL_JUMP_OPCODE(opcode) || \ is_bit_set_in_table(_PyOpcode_Jump, opcode)) +#define IS_BLOCK_PUSH_OPCODE(opcode) \ + ((opcode) == SETUP_FINALLY || \ + (opcode) == SETUP_WITH || \ + (opcode) == SETUP_CLEANUP) + /* opcodes which are not emitted in codegen stage, only by the assembler */ #define IS_ASSEMBLER_OPCODE(opcode) \ ((opcode) == JUMP_FORWARD || \ @@ -191,7 +196,7 @@ static inline int is_block_push(struct instr *instr) { int opcode = instr->i_opcode; - return opcode == SETUP_FINALLY || opcode == SETUP_WITH || opcode == SETUP_CLEANUP; + return IS_BLOCK_PUSH_OPCODE(opcode); } static inline int @@ -1287,11 +1292,17 @@ 
compiler_use_new_implicit_block_if_needed(struct compiler *c) */ static int -basicblock_addop(basicblock *b, int opcode, struct location loc) +basicblock_addop(basicblock *b, int opcode, int oparg, + basicblock *target, struct location loc) { assert(IS_WITHIN_OPCODE_RANGE(opcode)); assert(!IS_ASSEMBLER_OPCODE(opcode)); - assert(!HAS_ARG(opcode) || IS_ARTIFICIAL(opcode)); + assert(HAS_ARG(opcode) || oparg == 0); + assert(0 <= oparg && oparg <= 2147483647); + assert((target == NULL) || + IS_JUMP_OPCODE(opcode) || + IS_BLOCK_PUSH_OPCODE(opcode)); + assert(oparg == 0 || target == NULL); int off = basicblock_next_instr(b); if (off < 0) { @@ -1299,7 +1310,8 @@ basicblock_addop(basicblock *b, int opcode, struct location loc) } struct instr *i = &b->b_instr[off]; i->i_opcode = opcode; - i->i_oparg = 0; + i->i_oparg = oparg; + i->i_target = target; i->i_lineno = loc.lineno; i->i_end_lineno = loc.end_lineno; i->i_col_offset = loc.col_offset; @@ -1311,12 +1323,13 @@ basicblock_addop(basicblock *b, int opcode, struct location loc) static int compiler_addop(struct compiler *c, int opcode, bool line) { + assert(!HAS_ARG(opcode) || IS_ARTIFICIAL(opcode)); if (compiler_use_new_implicit_block_if_needed(c) < 0) { return -1; } struct location loc = line ? CU_LOCATION(c->u) : NO_LOCATION; - return basicblock_addop(c->u->u_curblock, opcode, loc); + return basicblock_addop(c->u->u_curblock, opcode, 0, NULL, loc); } static Py_ssize_t @@ -1507,11 +1520,12 @@ compiler_addop_name(struct compiler *c, int opcode, PyObject *dict, /* Add an opcode with an integer argument. Returns 0 on failure, 1 on success. */ - static int -basicblock_addop_i(basicblock *b, int opcode, Py_ssize_t oparg, - struct location loc) +compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg, bool line) { + if (compiler_use_new_implicit_block_if_needed(c) < 0) { + return -1; + } /* oparg value is unsigned, but a signed C int is usually used to store it in the C code (like Python/ceval.c). 
@@ -1520,57 +1534,10 @@ basicblock_addop_i(basicblock *b, int opcode, Py_ssize_t oparg, The argument of a concrete bytecode instruction is limited to 8-bit. EXTENDED_ARG is used for 16, 24, and 32-bit arguments. */ - assert(IS_WITHIN_OPCODE_RANGE(opcode)); - assert(!IS_ASSEMBLER_OPCODE(opcode)); - assert(0 <= oparg && oparg <= 2147483647); - - int off = basicblock_next_instr(b); - if (off < 0) { - return 0; - } - struct instr *i = &b->b_instr[off]; - i->i_opcode = opcode; - i->i_oparg = Py_SAFE_DOWNCAST(oparg, Py_ssize_t, int); - i->i_lineno = loc.lineno; - i->i_end_lineno = loc.end_lineno; - i->i_col_offset = loc.col_offset; - i->i_end_col_offset = loc.end_col_offset; + int oparg_ = Py_SAFE_DOWNCAST(oparg, Py_ssize_t, int); - return 1; -} - -static int -compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg, bool line) -{ - if (compiler_use_new_implicit_block_if_needed(c) < 0) { - return -1; - } struct location loc = line ? CU_LOCATION(c->u) : NO_LOCATION; - return basicblock_addop_i(c->u->u_curblock, opcode, oparg, loc); -} - -static int -basicblock_add_jump(basicblock *b, int opcode, - struct location loc, basicblock *target) -{ - assert(IS_WITHIN_OPCODE_RANGE(opcode)); - assert(!IS_ASSEMBLER_OPCODE(opcode)); - assert(HAS_ARG(opcode) || IS_VIRTUAL_OPCODE(opcode)); - assert(target != NULL); - - int off = basicblock_next_instr(b); - struct instr *i = &b->b_instr[off]; - if (off < 0) { - return 0; - } - i->i_opcode = opcode; - i->i_target = target; - i->i_lineno = loc.lineno; - i->i_end_lineno = loc.end_lineno; - i->i_col_offset = loc.col_offset; - i->i_end_col_offset = loc.end_col_offset; - - return 1; + return basicblock_addop(c->u->u_curblock, opcode, oparg_, NULL, loc); } static int @@ -1580,7 +1547,9 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *target, bool line) return -1; } struct location loc = line ? 
CU_LOCATION(c->u) : NO_LOCATION; - return basicblock_add_jump(c->u->u_curblock, opcode, loc, target); + assert(target != NULL); + assert(IS_JUMP_OPCODE(opcode) || IS_BLOCK_PUSH_OPCODE(opcode)); + return basicblock_addop(c->u->u_curblock, opcode, 0, target, loc); } #define ADDOP(C, OP) { \ @@ -7455,7 +7424,7 @@ push_cold_blocks_to_end(struct compiler *c, basicblock *entry, int code_flags) { if (explicit_jump == NULL) { return -1; } - basicblock_add_jump(explicit_jump, JUMP, NO_LOCATION, b->b_next); + basicblock_addop(explicit_jump, JUMP, 0, b->b_next, NO_LOCATION); explicit_jump->b_cold = 1; explicit_jump->b_next = b->b_next; From 430b4b97bc87581daf7f3fbfde91d8c5e8d15b6e Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 10 Jun 2022 16:57:52 +0100 Subject: [PATCH 04/14] add news --- .../2022-06-10-16-57-35.gh-issue-93678.1WBnHt.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-06-10-16-57-35.gh-issue-93678.1WBnHt.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-06-10-16-57-35.gh-issue-93678.1WBnHt.rst b/Misc/NEWS.d/next/Core and Builtins/2022-06-10-16-57-35.gh-issue-93678.1WBnHt.rst new file mode 100644 index 00000000000000..f4378763404b50 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-06-10-16-57-35.gh-issue-93678.1WBnHt.rst @@ -0,0 +1,2 @@ +Refactor the compiler's code-gen functions to reduce boilerplate and +repetition. 
From 66d560935aa920881b3ccf84823f0d84f310db60 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 10 Jun 2022 17:19:20 +0100 Subject: [PATCH 05/14] assume that oparg is < (1<<30) --- Python/compile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/compile.c b/Python/compile.c index 975a903c2649ff..3cdf1b880b9672 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -1298,7 +1298,7 @@ basicblock_addop(basicblock *b, int opcode, int oparg, assert(IS_WITHIN_OPCODE_RANGE(opcode)); assert(!IS_ASSEMBLER_OPCODE(opcode)); assert(HAS_ARG(opcode) || oparg == 0); - assert(0 <= oparg && oparg <= 2147483647); + assert(0 <= oparg && oparg < (1 << 30)); assert((target == NULL) || IS_JUMP_OPCODE(opcode) || IS_BLOCK_PUSH_OPCODE(opcode)); From 7d74572d77d5c11cb236bb1bb30dba2d2bd84fca Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 10 Jun 2022 20:11:47 +0100 Subject: [PATCH 06/14] use the new location struct for the compiler unit's location --- Python/compile.c | 133 ++++++++++++++++++----------------------------- 1 file changed, 51 insertions(+), 82 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 362a07692db5d3..5108c249960344 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -339,6 +339,19 @@ enum { COMPILER_SCOPE_COMPREHENSION, }; +struct location { + int lineno; + int end_lineno; + int col_offset; + int end_col_offset; +}; + +#define LOCATION(LNO, END_LNO, COL, END_COL) \ + ((struct location){(LNO), (END_LNO), (COL), (END_COL)}) + +#define NO_LOCATION (LOCATION(-1, -1, -1, -1)) + + /* The following items change on entry and exit of code blocks. They must be saved and restored when returning to a block. 
*/ @@ -373,12 +386,10 @@ struct compiler_unit { struct fblockinfo u_fblock[CO_MAXBLOCKS]; int u_firstlineno; /* the first lineno of the block */ - int u_lineno; /* the lineno for the current stmt */ - int u_col_offset; /* the offset of the current stmt */ - int u_end_lineno; /* the end line of the current stmt */ - int u_end_col_offset; /* the end offset of the current stmt */ + struct location u_loc; /* line/column info of the current stmt */ }; + /* This struct captures the global state of a compilation. The u pointer points to the current compilation unit, while units @@ -952,18 +963,14 @@ basicblock_next_instr(basicblock *b) - before the "except" and "finally" clauses */ -#define SET_LOC(c, x) \ - (c)->u->u_lineno = (x)->lineno; \ - (c)->u->u_col_offset = (x)->col_offset; \ - (c)->u->u_end_lineno = (x)->end_lineno; \ - (c)->u->u_end_col_offset = (x)->end_col_offset; +#define SET_LOC(c, x) \ + (c)->u->u_loc = LOCATION((x)->lineno, \ + (x)->end_lineno, \ + (x)->col_offset, \ + (x)->end_col_offset) // Artificial instructions -#define UNSET_LOC(c) \ - (c)->u->u_lineno = -1; \ - (c)->u->u_col_offset = -1; \ - (c)->u->u_end_lineno = -1; \ - (c)->u->u_end_col_offset = -1; +#define UNSET_LOC(c) (c)->u->u_loc = NO_LOCATION #define COPY_INSTR_LOC(old, new) \ (new).i_lineno = (old).i_lineno; \ @@ -972,24 +979,6 @@ basicblock_next_instr(basicblock *b) (new).i_end_col_offset = (old).i_end_col_offset; -struct location { - int lineno; - int end_lineno; - int col_offset; - int end_col_offset; -}; - -#define NO_LOCATION ((struct location){-1, 0, 0, 0}) - -/* current compiler unit's location */ -#define CU_LOCATION(CU) \ - ((struct location){ \ - (CU)->u_lineno, \ - (CU)->u_end_lineno, \ - (CU)->u_col_offset, \ - (CU)->u_end_col_offset, \ - }) - /* Return the stack effect of opcode with argument oparg. 
Some opcodes have different stack effect when jump to the target and @@ -1328,7 +1317,7 @@ compiler_addop(struct compiler *c, int opcode, bool line) return -1; } - struct location loc = line ? CU_LOCATION(c->u) : NO_LOCATION; + struct location loc = line ? c->u->u_loc : NO_LOCATION; return basicblock_addop(c->u->u_curblock, opcode, 0, NULL, loc); } @@ -1536,7 +1525,7 @@ compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg, bool line) int oparg_ = Py_SAFE_DOWNCAST(oparg, Py_ssize_t, int); - struct location loc = line ? CU_LOCATION(c->u) : NO_LOCATION; + struct location loc = line ? c->u->u_loc : NO_LOCATION; return basicblock_addop(c->u->u_curblock, opcode, oparg_, NULL, loc); } @@ -1546,7 +1535,7 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *target, bool line) if (compiler_use_new_implicit_block_if_needed(c) < 0) { return -1; } - struct location loc = line ? CU_LOCATION(c->u) : NO_LOCATION; + struct location loc = line ? c->u->u_loc : NO_LOCATION; assert(target != NULL); assert(IS_JUMP_OPCODE(opcode) || IS_BLOCK_PUSH_OPCODE(opcode)); return basicblock_addop(c->u->u_curblock, opcode, 0, target, loc); @@ -1740,10 +1729,7 @@ compiler_enter_scope(struct compiler *c, identifier name, u->u_blocks = NULL; u->u_nfblocks = 0; u->u_firstlineno = lineno; - u->u_lineno = lineno; - u->u_col_offset = 0; - u->u_end_lineno = lineno; - u->u_end_col_offset = 0; + u->u_loc = LOCATION(lineno, lineno, 0, 0); u->u_consts = PyDict_New(); if (!u->u_consts) { compiler_unit_free(u); @@ -1779,7 +1765,7 @@ compiler_enter_scope(struct compiler *c, identifier name, c->u->u_curblock = block; if (u->u_scope_type == COMPILER_SCOPE_MODULE) { - c->u->u_lineno = -1; + c->u->u_loc.lineno = -1; } else { if (!compiler_set_qualname(c)) @@ -2150,7 +2136,7 @@ compiler_mod(struct compiler *c, mod_ty mod) mod, 1)) { return NULL; } - c->u->u_lineno = 1; + c->u->u_loc.lineno = 1; switch (mod->kind) { case Module_kind: if (!compiler_body(c, mod->v.Module.body)) { @@ -2294,18 
+2280,12 @@ compiler_apply_decorators(struct compiler *c, asdl_expr_seq* decos) if (!decos) return 1; - int old_lineno = c->u->u_lineno; - int old_end_lineno = c->u->u_end_lineno; - int old_col_offset = c->u->u_col_offset; - int old_end_col_offset = c->u->u_end_col_offset; + struct location old_loc = c->u->u_loc; for (Py_ssize_t i = asdl_seq_LEN(decos) - 1; i > -1; i--) { SET_LOC(c, (expr_ty)asdl_seq_GET(decos, i)); ADDOP_I(c, CALL, 0); } - c->u->u_lineno = old_lineno; - c->u->u_end_lineno = old_end_lineno; - c->u->u_col_offset = old_col_offset; - c->u->u_end_col_offset = old_end_col_offset; + c->u->u_loc = old_loc; return 1; } @@ -4795,8 +4775,8 @@ maybe_optimize_method_call(struct compiler *c, expr_ty e) } /* Alright, we can optimize the code. */ VISIT(c, expr, meth->v.Attribute.value); - int old_lineno = c->u->u_lineno; - c->u->u_lineno = meth->end_lineno; + int old_lineno = c->u->u_loc.lineno; + c->u->u_loc.lineno = meth->end_lineno; ADDOP_NAME(c, LOAD_METHOD, meth->v.Attribute.attr, names); VISIT_SEQ(c, expr, e->v.Call.args); @@ -4807,7 +4787,7 @@ maybe_optimize_method_call(struct compiler *c, expr_ty e) }; } ADDOP_I(c, CALL, argsl + kwdsl); - c->u->u_lineno = old_lineno; + c->u->u_loc.lineno = old_lineno; return 1; } @@ -5821,20 +5801,20 @@ compiler_visit_expr1(struct compiler *c, expr_ty e) switch (e->v.Attribute.ctx) { case Load: { - int old_lineno = c->u->u_lineno; - c->u->u_lineno = e->end_lineno; + int old_lineno = c->u->u_loc.lineno; + c->u->u_loc.lineno = e->end_lineno; ADDOP_NAME(c, LOAD_ATTR, e->v.Attribute.attr, names); - c->u->u_lineno = old_lineno; + c->u->u_loc.lineno = old_lineno; break; } case Store: if (forbidden_name(c, e->v.Attribute.attr, e->v.Attribute.ctx)) { return 0; } - int old_lineno = c->u->u_lineno; - c->u->u_lineno = e->end_lineno; + int old_lineno = c->u->u_loc.lineno; + c->u->u_loc.lineno = e->end_lineno; ADDOP_NAME(c, STORE_ATTR, e->v.Attribute.attr, names); - c->u->u_lineno = old_lineno; + c->u->u_loc.lineno = old_lineno; 
break; case Del: ADDOP_NAME(c, DELETE_ATTR, e->v.Attribute.attr, names); @@ -5871,16 +5851,10 @@ compiler_visit_expr1(struct compiler *c, expr_ty e) static int compiler_visit_expr(struct compiler *c, expr_ty e) { - int old_lineno = c->u->u_lineno; - int old_end_lineno = c->u->u_end_lineno; - int old_col_offset = c->u->u_col_offset; - int old_end_col_offset = c->u->u_end_col_offset; + struct location old_loc = c->u->u_loc; SET_LOC(c, e); int res = compiler_visit_expr1(c, e); - c->u->u_lineno = old_lineno; - c->u->u_end_lineno = old_end_lineno; - c->u->u_col_offset = old_col_offset; - c->u->u_end_col_offset = old_end_col_offset; + c->u->u_loc = old_loc; return res; } @@ -5890,20 +5864,17 @@ compiler_augassign(struct compiler *c, stmt_ty s) assert(s->kind == AugAssign_kind); expr_ty e = s->v.AugAssign.target; - int old_lineno = c->u->u_lineno; - int old_end_lineno = c->u->u_end_lineno; - int old_col_offset = c->u->u_col_offset; - int old_end_col_offset = c->u->u_end_col_offset; + struct location old_loc = c->u->u_loc; SET_LOC(c, e); switch (e->kind) { case Attribute_kind: VISIT(c, expr, e->v.Attribute.value); ADDOP_I(c, COPY, 1); - int old_lineno = c->u->u_lineno; - c->u->u_lineno = e->end_lineno; + int old_lineno = c->u->u_loc.lineno; + c->u->u_loc.lineno = e->end_lineno; ADDOP_NAME(c, LOAD_ATTR, e->v.Attribute.attr, names); - c->u->u_lineno = old_lineno; + c->u->u_loc.lineno = old_lineno; break; case Subscript_kind: VISIT(c, expr, e->v.Subscript.value); @@ -5923,10 +5894,7 @@ compiler_augassign(struct compiler *c, stmt_ty s) return 0; } - c->u->u_lineno = old_lineno; - c->u->u_end_lineno = old_end_lineno; - c->u->u_col_offset = old_col_offset; - c->u->u_end_col_offset = old_end_col_offset; + c->u->u_loc = old_loc; VISIT(c, expr, s->v.AugAssign.value); ADDOP_INPLACE(c, s->v.AugAssign.op); @@ -5935,7 +5903,7 @@ compiler_augassign(struct compiler *c, stmt_ty s) switch (e->kind) { case Attribute_kind: - c->u->u_lineno = e->end_lineno; + c->u->u_loc.lineno = 
e->end_lineno; ADDOP_I(c, SWAP, 2); ADDOP_NAME(c, STORE_ATTR, e->v.Attribute.attr, names); break; @@ -6084,14 +6052,15 @@ compiler_error(struct compiler *c, const char *format, ...) if (msg == NULL) { return 0; } - PyObject *loc = PyErr_ProgramTextObject(c->c_filename, c->u->u_lineno); + PyObject *loc = PyErr_ProgramTextObject(c->c_filename, c->u->u_loc.lineno); if (loc == NULL) { Py_INCREF(Py_None); loc = Py_None; } + struct location u_loc = c->u->u_loc; PyObject *args = Py_BuildValue("O(OiiOii)", msg, c->c_filename, - c->u->u_lineno, c->u->u_col_offset + 1, loc, - c->u->u_end_lineno, c->u->u_end_col_offset + 1); + u_loc.lineno, u_loc.col_offset + 1, loc, + u_loc.end_lineno, u_loc.end_col_offset + 1); Py_DECREF(msg); if (args == NULL) { goto exit; @@ -6118,7 +6087,7 @@ compiler_warn(struct compiler *c, const char *format, ...) return 0; } if (PyErr_WarnExplicitObject(PyExc_SyntaxWarning, msg, c->c_filename, - c->u->u_lineno, NULL, NULL) < 0) + c->u->u_loc.lineno, NULL, NULL) < 0) { if (PyErr_ExceptionMatches(PyExc_SyntaxWarning)) { /* Replace the SyntaxWarning exception with a SyntaxError From 016ebbc821bcc3f3b3a7591d01d961ce671fae22 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 10 Jun 2022 21:19:19 +0100 Subject: [PATCH 07/14] use the new location struct for the instruction location --- Python/compile.c | 149 +++++++++++++++++++---------------------------- 1 file changed, 59 insertions(+), 90 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 5108c249960344..9290c32fbdb723 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -149,6 +149,18 @@ (c->c_flags->cf_flags & PyCF_ALLOW_TOP_LEVEL_AWAIT) \ && (c->u->u_ste->ste_type == ModuleBlock)) +struct location { + int lineno; + int end_lineno; + int col_offset; + int end_col_offset; +}; + +#define LOCATION(LNO, END_LNO, COL, END_COL) \ + ((struct location){(LNO), (END_LNO), (COL), (END_COL)}) + +#define NO_LOCATION (LOCATION(-1, -1, -1, -1)) + struct instr { int i_opcode; int i_oparg; @@ 
-156,10 +168,7 @@ struct instr { struct basicblock_ *i_target; /* target block when exception is raised, should not be set by front-end. */ struct basicblock_ *i_except; - int i_lineno; - int i_end_lineno; - int i_col_offset; - int i_end_col_offset; + struct location i_loc; }; typedef struct exceptstack { @@ -339,18 +348,6 @@ enum { COMPILER_SCOPE_COMPREHENSION, }; -struct location { - int lineno; - int end_lineno; - int col_offset; - int end_col_offset; -}; - -#define LOCATION(LNO, END_LNO, COL, END_COL) \ - ((struct location){(LNO), (END_LNO), (COL), (END_COL)}) - -#define NO_LOCATION (LOCATION(-1, -1, -1, -1)) - /* The following items change on entry and exit of code blocks. They must be saved and restored when returning to a block. @@ -972,12 +969,6 @@ basicblock_next_instr(basicblock *b) // Artificial instructions #define UNSET_LOC(c) (c)->u->u_loc = NO_LOCATION -#define COPY_INSTR_LOC(old, new) \ - (new).i_lineno = (old).i_lineno; \ - (new).i_col_offset = (old).i_col_offset; \ - (new).i_end_lineno = (old).i_end_lineno; \ - (new).i_end_col_offset = (old).i_end_col_offset; - /* Return the stack effect of opcode with argument oparg. 
@@ -1301,10 +1292,7 @@ basicblock_addop(basicblock *b, int opcode, int oparg, i->i_opcode = opcode; i->i_oparg = oparg; i->i_target = target; - i->i_lineno = loc.lineno; - i->i_end_lineno = loc.end_lineno; - i->i_col_offset = loc.col_offset; - i->i_end_col_offset = loc.end_col_offset; + i->i_loc = loc; return 1; } @@ -7617,11 +7605,11 @@ write_location_info_long_form(struct assembler* a, struct instr* i, int length) { assert(length > 0 && length <= 8); write_location_first_byte(a, PY_CODE_LOCATION_INFO_LONG, length); - write_location_signed_varint(a, i->i_lineno - a->a_lineno); - assert(i->i_end_lineno >= i->i_lineno); - write_location_varint(a, i->i_end_lineno - i->i_lineno); - write_location_varint(a, i->i_col_offset+1); - write_location_varint(a, i->i_end_col_offset+1); + write_location_signed_varint(a, i->i_loc.lineno - a->a_lineno); + assert(i->i_loc.end_lineno >= i->i_loc.lineno); + write_location_varint(a, i->i_loc.end_lineno - i->i_loc.lineno); + write_location_varint(a, i->i_loc.col_offset + 1); + write_location_varint(a, i->i_loc.end_col_offset + 1); } static void @@ -7649,35 +7637,35 @@ write_location_info_entry(struct assembler* a, struct instr* i, int isize) return 0; } } - if (i->i_lineno < 0) { + if (i->i_loc.lineno < 0) { write_location_info_none(a, isize); return 1; } - int line_delta = i->i_lineno - a->a_lineno; - int column = i->i_col_offset; - int end_column = i->i_end_col_offset; + int line_delta = i->i_loc.lineno - a->a_lineno; + int column = i->i_loc.col_offset; + int end_column = i->i_loc.end_col_offset; assert(column >= -1); assert(end_column >= -1); if (column < 0 || end_column < 0) { - if (i->i_end_lineno == i->i_lineno || i->i_end_lineno == -1) { + if (i->i_loc.end_lineno == i->i_loc.lineno || i->i_loc.end_lineno == -1) { write_location_info_no_column(a, isize, line_delta); - a->a_lineno = i->i_lineno; + a->a_lineno = i->i_loc.lineno; return 1; } } - else if (i->i_end_lineno == i->i_lineno) { + else if (i->i_loc.end_lineno == 
i->i_loc.lineno) { if (line_delta == 0 && column < 80 && end_column - column < 16) { write_location_info_short_form(a, isize, column, end_column); return 1; } if (line_delta >= 0 && line_delta < 3 && column < 128 && end_column < 128) { write_location_info_oneline_form(a, isize, line_delta, column, end_column); - a->a_lineno = i->i_lineno; + a->a_lineno = i->i_loc.lineno; return 1; } } write_location_info_long_form(a, i, isize); - a->a_lineno = i->i_lineno; + a->a_lineno = i->i_loc.lineno; return 1; } @@ -8216,7 +8204,7 @@ dump_instr(struct instr *i) sprintf(arg, "except_target: %p ", i->i_target); } fprintf(stderr, "line: %d, opcode: %d %s%s%s\n", - i->i_lineno, i->i_opcode, arg, jabs, jrel); + i->i_loc.lineno, i->i_opcode, arg, jabs, jrel); } static void @@ -8310,10 +8298,7 @@ insert_prefix_instructions(struct compiler *c, basicblock *entryblock, struct instr make_gen = { .i_opcode = RETURN_GENERATOR, .i_oparg = 0, - .i_lineno = c->u->u_firstlineno, - .i_col_offset = -1, - .i_end_lineno = c->u->u_firstlineno, - .i_end_col_offset = -1, + .i_loc = LOCATION(c->u->u_firstlineno, c->u->u_firstlineno, -1, -1), .i_target = NULL, }; if (insert_instruction(entryblock, 0, &make_gen) < 0) { @@ -8322,10 +8307,7 @@ insert_prefix_instructions(struct compiler *c, basicblock *entryblock, struct instr pop_top = { .i_opcode = POP_TOP, .i_oparg = 0, - .i_lineno = -1, - .i_col_offset = -1, - .i_end_lineno = -1, - .i_end_col_offset = -1, + .i_loc = NO_LOCATION, .i_target = NULL, }; if (insert_instruction(entryblock, 1, &pop_top) < 0) { @@ -8357,10 +8339,7 @@ insert_prefix_instructions(struct compiler *c, basicblock *entryblock, .i_opcode = MAKE_CELL, // This will get fixed in offset_derefs(). 
.i_oparg = oldindex, - .i_lineno = -1, - .i_col_offset = -1, - .i_end_lineno = -1, - .i_end_col_offset = -1, + .i_loc = NO_LOCATION, .i_target = NULL, }; if (insert_instruction(entryblock, ncellsused, &make_cell) < 0) { @@ -8375,10 +8354,7 @@ insert_prefix_instructions(struct compiler *c, basicblock *entryblock, struct instr copy_frees = { .i_opcode = COPY_FREE_VARS, .i_oparg = nfreevars, - .i_lineno = -1, - .i_col_offset = -1, - .i_end_lineno = -1, - .i_end_col_offset = -1, + .i_loc = NO_LOCATION, .i_target = NULL, }; if (insert_instruction(entryblock, 0, &copy_frees) < 0) { @@ -8403,17 +8379,17 @@ guarantee_lineno_for_exits(struct assembler *a, int firstlineno) { continue; } struct instr *last = &b->b_instr[b->b_iused-1]; - if (last->i_lineno < 0) { + if (last->i_loc.lineno < 0) { if (last->i_opcode == RETURN_VALUE) { for (int i = 0; i < b->b_iused; i++) { - assert(b->b_instr[i].i_lineno < 0); - - b->b_instr[i].i_lineno = lineno; + assert(b->b_instr[i].i_loc.lineno < 0); + + b->b_instr[i].i_loc.lineno = lineno; } } } else { - lineno = last->i_lineno; + lineno = last->i_loc.lineno; } } } @@ -8557,8 +8533,8 @@ assemble(struct compiler *c, int addNone) /* Set firstlineno if it wasn't explicitly set. */ if (!c->u->u_firstlineno) { - if (entryblock->b_instr && entryblock->b_instr->i_lineno) { - c->u->u_firstlineno = entryblock->b_instr->i_lineno; + if (entryblock->b_instr && entryblock->b_instr->i_loc.lineno) { + c->u->u_firstlineno = entryblock->b_instr->i_loc.lineno; } else { c->u->u_firstlineno = 1; @@ -8880,7 +8856,7 @@ next_swappable_instruction(basicblock *block, int i, int lineno) { while (++i < block->b_iused) { struct instr *instruction = &block->b_instr[i]; - if (0 <= lineno && instruction->i_lineno != lineno) { + if (0 <= lineno && instruction->i_loc.lineno != lineno) { // Optimizing across this instruction could cause user-visible // changes in the names bound between line tracing events!
return -1; @@ -8919,7 +8895,7 @@ apply_static_swaps(basicblock *block, int i) return; } int k = j; - int lineno = block->b_instr[j].i_lineno; + int lineno = block->b_instr[j].i_loc.lineno; for (int count = swap->i_oparg - 1; 0 < count; count--) { k = next_swappable_instruction(block, k, lineno); if (k < 0) { @@ -8945,7 +8921,7 @@ jump_thread(struct instr *inst, struct instr *target, int opcode) assert(is_jump(target)); // bpo-45773: If inst->i_target == target->i_target, then nothing actually // changes (and we fall into an infinite loop): - if ((inst->i_lineno == target->i_lineno || target->i_lineno == -1) && + if ((inst->i_loc.lineno == target->i_loc.lineno || target->i_loc.lineno == -1) && inst->i_target != target->i_target) { inst->i_target = target->i_target; @@ -9099,7 +9075,7 @@ optimize_basic_block(struct compiler *c, basicblock *bb, PyObject *consts) break; case JUMP_IF_TRUE_OR_POP: case POP_JUMP_IF_TRUE: - if (inst->i_lineno == target->i_lineno) { + if (inst->i_loc.lineno == target->i_loc.lineno) { // We don't need to bother checking for loops here, // since a block's b_next cannot point to itself: assert(inst->i_target != inst->i_target->b_next); @@ -9121,7 +9097,7 @@ optimize_basic_block(struct compiler *c, basicblock *bb, PyObject *consts) break; case JUMP_IF_FALSE_OR_POP: case POP_JUMP_IF_FALSE: - if (inst->i_lineno == target->i_lineno) { + if (inst->i_loc.lineno == target->i_loc.lineno) { // We don't need to bother checking for loops here, // since a block's b_next cannot point to itself: assert(inst->i_target != inst->i_target->b_next); @@ -9232,7 +9208,7 @@ clean_basic_block(basicblock *bb) { int dest = 0; int prev_lineno = -1; for (int src = 0; src < bb->b_iused; src++) { - int lineno = bb->b_instr[src].i_lineno; + int lineno = bb->b_instr[src].i_loc.lineno; if (bb->b_instr[src].i_opcode == NOP) { /* Eliminate no-op if it doesn't have a line number */ if (lineno < 0) { @@ -9244,9 +9220,9 @@ clean_basic_block(basicblock *bb) { } /* or, if the next 
instruction has same line number or no line number */ if (src < bb->b_iused - 1) { - int next_lineno = bb->b_instr[src+1].i_lineno; + int next_lineno = bb->b_instr[src+1].i_loc.lineno; if (next_lineno < 0 || next_lineno == lineno) { - COPY_INSTR_LOC(bb->b_instr[src], bb->b_instr[src+1]); + bb->b_instr[src+1].i_loc = bb->b_instr[src].i_loc; continue; } } @@ -9257,7 +9233,7 @@ clean_basic_block(basicblock *bb) { } /* or if last instruction in BB and next BB has same line number */ if (next) { - if (lineno == next->b_instr[0].i_lineno) { + if (lineno == next->b_instr[0].i_loc.lineno) { continue; } } @@ -9384,33 +9360,26 @@ propagate_line_numbers(struct assembler *a) { continue; } - // Not a real instruction, only to store positions - // from previous instructions and propagate them. - struct instr prev_instr = { - .i_lineno = -1, - .i_col_offset = -1, - .i_end_lineno = -1, - .i_end_col_offset = -1, - }; + struct location prev_location = NO_LOCATION; for (int i = 0; i < b->b_iused; i++) { - if (b->b_instr[i].i_lineno < 0) { - COPY_INSTR_LOC(prev_instr, b->b_instr[i]); + if (b->b_instr[i].i_loc.lineno < 0) { + b->b_instr[i].i_loc = prev_location; } else { - COPY_INSTR_LOC(b->b_instr[i], prev_instr); + prev_location = b->b_instr[i].i_loc; } } if (BB_HAS_FALLTHROUGH(b) && b->b_next->b_predecessors == 1) { assert(b->b_next->b_iused); - if (b->b_next->b_instr[0].i_lineno < 0) { - COPY_INSTR_LOC(prev_instr, b->b_next->b_instr[0]); + if (b->b_next->b_instr[0].i_loc.lineno < 0) { + b->b_next->b_instr[0].i_loc = prev_location; } } if (is_jump(&b->b_instr[b->b_iused-1])) { basicblock *target = b->b_instr[b->b_iused-1].i_target; if (target->b_predecessors == 1) { - if (target->b_instr[0].i_lineno < 0) { - COPY_INSTR_LOC(prev_instr, target->b_instr[0]); + if (target->b_instr[0].i_loc.lineno < 0) { + target->b_instr[0].i_loc = prev_location; } } } @@ -9491,7 +9460,7 @@ is_exit_without_lineno(basicblock *b) { return 0; } for (int i = 0; i < b->b_iused; i++) { - if 
(b->b_instr[i].i_lineno >= 0) { + if (b->b_instr[i].i_loc.lineno >= 0) { return 0; } } @@ -9520,7 +9489,7 @@ duplicate_exits_without_lineno(struct compiler *c) if (new_target == NULL) { return -1; } - COPY_INSTR_LOC(b->b_instr[b->b_iused-1], new_target->b_instr[0]); + new_target->b_instr[0].i_loc = b->b_instr[b->b_iused-1].i_loc; b->b_instr[b->b_iused-1].i_target = new_target; target->b_predecessors--; new_target->b_predecessors = 1; @@ -9541,7 +9510,7 @@ duplicate_exits_without_lineno(struct compiler *c) if (BB_HAS_FALLTHROUGH(b) && b->b_next && b->b_iused > 0) { if (is_exit_without_lineno(b->b_next)) { assert(b->b_next->b_iused > 0); - COPY_INSTR_LOC(b->b_instr[b->b_iused-1], b->b_next->b_instr[0]); + b->b_next->b_instr[0].i_loc = b->b_instr[b->b_iused-1].i_loc; } } } From 5e06cad8cf2d4023ff97cd6eefef5e2f4166eba2 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 10 Jun 2022 23:09:24 +0100 Subject: [PATCH 08/14] basicblock_addop takes location by reference --- Python/compile.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 9290c32fbdb723..009fd03ac136a3 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -159,7 +159,7 @@ struct location { #define LOCATION(LNO, END_LNO, COL, END_COL) \ ((struct location){(LNO), (END_LNO), (COL), (END_COL)}) -#define NO_LOCATION (LOCATION(-1, -1, -1, -1)) +static struct location NO_LOCATION = (LOCATION(-1, -1, -1, -1)); struct instr { int i_opcode; @@ -348,7 +348,6 @@ enum { COMPILER_SCOPE_COMPREHENSION, }; - /* The following items change on entry and exit of code blocks. They must be saved and restored when returning to a block. 
*/ @@ -1273,7 +1272,7 @@ compiler_use_new_implicit_block_if_needed(struct compiler *c) static int basicblock_addop(basicblock *b, int opcode, int oparg, - basicblock *target, struct location loc) + basicblock *target, const struct location *loc) { assert(IS_WITHIN_OPCODE_RANGE(opcode)); assert(!IS_ASSEMBLER_OPCODE(opcode)); @@ -1292,7 +1291,7 @@ basicblock_addop(basicblock *b, int opcode, int oparg, i->i_opcode = opcode; i->i_oparg = oparg; i->i_target = target; - i->i_loc = loc; + i->i_loc = *loc; return 1; } @@ -1305,7 +1304,7 @@ compiler_addop(struct compiler *c, int opcode, bool line) return -1; } - struct location loc = line ? c->u->u_loc : NO_LOCATION; + const struct location *loc = line ? &c->u->u_loc : &NO_LOCATION; return basicblock_addop(c->u->u_curblock, opcode, 0, NULL, loc); } @@ -1513,7 +1512,7 @@ compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg, bool line) int oparg_ = Py_SAFE_DOWNCAST(oparg, Py_ssize_t, int); - struct location loc = line ? c->u->u_loc : NO_LOCATION; + const struct location *loc = line ? &c->u->u_loc : &NO_LOCATION; return basicblock_addop(c->u->u_curblock, opcode, oparg_, NULL, loc); } @@ -1523,7 +1522,7 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *target, bool line) if (compiler_use_new_implicit_block_if_needed(c) < 0) { return -1; } - struct location loc = line ? c->u->u_loc : NO_LOCATION; + const struct location *loc = line ? 
&c->u->u_loc : &NO_LOCATION; assert(target != NULL); assert(IS_JUMP_OPCODE(opcode) || IS_BLOCK_PUSH_OPCODE(opcode)); return basicblock_addop(c->u->u_curblock, opcode, 0, target, loc); @@ -7387,7 +7386,7 @@ push_cold_blocks_to_end(struct compiler *c, basicblock *entry, int code_flags) { if (explicit_jump == NULL) { return -1; } - basicblock_addop(explicit_jump, JUMP, 0, b->b_next, NO_LOCATION); + basicblock_addop(explicit_jump, JUMP, 0, b->b_next, &NO_LOCATION); explicit_jump->b_cold = 1; explicit_jump->b_next = b->b_next; From c23e2cf0b5020bbe5f33c686037cae308cf3bc34 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 10 Jun 2022 23:35:44 +0100 Subject: [PATCH 09/14] LOCATION needs to return a const --- Python/compile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/compile.c b/Python/compile.c index 009fd03ac136a3..a59b34ea84cfbc 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -157,7 +157,7 @@ struct location { }; #define LOCATION(LNO, END_LNO, COL, END_COL) \ - ((struct location){(LNO), (END_LNO), (COL), (END_COL)}) + ((const struct location){(LNO), (END_LNO), (COL), (END_COL)}) static struct location NO_LOCATION = (LOCATION(-1, -1, -1, -1)); From fc7455552ce1cafdbadf15d133aef72aff4ca50e Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Fri, 10 Jun 2022 23:49:12 +0100 Subject: [PATCH 10/14] define static no_location in function scope --- Python/compile.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index a59b34ea84cfbc..44307fd10f356d 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -159,7 +159,7 @@ struct location { #define LOCATION(LNO, END_LNO, COL, END_COL) \ ((const struct location){(LNO), (END_LNO), (COL), (END_COL)}) -static struct location NO_LOCATION = (LOCATION(-1, -1, -1, -1)); +#define NO_LOCATION (LOCATION(-1, -1, -1, -1)) struct instr { int i_opcode; @@ -1283,6 +1283,8 @@ basicblock_addop(basicblock *b, int opcode, int 
oparg, IS_BLOCK_PUSH_OPCODE(opcode)); assert(oparg == 0 || target == NULL); + static struct location no_location = NO_LOCATION; + int off = basicblock_next_instr(b); if (off < 0) { return 0; @@ -1291,7 +1293,7 @@ basicblock_addop(basicblock *b, int opcode, int oparg, i->i_opcode = opcode; i->i_oparg = oparg; i->i_target = target; - i->i_loc = *loc; + i->i_loc = loc ? *loc : no_location; return 1; } @@ -1304,7 +1306,7 @@ compiler_addop(struct compiler *c, int opcode, bool line) return -1; } - const struct location *loc = line ? &c->u->u_loc : &NO_LOCATION; + const struct location *loc = line ? &c->u->u_loc : NULL; return basicblock_addop(c->u->u_curblock, opcode, 0, NULL, loc); } @@ -1512,7 +1514,7 @@ compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg, bool line) int oparg_ = Py_SAFE_DOWNCAST(oparg, Py_ssize_t, int); - const struct location *loc = line ? &c->u->u_loc : &NO_LOCATION; + const struct location *loc = line ? &c->u->u_loc : NULL; return basicblock_addop(c->u->u_curblock, opcode, oparg_, NULL, loc); } @@ -1522,7 +1524,7 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *target, bool line) if (compiler_use_new_implicit_block_if_needed(c) < 0) { return -1; } - const struct location *loc = line ? &c->u->u_loc : &NO_LOCATION; + const struct location *loc = line ? 
&c->u->u_loc : NULL; assert(target != NULL); assert(IS_JUMP_OPCODE(opcode) || IS_BLOCK_PUSH_OPCODE(opcode)); return basicblock_addop(c->u->u_curblock, opcode, 0, target, loc); @@ -7386,7 +7388,8 @@ push_cold_blocks_to_end(struct compiler *c, basicblock *entry, int code_flags) { if (explicit_jump == NULL) { return -1; } - basicblock_addop(explicit_jump, JUMP, 0, b->b_next, &NO_LOCATION); + static struct location no_location = NO_LOCATION; + basicblock_addop(explicit_jump, JUMP, 0, b->b_next, &no_location); explicit_jump->b_cold = 1; explicit_jump->b_next = b->b_next; @@ -8289,9 +8292,10 @@ static int insert_prefix_instructions(struct compiler *c, basicblock *entryblock, int *fixed, int nfreevars, int code_flags) { - assert(c->u->u_firstlineno > 0); + static struct location no_location = NO_LOCATION; + /* Add the generator prefix instructions. */ if (code_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) { struct instr make_gen = { @@ -8306,7 +8310,7 @@ insert_prefix_instructions(struct compiler *c, basicblock *entryblock, struct instr pop_top = { .i_opcode = POP_TOP, .i_oparg = 0, - .i_loc = NO_LOCATION, + .i_loc = no_location, .i_target = NULL, }; if (insert_instruction(entryblock, 1, &pop_top) < 0) { @@ -8338,7 +8342,7 @@ insert_prefix_instructions(struct compiler *c, basicblock *entryblock, .i_opcode = MAKE_CELL, // This will get fixed in offset_derefs(). 
.i_oparg = oldindex, - .i_loc = NO_LOCATION, + .i_loc = no_location, .i_target = NULL, }; if (insert_instruction(entryblock, ncellsused, &make_cell) < 0) { @@ -8353,7 +8357,7 @@ insert_prefix_instructions(struct compiler *c, basicblock *entryblock, struct instr copy_frees = { .i_opcode = COPY_FREE_VARS, .i_oparg = nfreevars, - .i_loc = NO_LOCATION, + .i_loc = no_location, .i_target = NULL, }; if (insert_instruction(entryblock, 0, &copy_frees) < 0) { @@ -9359,7 +9363,8 @@ propagate_line_numbers(struct assembler *a) { continue; } - struct location prev_location = NO_LOCATION; + static struct location no_location = NO_LOCATION; + struct location prev_location = no_location; for (int i = 0; i < b->b_iused; i++) { if (b->b_instr[i].i_loc.lineno < 0) { b->b_instr[i].i_loc = prev_location; From 9220fc12ff64cc31d5799309d8d0ebe892ed1418 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Sat, 11 Jun 2022 00:35:00 +0100 Subject: [PATCH 11/14] are we const now? --- Python/compile.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 44307fd10f356d..207163cfb9a52e 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -159,7 +159,7 @@ struct location { #define LOCATION(LNO, END_LNO, COL, END_COL) \ ((const struct location){(LNO), (END_LNO), (COL), (END_COL)}) -#define NO_LOCATION (LOCATION(-1, -1, -1, -1)) +#define NO_LOCATION {-1, -1, -1, -1} struct instr { int i_opcode; @@ -959,14 +959,18 @@ basicblock_next_instr(basicblock *b) - before the "except" and "finally" clauses */ -#define SET_LOC(c, x) \ - (c)->u->u_loc = LOCATION((x)->lineno, \ - (x)->end_lineno, \ - (x)->col_offset, \ - (x)->end_col_offset) +#define SET_LOC(c, x) \ + (c)->u->u_loc.lineno = (x)->lineno; \ + (c)->u->u_loc.end_lineno = (x)->end_lineno; \ + (c)->u->u_loc.col_offset = (x)->col_offset; \ + (c)->u->u_loc.end_col_offset = (x)->end_col_offset; // Artificial instructions -#define UNSET_LOC(c) (c)->u->u_loc = NO_LOCATION +#define
UNSET_LOC(c) \ + (c)->u->u_loc.lineno = -1; \ + (c)->u->u_loc.end_lineno = -1; \ + (c)->u->u_loc.col_offset = -1; \ + (c)->u->u_loc.end_col_offset = -1; /* Return the stack effect of opcode with argument oparg. From 9b5dc34b512317fe6a0ff5915a622cb5040b935c Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Sat, 11 Jun 2022 00:55:00 +0100 Subject: [PATCH 12/14] trivial stuff --- .../2022-06-10-16-57-35.gh-issue-93678.1WBnHt.rst | 3 +-- Python/compile.c | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-06-10-16-57-35.gh-issue-93678.1WBnHt.rst b/Misc/NEWS.d/next/Core and Builtins/2022-06-10-16-57-35.gh-issue-93678.1WBnHt.rst index f4378763404b50..24a0d1042d81ae 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2022-06-10-16-57-35.gh-issue-93678.1WBnHt.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2022-06-10-16-57-35.gh-issue-93678.1WBnHt.rst @@ -1,2 +1 @@ -Refactor the compiler's code-gen functions to reduce boilerplate and -repetition. +Refactor the compiler to reduce boilerplate and repetition. diff --git a/Python/compile.c b/Python/compile.c index 207163cfb9a52e..65e84eeaf6174d 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -202,10 +202,9 @@ is_relative_jump(struct instr *i) } static inline int -is_block_push(struct instr *instr) +is_block_push(struct instr *i) { - int opcode = instr->i_opcode; - return IS_BLOCK_PUSH_OPCODE(opcode); + return IS_BLOCK_PUSH_OPCODE(i->i_opcode); } static inline int @@ -385,7 +384,6 @@ struct compiler_unit { struct location u_loc; /* line/column info of the current stmt */ }; - /* This struct captures the global state of a compilation. 
The u pointer points to the current compilation unit, while units From 414e26a4a0f92b4db8623a0c5476e885452ee096 Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Mon, 13 Jun 2022 11:26:25 +0100 Subject: [PATCH 13/14] remove two unused fields from struct assembler --- Python/compile.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 65e84eeaf6174d..a56bffa0dc0c1b 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -6991,8 +6991,6 @@ struct assembler { basicblock *a_entry; int a_offset; /* offset into bytecode */ int a_except_table_off; /* offset into exception table */ - int a_prevlineno; /* lineno of last emitted line in line table */ - int a_prev_end_lineno; /* end_lineno of last emitted line in line table */ int a_lineno; /* lineno of last emitted instruction */ int a_end_lineno; /* end_lineno of last emitted instruction */ int a_lineno_start; /* bytecode start offset of current lineno */ @@ -7104,8 +7102,8 @@ static int assemble_init(struct assembler *a, int nblocks, int firstlineno) { memset(a, 0, sizeof(struct assembler)); - a->a_prevlineno = a->a_lineno = firstlineno; - a->a_prev_end_lineno = a->a_end_lineno = firstlineno; + a->a_lineno = firstlineno; + a->a_end_lineno = firstlineno; a->a_linetable = NULL; a->a_location_off = 0; a->a_except_table = NULL; From 26dc190be73b24ca25bbc3c377e1745ba7c47c9e Mon Sep 17 00:00:00 2001 From: Irit Katriel Date: Mon, 13 Jun 2022 19:56:24 +0100 Subject: [PATCH 14/14] define NO_LOCATION as a static const in global scope --- Python/compile.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index a56bffa0dc0c1b..3ba07b973ef715 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -159,7 +159,7 @@ struct location { #define LOCATION(LNO, END_LNO, COL, END_COL) \ ((const struct location){(LNO), (END_LNO), (COL), (END_COL)}) -#define NO_LOCATION {-1, -1, -1, -1} +static struct location 
NO_LOCATION = {-1, -1, -1, -1}; struct instr { int i_opcode; @@ -1285,8 +1285,6 @@ basicblock_addop(basicblock *b, int opcode, int oparg, IS_BLOCK_PUSH_OPCODE(opcode)); assert(oparg == 0 || target == NULL); - static struct location no_location = NO_LOCATION; - int off = basicblock_next_instr(b); if (off < 0) { return 0; @@ -1295,7 +1293,7 @@ basicblock_addop(basicblock *b, int opcode, int oparg, i->i_opcode = opcode; i->i_oparg = oparg; i->i_target = target; - i->i_loc = loc ? *loc : no_location; + i->i_loc = loc ? *loc : NO_LOCATION; return 1; } @@ -7388,8 +7386,7 @@ push_cold_blocks_to_end(struct compiler *c, basicblock *entry, int code_flags) { if (explicit_jump == NULL) { return -1; } - static struct location no_location = NO_LOCATION; - basicblock_addop(explicit_jump, JUMP, 0, b->b_next, &no_location); + basicblock_addop(explicit_jump, JUMP, 0, b->b_next, &NO_LOCATION); explicit_jump->b_cold = 1; explicit_jump->b_next = b->b_next; @@ -8294,8 +8291,6 @@ insert_prefix_instructions(struct compiler *c, basicblock *entryblock, { assert(c->u->u_firstlineno > 0); - static struct location no_location = NO_LOCATION; - /* Add the generator prefix instructions. */ if (code_flags & (CO_GENERATOR | CO_COROUTINE | CO_ASYNC_GENERATOR)) { struct instr make_gen = { @@ -8310,7 +8305,7 @@ insert_prefix_instructions(struct compiler *c, basicblock *entryblock, struct instr pop_top = { .i_opcode = POP_TOP, .i_oparg = 0, - .i_loc = no_location, + .i_loc = NO_LOCATION, .i_target = NULL, }; if (insert_instruction(entryblock, 1, &pop_top) < 0) { @@ -8342,7 +8337,7 @@ insert_prefix_instructions(struct compiler *c, basicblock *entryblock, .i_opcode = MAKE_CELL, // This will get fixed in offset_derefs(). 
.i_oparg = oldindex, - .i_loc = no_location, + .i_loc = NO_LOCATION, .i_target = NULL, }; if (insert_instruction(entryblock, ncellsused, &make_cell) < 0) { @@ -8357,7 +8352,7 @@ insert_prefix_instructions(struct compiler *c, basicblock *entryblock, struct instr copy_frees = { .i_opcode = COPY_FREE_VARS, .i_oparg = nfreevars, - .i_loc = no_location, + .i_loc = NO_LOCATION, .i_target = NULL, }; if (insert_instruction(entryblock, 0, &copy_frees) < 0) { @@ -9363,8 +9358,7 @@ propagate_line_numbers(struct assembler *a) { continue; } - static struct location no_location = NO_LOCATION; - struct location prev_location = no_location; + struct location prev_location = NO_LOCATION; for (int i = 0; i < b->b_iused; i++) { if (b->b_instr[i].i_loc.lineno < 0) { b->b_instr[i].i_loc = prev_location;