From 3f88cba010028c2ef6bc312156b3c37c5566cee6 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Thu, 6 Oct 2022 01:38:12 -0400 Subject: [PATCH 01/12] Use 64-bit masks, ignore locals beyond 64. --- Python/compile.c | 145 ++++++++++++++++++++++++++++------------------- 1 file changed, 86 insertions(+), 59 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 507fd040a89d7d..e48d2dd50e27c3 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -264,6 +264,9 @@ typedef struct basicblock_ { int b_startdepth; /* instruction offset for block, computed by assemble_jump_offsets() */ int b_offset; + /* Used by add_checks_for_loads_of_unknown_variables */ + uint64_t b_needs_visited_locals_mask; + uint64_t b_already_visited_locals_mask; /* Basic block is an exception handler that preserves lasti */ unsigned b_preserve_lasti : 1; /* Used by compiler passes to mark whether they have visited a basic block. */ @@ -7899,19 +7902,30 @@ assemble_jump_offsets(basicblock *entryblock) } -// Ensure each basicblock is only put onto the stack once. -#define MAYBE_PUSH(B) do { \ - if ((B)->b_visited == 0) { \ - *(*stack_top)++ = (B); \ - (B)->b_visited = 1; \ - } \ - } while (0) +// helper functions for add_checks_for_loads_of_unknown_variables +static inline void +maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp) +{ + // Push b if the unsafe mask is giving us any new information. + // To avoid overflowing the stack, only allow each block once. + // Use b->b_visited=1 to mean that b is currently on the stack. + b->b_needs_visited_locals_mask |= unsafe_mask; + if (b->b_needs_visited_locals_mask + & ~b->b_already_visited_locals_mask) + { + // Still work left to do. + if (!b->b_visited) { + // not on the stack, so push it. + *(*sp)++ = b; + b->b_visited = 1; + } + } +} static void -scan_block_for_local(int target, basicblock *b, bool unsafe_to_start, - basicblock ***stack_top) +scan_block_for_locals(basicblock *b, uint64_t unsafe_mask, basicblock ***sp) { - bool unsafe = unsafe_to_start; + // mask & (1<b_iused; i++) { struct instr *instr = &b->b_instr[i]; assert(instr->i_opcode != EXTENDED_ARG); @@ -7921,81 +7935,94 @@ scan_block_for_local(int target, basicblock *b, bool unsafe_to_start, assert(instr->i_opcode != LOAD_CONST__LOAD_FAST); assert(instr->i_opcode != STORE_FAST__STORE_FAST); assert(instr->i_opcode != LOAD_FAST__LOAD_CONST); - if (unsafe && instr->i_except != NULL) { - MAYBE_PUSH(instr->i_except); + if (instr->i_except != NULL) { + maybe_push(instr->i_except, unsafe_mask, sp); } - if (instr->i_oparg != target) { + if (instr->i_oparg >= 64) { continue; } + assert(instr->i_oparg >= 0); + uint64_t bit = (uint64_t)1 << instr->i_oparg; switch (instr->i_opcode) { - case LOAD_FAST_CHECK: - // if this doesn't raise, then var is defined - unsafe = false; + case DELETE_FAST: + unsafe_mask |= bit; break; case LOAD_FAST: - if (unsafe) { + // If this doesn't raise, then var is defined. + if (unsafe_mask & bit) { instr->i_opcode = LOAD_FAST_CHECK; } - unsafe = false; + unsafe_mask &= ~bit; break; - case STORE_FAST: - unsafe = false; + case LOAD_FAST_CHECK: + unsafe_mask &= ~bit; break; - case DELETE_FAST: - unsafe = true; + case STORE_FAST: + unsafe_mask &= ~bit; break; } } - if (unsafe) { - // unsafe at end of this block, - // so unsafe at start of next blocks - if (b->b_next && BB_HAS_FALLTHROUGH(b)) { - MAYBE_PUSH(b->b_next); - } - struct instr *last = basicblock_last_instr(b); - if (last != NULL) { - if (is_jump(last)) { - assert(last->i_target != NULL); - MAYBE_PUSH(last->i_target); - } - } + if (b->b_next && BB_HAS_FALLTHROUGH(b)) { + maybe_push(b->b_next, unsafe_mask, sp); + } + struct instr *last = basicblock_last_instr(b); + if (last && is_jump(last)) { + assert(last->i_target != NULL); + maybe_push(last->i_target, unsafe_mask, sp); } } -#undef MAYBE_PUSH static int add_checks_for_loads_of_unknown_variables(basicblock *entryblock, struct compiler *c) { + int nparams = (int)PyList_GET_SIZE(c->u->u_ste->ste_varnames); + int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames); + if (nlocals > 64) { + // Avoid O(nlocals**2) compilation: + // only analyze the first 64 locals. + // The rest get only LOAD_FAST_CHECK. + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (int i = 0; i < b->b_iused; i++) { + struct instr *instr = &b->b_instr[i]; + if (instr->i_opcode == LOAD_FAST && instr->i_oparg >= 64) { + instr->i_opcode = LOAD_FAST_CHECK; + } + } + } + nlocals = 64; + } basicblock **stack = make_cfg_traversal_stack(entryblock); if (stack == NULL) { return -1; } - Py_ssize_t nparams = PyList_GET_SIZE(c->u->u_ste->ste_varnames); - int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames); - for (int target = 0; target < nlocals; target++) { - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - b->b_visited = 0; - } - basicblock **stack_top = stack; + basicblock **sp = stack; - // First pass: find the relevant DFS starting points: - // the places where "being uninitialized" originates, - // which are the entry block and any DELETE_FAST statements. - if (target >= nparams) { - // only non-parameter locals start out uninitialized. - *(stack_top++) = entryblock; - entryblock->b_visited = 1; - } - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - scan_block_for_local(target, b, false, &stack_top); - } + // First origin of being uninitialized: + // The non-parameter locals in the entry block. + // is there a bithack for this without UB? + uint64_t start_mask = 0; + for (int i = nparams; i < nlocals; i++) { + start_mask |= (uint64_t)1 << i; + } + maybe_push(entryblock, start_mask, &sp); - // Second pass: Depth-first search to propagate uncertainty - while (stack_top > stack) { - basicblock *b = *--stack_top; - scan_block_for_local(target, b, true, &stack_top); - } + // Second origin of being uninitialized: + // There could be DELETE_FAST somewhere. + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + scan_block_for_locals(b, 0, &sp); + } + + // Now propagate the uncertainty from the origins we found: Use + // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined. + while (sp > stack) { + basicblock *b = *--sp; + // mark as no longer on stack + b->b_visited = 0; + uint64_t unsafe_mask = b->b_needs_visited_locals_mask; + assert(unsafe_mask & ~b->b_already_visited_locals_mask); + b->b_already_visited_locals_mask = unsafe_mask; + scan_block_for_locals(b, unsafe_mask, &sp); } PyMem_Free(stack); return 0; From cc52638a2b4ed2f5516123b7269811580b96b917 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Thu, 6 Oct 2022 01:59:52 -0400 Subject: [PATCH 02/12] Add test case for too many locals --- Lib/test/test_peepholer.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index ab45e3c52a039b..7db96924b89576 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -776,6 +776,29 @@ def f(): self.assertInBytecode(f, 'LOAD_FAST_CHECK') self.assertNotInBytecode(f, 'LOAD_FAST') + def test_load_fast_unknown_too_many_locals(self): + # When there get to be too many locals to analyze, + # later locals are always converted to LOAD_FAST_CHECK. + def f(): + a00 = a01 = a02 = a03 = a04 = a05 = a06 = a07 = a08 = a09 = 1 + a10 = a11 = a12 = a13 = a14 = a15 = a16 = a17 = a18 = a19 = 1 + a20 = a21 = a22 = a23 = a24 = a25 = a26 = a27 = a28 = a29 = 1 + a30 = a31 = a32 = a33 = a34 = a35 = a36 = a37 = a38 = a39 = 1 + a40 = a41 = a42 = a43 = a44 = a45 = a46 = a47 = a48 = a49 = 1 + a50 = a51 = a52 = a53 = a54 = a55 = a56 = a57 = a58 = a59 = 1 + a60 = a61 = a62 = a63 = a64 = a65 = a66 = a67 = a68 = a69 = 1 + a70 = a71 = a72 = a73 = a74 = a75 = a76 = a77 = a78 = a79 = 1 + while True: + print(a00, a01, a62, a63) + print(a64, a65, a78, a79) + + for i in 0, 1, 62, 63: + self.assertInBytecode(f, 'LOAD_FAST', f"a{i:02}") + self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}") + for i in 64, 65, 78, 79: + self.assertInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}") + self.assertNotInBytecode(f, 'LOAD_FAST', f"a{i:02}") + def test_setting_lineno_adds_check(self): code = textwrap.dedent("""\ def f(): From c54ea39895cc057cd9eac9c64dbc54f66e541d8c Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Thu, 6 Oct 2022 06:36:34 +0000 Subject: [PATCH 03/12] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2022-10-06-06-36-29.gh-issue-97912.jGRJpa.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-10-06-06-36-29.gh-issue-97912.jGRJpa.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-06-06-36-29.gh-issue-97912.jGRJpa.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-06-36-29.gh-issue-97912.jGRJpa.rst new file mode 100644 index 00000000000000..bd3d221252b2ae --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-06-06-36-29.gh-issue-97912.jGRJpa.rst @@ -0,0 +1 @@ +The compiler now avoids quadratic behavior when finding which instructions should use the :opcode:`LOAD_FAST_CHECK` opcode. From 92d6ed1b21f38e37795eaf1aed14143cef55799b Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Thu, 6 Oct 2022 21:16:14 -0400 Subject: [PATCH 04/12] Do within-basicblock analysis beyond the first 64 locals. --- Lib/test/test_peepholer.py | 7 ++++++ Python/compile.c | 46 +++++++++++++++++++++++++++++++++----- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 7db96924b89576..74dd79a0529a13 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -788,6 +788,7 @@ def f(): a50 = a51 = a52 = a53 = a54 = a55 = a56 = a57 = a58 = a59 = 1 a60 = a61 = a62 = a63 = a64 = a65 = a66 = a67 = a68 = a69 = 1 a70 = a71 = a72 = a73 = a74 = a75 = a76 = a77 = a78 = a79 = 1 + print(a70, a71, a72, a73) while True: print(a00, a01, a62, a63) print(a64, a65, a78, a79) @@ -798,6 +799,12 @@ def f(): for i in 64, 65, 78, 79: self.assertInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}") self.assertNotInBytecode(f, 'LOAD_FAST', f"a{i:02}") + for i in 70, 71, 72, 73: + # Even though we don't do the complete analysis beyond the + # first 64 locals, we can at least use LOAD_FAST + # within the same basicblock. + self.assertInBytecode(f, 'LOAD_FAST', f"a{i:02}") + self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}") def test_setting_lineno_adds_check(self): code = textwrap.dedent("""\ diff --git a/Python/compile.c b/Python/compile.c index e48d2dd50e27c3..4823963b1bb008 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -7978,18 +7978,54 @@ add_checks_for_loads_of_unknown_variables(basicblock *entryblock, { int nparams = (int)PyList_GET_SIZE(c->u->u_ste->ste_varnames); int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames); + if (nlocals > 64) { - // Avoid O(nlocals**2) compilation: - // only analyze the first 64 locals. - // The rest get only LOAD_FAST_CHECK. + // To avoid O(nlocals**2) compilation, locals beyond the first 64 + // Are only analyzed one basicblock at a time. Initialization + // information is not passed between basicblocks. + // state[oparg - 64] == blocknum means + // local #oparg is guaranteed to be initialized. + Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t)); + if (states == NULL) { + PyErr_NoMemory(); + return -1; + } + Py_ssize_t blocknum = 0; for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + blocknum++; for (int i = 0; i < b->b_iused; i++) { struct instr *instr = &b->b_instr[i]; - if (instr->i_opcode == LOAD_FAST && instr->i_oparg >= 64) { - instr->i_opcode = LOAD_FAST_CHECK; + assert(instr->i_opcode != EXTENDED_ARG); + assert(instr->i_opcode != EXTENDED_ARG_QUICK); + assert(instr->i_opcode != LOAD_FAST__LOAD_FAST); + assert(instr->i_opcode != STORE_FAST__LOAD_FAST); + assert(instr->i_opcode != LOAD_CONST__LOAD_FAST); + assert(instr->i_opcode != STORE_FAST__STORE_FAST); + assert(instr->i_opcode != LOAD_FAST__LOAD_CONST); + int arg = instr->i_oparg; + if (arg < 64) { + continue; + } + assert(arg >= 0); + switch (instr->i_opcode) { + case DELETE_FAST: + states[arg - 64] = blocknum - 1; + break; + case STORE_FAST: + states[arg - 64] = blocknum; + break; + case LOAD_FAST: + if (states[arg - 64] != blocknum) { + instr->i_opcode = LOAD_FAST_CHECK; + } + states[arg - 64] = blocknum; + break; + case LOAD_FAST_CHECK: + Py_UNREACHABLE(); } } } + PyMem_Free(states); nlocals = 64; } basicblock **stack = make_cfg_traversal_stack(entryblock); From 5d9cd8fd261a6e006b5f3733776c67fb2125edcf Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Thu, 6 Oct 2022 21:30:18 -0400 Subject: [PATCH 05/12] Save a word in basicblocks --- Python/compile.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 4823963b1bb008..f9a69a1d32438b 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -258,15 +258,14 @@ typedef struct basicblock_ { int b_iused; /* length of instruction array (b_instr) */ int b_ialloc; + /* Used by add_checks_for_loads_of_unknown_variables */ + uint64_t b_visited_locals_mask; /* Number of predecessors that a block has. */ int b_predecessors; /* depth of stack upon entry of block, computed by stackdepth() */ int b_startdepth; /* instruction offset for block, computed by assemble_jump_offsets() */ int b_offset; - /* Used by add_checks_for_loads_of_unknown_variables */ - uint64_t b_needs_visited_locals_mask; - uint64_t b_already_visited_locals_mask; /* Basic block is an exception handler that preserves lasti */ unsigned b_preserve_lasti : 1; /* Used by compiler passes to mark whether they have visited a basic block. */ @@ -7909,11 +7908,10 @@ maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp) // Push b if the unsafe mask is giving us any new information. // To avoid overflowing the stack, only allow each block once. // Use b->b_visited=1 to mean that b is currently on the stack. - b->b_needs_visited_locals_mask |= unsafe_mask; - if (b->b_needs_visited_locals_mask - & ~b->b_already_visited_locals_mask) - { - // Still work left to do. + uint64_t both = b->b_visited_locals_mask | unsafe_mask; + if (b->b_visited_locals_mask != both) { + b->b_visited_locals_mask = both; + // More work left to do. if (!b->b_visited) { // not on the stack, so push it. *(*sp)++ = b; @@ -8055,10 +8053,7 @@ add_checks_for_loads_of_unknown_variables(basicblock *entryblock, basicblock *b = *--sp; // mark as no longer on stack b->b_visited = 0; - uint64_t unsafe_mask = b->b_needs_visited_locals_mask; - assert(unsafe_mask & ~b->b_already_visited_locals_mask); - b->b_already_visited_locals_mask = unsafe_mask; - scan_block_for_locals(b, unsafe_mask, &sp); + scan_block_for_locals(b, b->b_visited_locals_mask, &sp); } PyMem_Free(stack); return 0; From a33442064bd9f7d6c9fb49604c7e927feb11c54e Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Fri, 7 Oct 2022 16:44:41 -0400 Subject: [PATCH 06/12] Refactor to add fast_scan_many_locals function --- Python/compile.c | 110 ++++++++++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 49 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index f9a69a1d32438b..0a2714ee7d7974 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -7921,8 +7921,9 @@ maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp) } static void -scan_block_for_locals(basicblock *b, uint64_t unsafe_mask, basicblock ***sp) +scan_block_for_locals(basicblock *b, basicblock ***sp) { + uint64_t unsafe_mask = b->b_visited_locals_mask; // mask & (1<b_iused; i++) { struct instr *instr = &b->b_instr[i]; @@ -7970,6 +7971,57 @@ scan_block_for_locals(basicblock *b, uint64_t unsafe_mask, basicblock ***sp) } } +static int +fast_scan_many_locals(basicblock *entryblock, int nlocals) +{ + assert(nlocals > 64); + Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t)); + if (states == NULL) { + PyErr_NoMemory(); + return -1; + } + Py_ssize_t blocknum = 0; + // state[oparg - 64] == blocknum if #oparg is guaranteed to be + // initialized, i.e., if it has had a previous LOAD_FAST or + // STORE_FAST within that basicblock (not followed by DELETE_FAST). + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + blocknum++; + for (int i = 0; i < b->b_iused; i++) { + struct instr *instr = &b->b_instr[i]; + assert(instr->i_opcode != EXTENDED_ARG); + assert(instr->i_opcode != EXTENDED_ARG_QUICK); + assert(instr->i_opcode != LOAD_FAST__LOAD_FAST); + assert(instr->i_opcode != STORE_FAST__LOAD_FAST); + assert(instr->i_opcode != LOAD_CONST__LOAD_FAST); + assert(instr->i_opcode != STORE_FAST__STORE_FAST); + assert(instr->i_opcode != LOAD_FAST__LOAD_CONST); + int arg = instr->i_oparg; + if (arg < 64) { + continue; + } + assert(arg >= 0); + switch (instr->i_opcode) { + case DELETE_FAST: + states[arg - 64] = blocknum - 1; + break; + case STORE_FAST: + states[arg - 64] = blocknum; + break; + case LOAD_FAST: + if (states[arg - 64] != blocknum) { + instr->i_opcode = LOAD_FAST_CHECK; + } + states[arg - 64] = blocknum; + break; + case LOAD_FAST_CHECK: + Py_UNREACHABLE(); + } + } + } + PyMem_Free(states); + return 0; +} + static int add_checks_for_loads_of_unknown_variables(basicblock *entryblock, struct compiler *c) @@ -7978,52 +8030,12 @@ add_checks_for_loads_of_unknown_variables(basicblock *entryblock, int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames); if (nlocals > 64) { - // To avoid O(nlocals**2) compilation, locals beyond the first 64 - // Are only analyzed one basicblock at a time. Initialization - // information is not passed between basicblocks. - // state[oparg - 64] == blocknum means - // local #oparg is guaranteed to be initialized. - Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t)); - if (states == NULL) { - PyErr_NoMemory(); + // To avoid O(nlocals**2) compilation, locals beyond the first + // 64 are only analyzed one basicblock at a time: initialization + // info is not passed between basicblocks. + if (fast_scan_many_locals(entryblock, nlocals) < 0) { return -1; } - Py_ssize_t blocknum = 0; - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - blocknum++; - for (int i = 0; i < b->b_iused; i++) { - struct instr *instr = &b->b_instr[i]; - assert(instr->i_opcode != EXTENDED_ARG); - assert(instr->i_opcode != EXTENDED_ARG_QUICK); - assert(instr->i_opcode != LOAD_FAST__LOAD_FAST); - assert(instr->i_opcode != STORE_FAST__LOAD_FAST); - assert(instr->i_opcode != LOAD_CONST__LOAD_FAST); - assert(instr->i_opcode != STORE_FAST__STORE_FAST); - assert(instr->i_opcode != LOAD_FAST__LOAD_CONST); - int arg = instr->i_oparg; - if (arg < 64) { - continue; - } - assert(arg >= 0); - switch (instr->i_opcode) { - case DELETE_FAST: - states[arg - 64] = blocknum - 1; - break; - case STORE_FAST: - states[arg - 64] = blocknum; - break; - case LOAD_FAST: - if (states[arg - 64] != blocknum) { - instr->i_opcode = LOAD_FAST_CHECK; - } - states[arg - 64] = blocknum; - break; - case LOAD_FAST_CHECK: - Py_UNREACHABLE(); - } - } - } - PyMem_Free(states); nlocals = 64; } basicblock **stack = make_cfg_traversal_stack(entryblock); @@ -8034,7 +8046,6 @@ add_checks_for_loads_of_unknown_variables(basicblock *entryblock, // First origin of being uninitialized: // The non-parameter locals in the entry block. - // is there a bithack for this without UB? uint64_t start_mask = 0; for (int i = nparams; i < nlocals; i++) { start_mask |= (uint64_t)1 << i; @@ -8042,9 +8053,10 @@ add_checks_for_loads_of_unknown_variables(basicblock *entryblock, maybe_push(entryblock, start_mask, &sp); // Second origin of being uninitialized: - // There could be DELETE_FAST somewhere. + // There could be DELETE_FAST somewhere, so + // be sure to scan each basicblock at least once. for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - scan_block_for_locals(b, 0, &sp); + scan_block_for_locals(b, &sp); } // Now propagate the uncertainty from the origins we found: Use @@ -8053,7 +8065,7 @@ add_checks_for_loads_of_unknown_variables(basicblock *entryblock, basicblock *b = *--sp; // mark as no longer on stack b->b_visited = 0; - scan_block_for_locals(b, b->b_visited_locals_mask, &sp); + scan_block_for_locals(b, &sp); } PyMem_Free(stack); return 0; From aa58a516c8879022912e0f8afbdbc8e29559663d Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Fri, 7 Oct 2022 17:03:12 -0400 Subject: [PATCH 07/12] Cover more branches for >64 locals work --- Lib/test/test_peepholer.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 74dd79a0529a13..7363452f5e132f 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -776,9 +776,10 @@ def f(): self.assertInBytecode(f, 'LOAD_FAST_CHECK') self.assertNotInBytecode(f, 'LOAD_FAST') - def test_load_fast_unknown_too_many_locals(self): - # When there get to be too many locals to analyze, - # later locals are always converted to LOAD_FAST_CHECK. + def test_load_fast_too_many_locals(self): + # When there get to be too many locals to analyze completely, + # later locals are all converted to LOAD_FAST_CHECK, except + # when a store or prior load occurred in the same basicblock. def f(): a00 = a01 = a02 = a03 = a04 = a05 = a06 = a07 = a08 = a09 = 1 a10 = a11 = a12 = a13 = a14 = a15 = a16 = a17 = a18 = a19 = 1 @@ -788,23 +789,31 @@ def f(): a50 = a51 = a52 = a53 = a54 = a55 = a56 = a57 = a58 = a59 = 1 a60 = a61 = a62 = a63 = a64 = a65 = a66 = a67 = a68 = a69 = 1 a70 = a71 = a72 = a73 = a74 = a75 = a76 = a77 = a78 = a79 = 1 + del a72, a73 + print(a73) print(a70, a71, a72, a73) while True: print(a00, a01, a62, a63) print(a64, a65, a78, a79) for i in 0, 1, 62, 63: + # First 64 locals: analyze completely self.assertInBytecode(f, 'LOAD_FAST', f"a{i:02}") self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}") for i in 64, 65, 78, 79: + # Locals >=64 not in the same basicblock self.assertInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}") self.assertNotInBytecode(f, 'LOAD_FAST', f"a{i:02}") - for i in 70, 71, 72, 73: - # Even though we don't do the complete analysis beyond the - # first 64 locals, we can at least use LOAD_FAST - # within the same basicblock. + for i in 70, 71: + # Locals >=64 in the same basicblock self.assertInBytecode(f, 'LOAD_FAST', f"a{i:02}") self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}") + # del statements should invalidate within basicblocks. + self.assertInBytecode(f, 'LOAD_FAST_CHECK', "a72") + self.assertNotInBytecode(f, 'LOAD_FAST', "a72") + # previous checked loads within a basicblock enable unchecked loads + self.assertInBytecode(f, 'LOAD_FAST_CHECK', "a73") + self.assertInBytecode(f, 'LOAD_FAST', "a73") def test_setting_lineno_adds_check(self): code = textwrap.dedent("""\ From b07e5202c2167a85189c90390193e09d24442d7f Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Fri, 7 Oct 2022 17:07:24 -0400 Subject: [PATCH 08/12] rename: b_unsafe_locals_mask --- Python/compile.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 0a2714ee7d7974..d88f92cad81202 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -259,7 +259,7 @@ typedef struct basicblock_ { /* length of instruction array (b_instr) */ int b_ialloc; /* Used by add_checks_for_loads_of_unknown_variables */ - uint64_t b_visited_locals_mask; + uint64_t b_unsafe_locals_mask; /* Number of predecessors that a block has. */ int b_predecessors; /* depth of stack upon entry of block, computed by stackdepth() */ @@ -7908,9 +7908,9 @@ maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp) // Push b if the unsafe mask is giving us any new information. // To avoid overflowing the stack, only allow each block once. // Use b->b_visited=1 to mean that b is currently on the stack. - uint64_t both = b->b_visited_locals_mask | unsafe_mask; - if (b->b_visited_locals_mask != both) { - b->b_visited_locals_mask = both; + uint64_t both = b->b_unsafe_locals_mask | unsafe_mask; + if (b->b_unsafe_locals_mask != both) { + b->b_unsafe_locals_mask = both; // More work left to do. if (!b->b_visited) { // not on the stack, so push it. @@ -7923,7 +7923,7 @@ maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp) static void scan_block_for_locals(basicblock *b, basicblock ***sp) { - uint64_t unsafe_mask = b->b_visited_locals_mask; + uint64_t unsafe_mask = b->b_unsafe_locals_mask; // mask & (1<b_iused; i++) { struct instr *instr = &b->b_instr[i]; From 01bcfb5d38e5612bdb5bba226cf67d0a54ed5176 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Fri, 7 Oct 2022 17:18:54 -0400 Subject: [PATCH 09/12] nlocals==0 check --- Python/compile.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Python/compile.c b/Python/compile.c index d88f92cad81202..bb3b6d17afdba6 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -8028,7 +8028,9 @@ add_checks_for_loads_of_unknown_variables(basicblock *entryblock, { int nparams = (int)PyList_GET_SIZE(c->u->u_ste->ste_varnames); int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames); - + if (nlocals == 0) { + return 0; + } if (nlocals > 64) { // To avoid O(nlocals**2) compilation, locals beyond the first // 64 are only analyzed one basicblock at a time: initialization From 7c442955e153d64b0fe07caeaf91c6ab7b3919d7 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Fri, 7 Oct 2022 17:52:33 -0400 Subject: [PATCH 10/12] Reorder cases --- Python/compile.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index bb3b6d17afdba6..cca9f9262bb897 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -7946,17 +7946,17 @@ scan_block_for_locals(basicblock *b, basicblock ***sp) case DELETE_FAST: unsafe_mask |= bit; break; - case LOAD_FAST: - // If this doesn't raise, then var is defined. - if (unsafe_mask & bit) { - instr->i_opcode = LOAD_FAST_CHECK; - } + case STORE_FAST: unsafe_mask &= ~bit; break; case LOAD_FAST_CHECK: + // If this doesn't raise, then the local is defined. unsafe_mask &= ~bit; break; - case STORE_FAST: + case LOAD_FAST: + if (unsafe_mask & bit) { + instr->i_opcode = LOAD_FAST_CHECK; + } unsafe_mask &= ~bit; break; } From d206d28c19cc979818b7436528837314829ab656 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Fri, 14 Oct 2022 19:37:36 -0400 Subject: [PATCH 11/12] improve comments --- Python/compile.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 6e613a32c639fe..011a55fcea865f 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -7934,8 +7934,8 @@ maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp) static void scan_block_for_locals(basicblock *b, basicblock ***sp) { + // bit i is set if local i is potentially uninitialized uint64_t unsafe_mask = b->b_unsafe_locals_mask; - // mask & (1<b_iused; i++) { struct instr *instr = &b->b_instr[i]; assert(instr->i_opcode != EXTENDED_ARG); @@ -7992,8 +7992,8 @@ fast_scan_many_locals(basicblock *entryblock, int nlocals) return -1; } Py_ssize_t blocknum = 0; - // state[oparg - 64] == blocknum if #oparg is guaranteed to be - // initialized, i.e., if it has had a previous LOAD_FAST or + // state[i - 64] == blocknum if local i is guaranteed to + // be initialized, i.e., if it has had a previous LOAD_FAST or // STORE_FAST within that basicblock (not followed by DELETE_FAST). for (basicblock *b = entryblock; b != NULL; b = b->b_next) { blocknum++; @@ -8037,7 +8037,6 @@ static int add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock, struct compiler *c) { - int nparams = (int)PyList_GET_SIZE(c->u->u_ste->ste_varnames); int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames); if (nlocals == 0) { return 0; @@ -8059,6 +8058,7 @@ add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock, // First origin of being uninitialized: // The non-parameter locals in the entry block. + int nparams = (int)PyList_GET_SIZE(c->u->u_ste->ste_varnames); uint64_t start_mask = 0; for (int i = nparams; i < nlocals; i++) { start_mask |= (uint64_t)1 << i; From 096650f0884e14d0df1fe03a14f0274b06eb9ce3 Mon Sep 17 00:00:00 2001 From: sweeneyde Date: Thu, 20 Oct 2022 15:16:26 -0400 Subject: [PATCH 12/12] IS_SUPERINSTRUCTION_OPCODE --- Python/compile.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 1f787647dcd7c4..43110167c057ad 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -114,6 +114,13 @@ (opcode) == RAISE_VARARGS || \ (opcode) == RERAISE) +#define IS_SUPERINSTRUCTION_OPCODE(opcode) \ + ((opcode) == LOAD_FAST__LOAD_FAST || \ + (opcode) == LOAD_FAST__LOAD_CONST || \ + (opcode) == LOAD_CONST__LOAD_FAST || \ + (opcode) == STORE_FAST__LOAD_FAST || \ + (opcode) == STORE_FAST__STORE_FAST) + #define IS_TOP_LEVEL_AWAIT(c) ( \ (c->c_flags->cf_flags & PyCF_ALLOW_TOP_LEVEL_AWAIT) \ && (c->u->u_ste->ste_type == ModuleBlock)) @@ -8068,11 +8075,7 @@ scan_block_for_locals(basicblock *b, basicblock ***sp) struct instr *instr = &b->b_instr[i]; assert(instr->i_opcode != EXTENDED_ARG); assert(instr->i_opcode != EXTENDED_ARG_QUICK); - assert(instr->i_opcode != LOAD_FAST__LOAD_FAST); - assert(instr->i_opcode != STORE_FAST__LOAD_FAST); - assert(instr->i_opcode != LOAD_CONST__LOAD_FAST); - assert(instr->i_opcode != STORE_FAST__STORE_FAST); - assert(instr->i_opcode != LOAD_FAST__LOAD_CONST); + assert(!IS_SUPERINSTRUCTION_OPCODE(instr->i_opcode)); if (instr->i_except != NULL) { maybe_push(instr->i_except, unsafe_mask, sp); } @@ -8129,11 +8132,7 @@ fast_scan_many_locals(basicblock *entryblock, int nlocals) struct instr *instr = &b->b_instr[i]; assert(instr->i_opcode != EXTENDED_ARG); assert(instr->i_opcode != EXTENDED_ARG_QUICK); - assert(instr->i_opcode != LOAD_FAST__LOAD_FAST); - assert(instr->i_opcode != STORE_FAST__LOAD_FAST); - assert(instr->i_opcode != LOAD_CONST__LOAD_FAST); - assert(instr->i_opcode != STORE_FAST__STORE_FAST); - assert(instr->i_opcode != LOAD_FAST__LOAD_CONST); + assert(!IS_SUPERINSTRUCTION_OPCODE(instr->i_opcode)); int arg = instr->i_oparg; if (arg < 64) { continue;