gh-97912: Avoid quadratic behavior when adding LOAD_FAST_CHECK #97952


Merged 15 commits on Oct 20, 2022
39 changes: 39 additions & 0 deletions Lib/test/test_peepholer.py
@@ -776,6 +776,45 @@ def f():
         self.assertInBytecode(f, 'LOAD_FAST_CHECK')
         self.assertNotInBytecode(f, 'LOAD_FAST')

+    def test_load_fast_too_many_locals(self):
+        # When there get to be too many locals to analyze completely,
+        # later locals are all converted to LOAD_FAST_CHECK, except
+        # when a store or prior load occurred in the same basicblock.
+        def f():
+            a00 = a01 = a02 = a03 = a04 = a05 = a06 = a07 = a08 = a09 = 1
+            a10 = a11 = a12 = a13 = a14 = a15 = a16 = a17 = a18 = a19 = 1
+            a20 = a21 = a22 = a23 = a24 = a25 = a26 = a27 = a28 = a29 = 1
+            a30 = a31 = a32 = a33 = a34 = a35 = a36 = a37 = a38 = a39 = 1
+            a40 = a41 = a42 = a43 = a44 = a45 = a46 = a47 = a48 = a49 = 1
+            a50 = a51 = a52 = a53 = a54 = a55 = a56 = a57 = a58 = a59 = 1
+            a60 = a61 = a62 = a63 = a64 = a65 = a66 = a67 = a68 = a69 = 1
+            a70 = a71 = a72 = a73 = a74 = a75 = a76 = a77 = a78 = a79 = 1
+            del a72, a73
+            print(a73)
+            print(a70, a71, a72, a73)
+            while True:
+                print(a00, a01, a62, a63)
+                print(a64, a65, a78, a79)
+
+        for i in 0, 1, 62, 63:
+            # First 64 locals: analyze completely
+            self.assertInBytecode(f, 'LOAD_FAST', f"a{i:02}")
+            self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}")
+        for i in 64, 65, 78, 79:
+            # Locals >=64 not in the same basicblock
+            self.assertInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}")
+            self.assertNotInBytecode(f, 'LOAD_FAST', f"a{i:02}")
+        for i in 70, 71:
+            # Locals >=64 in the same basicblock
+            self.assertInBytecode(f, 'LOAD_FAST', f"a{i:02}")
+            self.assertNotInBytecode(f, 'LOAD_FAST_CHECK', f"a{i:02}")
+        # del statements should invalidate within basicblocks.
+        self.assertInBytecode(f, 'LOAD_FAST_CHECK', "a72")
+        self.assertNotInBytecode(f, 'LOAD_FAST', "a72")
+        # previous checked loads within a basicblock enable unchecked loads
+        self.assertInBytecode(f, 'LOAD_FAST_CHECK', "a73")
+        self.assertInBytecode(f, 'LOAD_FAST', "a73")
+
     def test_setting_lineno_adds_check(self):
         code = textwrap.dedent("""\
             def f():
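
As a quick way to see the two opcodes this test distinguishes, a sketch like the following can be run on CPython 3.12 or newer (where `LOAD_FAST_CHECK` exists); the function names here are illustrative, not from the PR:

```python
import dis

def checked():
    x = 1
    del x
    return x      # may raise UnboundLocalError, so the load must be checked

def unchecked():
    x = 1
    return x      # x is definitely bound, so a plain LOAD_FAST suffices

# List every LOAD_FAST-family instruction in each function.
for func in (checked, unchecked):
    ops = [i.opname for i in dis.get_instructions(func)
           if i.opname.startswith("LOAD_FAST")]
    print(func.__name__, ops)
# Expected on 3.12: checked ['LOAD_FAST_CHECK'], unchecked ['LOAD_FAST']
```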
1 change: 1 addition & 0 deletions (new NEWS entry under Misc/NEWS.d)
@@ -0,0 +1 @@
+The compiler now avoids quadratic behavior when finding which instructions should use the :opcode:`LOAD_FAST_CHECK` opcode.
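
The quadratic behavior referred to here came from the old per-local scan: each of the nlocals passes walked every instruction in the function. A rough, unscientific way to probe the compile-time scaling is a throwaway script like this (not part of the PR; absolute times depend entirely on the build):

```python
import time

def make_source(n):
    # A function with n locals, each stored once and loaded once.
    lines = [f"    v{i} = {i}" for i in range(n)]
    lines.append("    return " + " + ".join(f"v{i}" for i in range(n)))
    return "def f():\n" + "\n".join(lines) + "\n"

for n in (500, 1000, 2000, 4000):
    start = time.perf_counter()
    compile(make_source(n), "<generated>", "exec")
    print(f"{n:5d} locals: {time.perf_counter() - start:.3f}s")

# Before this change, doubling n roughly quadruples the time;
# with it, growth should be roughly linear.
```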
199 changes: 135 additions & 64 deletions Python/compile.c
@@ -114,6 +114,13 @@
          (opcode) == RAISE_VARARGS || \
          (opcode) == RERAISE)

+#define IS_SUPERINSTRUCTION_OPCODE(opcode) \
+    ((opcode) == LOAD_FAST__LOAD_FAST || \
+     (opcode) == LOAD_FAST__LOAD_CONST || \
+     (opcode) == LOAD_CONST__LOAD_FAST || \
+     (opcode) == STORE_FAST__LOAD_FAST || \
+     (opcode) == STORE_FAST__STORE_FAST)
+
 #define IS_TOP_LEVEL_AWAIT(c) ( \
     (c->c_flags->cf_flags & PyCF_ALLOW_TOP_LEVEL_AWAIT) \
     && (c->u->u_ste->ste_type == ModuleBlock))
@@ -258,6 +265,8 @@ typedef struct basicblock_ {
     int b_iused;
     /* length of instruction array (b_instr) */
     int b_ialloc;
+    /* Used by add_checks_for_loads_of_unknown_variables */
+    uint64_t b_unsafe_locals_mask;
     /* Number of predecessors that a block has. */
     int b_predecessors;
     /* depth of stack upon entry of block, computed by stackdepth() */
@@ -8038,103 +8047,165 @@ assemble_jump_offsets(basicblock *entryblock)
 }


-// Ensure each basicblock is only put onto the stack once.
-#define MAYBE_PUSH(B) do { \
-        if ((B)->b_visited == 0) { \
-            *(*stack_top)++ = (B); \
-            (B)->b_visited = 1; \
-        } \
-    } while (0)
+// helper functions for add_checks_for_loads_of_unknown_variables
+static inline void
+maybe_push(basicblock *b, uint64_t unsafe_mask, basicblock ***sp)
+{
+    // Push b if the unsafe mask is giving us any new information.
+    // To avoid overflowing the stack, only allow each block once.
+    // Use b->b_visited=1 to mean that b is currently on the stack.
+    uint64_t both = b->b_unsafe_locals_mask | unsafe_mask;
+    if (b->b_unsafe_locals_mask != both) {
+        b->b_unsafe_locals_mask = both;
+        // More work left to do.
+        if (!b->b_visited) {
+            // not on the stack, so push it.
+            *(*sp)++ = b;
+            b->b_visited = 1;
+        }
+    }
+}
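
In Python terms, the worklist discipline of maybe_push looks roughly like this (a sketch with invented names; `on_stack` plays the role of `b_visited`):

```python
from dataclasses import dataclass

@dataclass
class Block:
    unsafe_locals_mask: int = 0   # bit i set => local i may be unbound here
    on_stack: bool = False        # mirrors b_visited in the C code

def maybe_push(block, unsafe_mask, stack):
    # Re-queue the block only if the incoming mask adds new bits.
    both = block.unsafe_locals_mask | unsafe_mask
    if both != block.unsafe_locals_mask:
        block.unsafe_locals_mask = both
        if not block.on_stack:    # keep at most one copy on the stack
            stack.append(block)
            block.on_stack = True
```

Because a block is re-queued only when its 64-bit mask grows, each block can be pushed at most 64 times regardless of how many locals the function has, which is what removes the old per-local outer loop.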

 static void
-scan_block_for_local(int target, basicblock *b, bool unsafe_to_start,
-                     basicblock ***stack_top)
+scan_block_for_locals(basicblock *b, basicblock ***sp)
 {
-    bool unsafe = unsafe_to_start;
+    // bit i is set if local i is potentially uninitialized
+    uint64_t unsafe_mask = b->b_unsafe_locals_mask;
     for (int i = 0; i < b->b_iused; i++) {
         struct instr *instr = &b->b_instr[i];
         assert(instr->i_opcode != EXTENDED_ARG);
         assert(instr->i_opcode != EXTENDED_ARG_QUICK);
-        assert(instr->i_opcode != LOAD_FAST__LOAD_FAST);
-        assert(instr->i_opcode != STORE_FAST__LOAD_FAST);
-        assert(instr->i_opcode != LOAD_CONST__LOAD_FAST);
-        assert(instr->i_opcode != STORE_FAST__STORE_FAST);
-        assert(instr->i_opcode != LOAD_FAST__LOAD_CONST);
-        if (unsafe && instr->i_except != NULL) {
-            MAYBE_PUSH(instr->i_except);
-        }
-        if (instr->i_oparg != target) {
+        assert(!IS_SUPERINSTRUCTION_OPCODE(instr->i_opcode));
+        if (instr->i_except != NULL) {
+            maybe_push(instr->i_except, unsafe_mask, sp);
+        }
+        if (instr->i_oparg >= 64) {
             continue;
         }
+        assert(instr->i_oparg >= 0);
+        uint64_t bit = (uint64_t)1 << instr->i_oparg;
         switch (instr->i_opcode) {
+            case DELETE_FAST:
+                unsafe_mask |= bit;
+                break;
+            case STORE_FAST:
+                unsafe_mask &= ~bit;
+                break;
             case LOAD_FAST_CHECK:
-                // if this doesn't raise, then var is defined
-                unsafe = false;
+                // If this doesn't raise, then the local is defined.
+                unsafe_mask &= ~bit;
                 break;
             case LOAD_FAST:
-                if (unsafe) {
+                if (unsafe_mask & bit) {
                     instr->i_opcode = LOAD_FAST_CHECK;
                 }
-                unsafe = false;
-                break;
-            case STORE_FAST:
-                unsafe = false;
-                break;
-            case DELETE_FAST:
-                unsafe = true;
+                unsafe_mask &= ~bit;
                 break;
         }
     }
-    if (unsafe) {
-        // unsafe at end of this block,
-        // so unsafe at start of next blocks
-        if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
-            MAYBE_PUSH(b->b_next);
-        }
-        struct instr *last = basicblock_last_instr(b);
-        if (last != NULL) {
-            if (is_jump(last)) {
-                assert(last->i_target != NULL);
-                MAYBE_PUSH(last->i_target);
+    if (b->b_next && BB_HAS_FALLTHROUGH(b)) {
+        maybe_push(b->b_next, unsafe_mask, sp);
+    }
+    struct instr *last = basicblock_last_instr(b);
+    if (last && is_jump(last)) {
+        assert(last->i_target != NULL);
+        maybe_push(last->i_target, unsafe_mask, sp);
     }
 }
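
Continuing the sketch above (reusing `Block` and `maybe_push`), the per-block transfer function can be modeled like this. Instructions are (opname, oparg) pairs; exception edges, which the C code feeds with the in-flight mask at each instruction, are folded into `successors` here for brevity:

```python
def scan_block(block, instrs, successors, stack):
    # Walk the block, tracking which locals may still be unbound.
    mask = block.unsafe_locals_mask
    for i, (op, arg) in enumerate(instrs):
        if arg >= 64:                # higher locals: handled by the fast scan
            continue
        bit = 1 << arg
        if op == "DELETE_FAST":
            mask |= bit              # unbound again after a del
        elif op in ("STORE_FAST", "LOAD_FAST_CHECK"):
            mask &= ~bit             # definitely bound past this point
        elif op == "LOAD_FAST":
            if mask & bit:           # could be unbound: make the load checked
                instrs[i] = ("LOAD_FAST_CHECK", arg)
            mask &= ~bit             # a successful load proves the binding
    for succ in successors:          # hand the end-of-block state onward
        maybe_push(succ, mask, stack)
```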

+static int
+fast_scan_many_locals(basicblock *entryblock, int nlocals)
+{
+    assert(nlocals > 64);
+    Py_ssize_t *states = PyMem_Calloc(nlocals - 64, sizeof(Py_ssize_t));
+    if (states == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    Py_ssize_t blocknum = 0;
+    // state[i - 64] == blocknum if local i is guaranteed to
+    // be initialized, i.e., if it has had a previous LOAD_FAST or
+    // STORE_FAST within that basicblock (not followed by DELETE_FAST).
+    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+        blocknum++;
+        for (int i = 0; i < b->b_iused; i++) {
+            struct instr *instr = &b->b_instr[i];
+            assert(instr->i_opcode != EXTENDED_ARG);
+            assert(instr->i_opcode != EXTENDED_ARG_QUICK);
+            assert(!IS_SUPERINSTRUCTION_OPCODE(instr->i_opcode));
+            int arg = instr->i_oparg;
+            if (arg < 64) {
+                continue;
+            }
+            assert(arg >= 0);
+            switch (instr->i_opcode) {
+                case DELETE_FAST:
+                    states[arg - 64] = blocknum - 1;
+                    break;
+                case STORE_FAST:
+                    states[arg - 64] = blocknum;
+                    break;
+                case LOAD_FAST:
+                    if (states[arg - 64] != blocknum) {
+                        instr->i_opcode = LOAD_FAST_CHECK;
+                    }
+                    states[arg - 64] = blocknum;
+                    break;
+                case LOAD_FAST_CHECK:
+                    Py_UNREACHABLE();
+            }
+        }
+    }
+    PyMem_Free(states);
+    return 0;
+}
-#undef MAYBE_PUSH
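
For locals with index 64 and above, the same idea can be modeled with a per-local generation counter instead of mask bits: `states[i - 64] == blocknum` means "local i is known bound at the current point of block number blocknum". Another illustrative sketch, with invented names:

```python
def fast_scan_many_locals(block_instrs, nlocals):
    # block_instrs: one list of (opname, oparg) pairs per basicblock.
    states = [0] * (nlocals - 64)
    for blocknum, instrs in enumerate(block_instrs, start=1):
        for i, (op, arg) in enumerate(instrs):
            if arg < 64:                          # handled by the precise pass
                continue
            if op == "DELETE_FAST":
                states[arg - 64] = blocknum - 1   # anything != blocknum works
            elif op == "STORE_FAST":
                states[arg - 64] = blocknum
            elif op == "LOAD_FAST":
                if states[arg - 64] != blocknum:  # no store yet in this block
                    instrs[i] = ("LOAD_FAST_CHECK", arg)
                states[arg - 64] = blocknum       # a checked load proves binding
```

Comparing against the current block number is what keeps this linear: the states array never needs to be cleared between blocks, which would itself cost O(nlocals) per block.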

 static int
 add_checks_for_loads_of_uninitialized_variables(basicblock *entryblock,
                                                 struct compiler *c)
 {
+    int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames);
+    if (nlocals == 0) {
+        return 0;
+    }
+    if (nlocals > 64) {
+        // To avoid O(nlocals**2) compilation, locals beyond the first
+        // 64 are only analyzed one basicblock at a time: initialization
+        // info is not passed between basicblocks.
+        if (fast_scan_many_locals(entryblock, nlocals) < 0) {
+            return -1;
+        }
+        nlocals = 64;
+    }
     basicblock **stack = make_cfg_traversal_stack(entryblock);
     if (stack == NULL) {
         return -1;
     }
-    Py_ssize_t nparams = PyList_GET_SIZE(c->u->u_ste->ste_varnames);
-    int nlocals = (int)PyDict_GET_SIZE(c->u->u_varnames);
-    for (int target = 0; target < nlocals; target++) {
-        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-            b->b_visited = 0;
-        }
-        basicblock **stack_top = stack;
+    basicblock **sp = stack;

-        // First pass: find the relevant DFS starting points:
-        // the places where "being uninitialized" originates,
-        // which are the entry block and any DELETE_FAST statements.
-        if (target >= nparams) {
-            // only non-parameter locals start out uninitialized.
-            *(stack_top++) = entryblock;
-            entryblock->b_visited = 1;
-        }
-        for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
-            scan_block_for_local(target, b, false, &stack_top);
-        }
+    // First origin of being uninitialized:
+    // The non-parameter locals in the entry block.
+    int nparams = (int)PyList_GET_SIZE(c->u->u_ste->ste_varnames);
+    uint64_t start_mask = 0;
+    for (int i = nparams; i < nlocals; i++) {
+        start_mask |= (uint64_t)1 << i;
+    }
+    maybe_push(entryblock, start_mask, &sp);

-        // Second pass: Depth-first search to propagate uncertainty
-        while (stack_top > stack) {
-            basicblock *b = *--stack_top;
-            scan_block_for_local(target, b, true, &stack_top);
-        }
+    // Second origin of being uninitialized:
+    // There could be DELETE_FAST somewhere, so
+    // be sure to scan each basicblock at least once.
+    for (basicblock *b = entryblock; b != NULL; b = b->b_next) {
+        scan_block_for_locals(b, &sp);
+    }

+    // Now propagate the uncertainty from the origins we found: Use
+    // LOAD_FAST_CHECK for any LOAD_FAST where the local could be undefined.
+    while (sp > stack) {
+        basicblock *b = *--sp;
+        // mark as no longer on stack
+        b->b_visited = 0;
+        scan_block_for_locals(b, &sp);
+    }
     PyMem_Free(stack);
     return 0;
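
Putting the pieces together, the rewritten pass seeds two origins of "possibly unbound" and then runs the worklist to a fixed point. A condensed model in the same style as the sketches above (all names invented for this sketch; blocks are assumed to carry .instrs and .succs):

```python
def add_checks_for_uninitialized_locals(blocks, nparams, nlocals):
    # blocks[0] is the entry block.
    if nlocals == 0:
        return
    if nlocals > 64:
        # Locals >= 64 get the cheaper block-local approximation.
        fast_scan_many_locals([b.instrs for b in blocks], nlocals)
        nlocals = 64
    stack = []
    # Origin 1: non-parameter locals are unbound on function entry.
    start_mask = sum(1 << i for i in range(nparams, nlocals))
    maybe_push(blocks[0], start_mask, stack)
    # Origin 2: DELETE_FAST can appear anywhere, so scan each block once.
    for b in blocks:
        scan_block(b, b.instrs, b.succs, stack)
    # Fixed point: masks only ever grow, so this terminates.
    while stack:
        b = stack.pop()
        b.on_stack = False
        scan_block(b, b.instrs, b.succs, stack)
```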