Thanks to visit codestin.com
Credit goes to github.com

Skip to content

GH-118095: Make invalidating and clearing executors memory safe #118459

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Include/cpython/optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ typedef struct {
uint8_t opcode;
uint8_t oparg;
uint8_t valid;
uint8_t linked;
int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below).
_PyBloomFilter bloom;
_PyExecutorLinkListNode links;
Expand Down Expand Up @@ -135,7 +136,7 @@ PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void);
PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset);

void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *);
void _Py_ExecutorClear(_PyExecutorObject *);
void _Py_ExecutorDetach(_PyExecutorObject *);
void _Py_BloomFilter_Init(_PyBloomFilter *);
void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj);
PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj);
Expand Down
3 changes: 2 additions & 1 deletion Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1504,7 +1504,8 @@ clear_executors(PyCodeObject *co)
assert(co->co_executors);
for (int i = 0; i < co->co_executors->size; i++) {
if (co->co_executors->executors[i]) {
_Py_ExecutorClear(co->co_executors->executors[i]);
_Py_ExecutorDetach(co->co_executors->executors[i]);
assert(co->co_executors->executors[i] == NULL);
}
}
PyMem_Free(co->co_executors);
Expand Down
12 changes: 9 additions & 3 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,15 @@ dummy_func(
if ((oparg & RESUME_OPARG_LOCATION_MASK) < RESUME_AFTER_YIELD_FROM) {
CHECK_EVAL_BREAKER();
}
#if ENABLE_SPECIALIZATION
FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, RESUME_CHECK);
#endif /* ENABLE_SPECIALIZATION */
assert(this_instr->op.code == RESUME ||
this_instr->op.code == RESUME_CHECK ||
this_instr->op.code == INSTRUMENTED_RESUME ||
this_instr->op.code == ENTER_EXECUTOR);
if (this_instr->op.code == RESUME) {
#if ENABLE_SPECIALIZATION
FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, RESUME_CHECK);
#endif /* ENABLE_SPECIALIZATION */
}
}
}

Expand Down
12 changes: 9 additions & 3 deletions Python/generated_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

115 changes: 81 additions & 34 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ insert_executor(PyCodeObject *code, _Py_CODEUNIT *instr, int index, _PyExecutorO
Py_INCREF(executor);
if (instr->op.code == ENTER_EXECUTOR) {
assert(index == instr->op.arg);
_Py_ExecutorClear(code->co_executors->executors[index]);
_Py_ExecutorDetach(code->co_executors->executors[index]);
}
else {
assert(code->co_executors->size == index);
Expand Down Expand Up @@ -270,10 +270,14 @@ static PyMethodDef executor_methods[] = {

///////////////////// Experimental UOp Optimizer /////////////////////

static int executor_clear(_PyExecutorObject *executor);
static void unlink_executor(_PyExecutorObject *executor);

static void
uop_dealloc(_PyExecutorObject *self) {
_PyObject_GC_UNTRACK(self);
_Py_ExecutorClear(self);
assert(self->vm_data.code == NULL);
unlink_executor(self);
#ifdef _Py_JIT
_PyJIT_Free(self);
#endif
Expand Down Expand Up @@ -379,13 +383,6 @@ PySequenceMethods uop_as_sequence = {
.sq_item = (ssizeargfunc)uop_item,
};

static int
executor_clear(PyObject *o)
{
_Py_ExecutorClear((_PyExecutorObject *)o);
return 0;
}

static int
executor_traverse(PyObject *o, visitproc visit, void *arg)
{
Expand All @@ -412,7 +409,7 @@ PyTypeObject _PyUOpExecutor_Type = {
.tp_as_sequence = &uop_as_sequence,
.tp_methods = executor_methods,
.tp_traverse = executor_traverse,
.tp_clear = executor_clear,
.tp_clear = (inquiry)executor_clear,
.tp_is_gc = executor_is_gc,
};

Expand Down Expand Up @@ -1190,6 +1187,7 @@ init_cold_exit_executor(_PyExecutorObject *executor, int oparg)
inst->opcode = _COLD_EXIT;
inst->oparg = oparg;
executor->vm_data.valid = true;
executor->vm_data.linked = false;
for (int i = 0; i < BLOOM_FILTER_WORDS; i++) {
assert(executor->vm_data.bloom.bits[i] == 0);
}
Expand Down Expand Up @@ -1328,7 +1326,7 @@ PyTypeObject _PyCounterExecutor_Type = {
.tp_dealloc = (destructor)counter_dealloc,
.tp_methods = executor_methods,
.tp_traverse = executor_traverse,
.tp_clear = executor_clear,
.tp_clear = (inquiry)executor_clear,
};

static int
Expand Down Expand Up @@ -1503,23 +1501,25 @@ link_executor(_PyExecutorObject *executor)
links->next = NULL;
}
else {
_PyExecutorObject *next = head->vm_data.links.next;
links->previous = head;
links->next = next;
if (next != NULL) {
next->vm_data.links.previous = executor;
}
head->vm_data.links.next = executor;
assert(head->vm_data.links.previous == NULL);
links->previous = NULL;
links->next = head;
head->vm_data.links.previous = executor;
interp->executor_list_head = executor;
}
executor->vm_data.valid = true;
executor->vm_data.linked = true;
/* executor_list_head must be first in list */
assert(interp->executor_list_head->vm_data.links.previous == NULL);
}

static void
unlink_executor(_PyExecutorObject *executor)
{
if (!executor->vm_data.linked) {
return;
}
_PyExecutorLinkListNode *links = &executor->vm_data.links;
assert(executor->vm_data.valid);
_PyExecutorObject *next = links->next;
_PyExecutorObject *prev = links->previous;
if (next != NULL) {
Expand All @@ -1534,7 +1534,7 @@ unlink_executor(_PyExecutorObject *executor)
assert(interp->executor_list_head == executor);
interp->executor_list_head = next;
}
executor->vm_data.valid = false;
executor->vm_data.linked = false;
}

/* This must be called by optimizers before using the executor */
Expand All @@ -1548,31 +1548,52 @@ _Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_s
link_executor(executor);
}

/* This must be called by executors during dealloc */
/* Detaches the executor from the code object (if any) that
* holds a reference to it */
void
_Py_ExecutorClear(_PyExecutorObject *executor)
_Py_ExecutorDetach(_PyExecutorObject *executor)
{
if (!executor->vm_data.valid) {
return;
}
unlink_executor(executor);
PyCodeObject *code = executor->vm_data.code;
if (code == NULL) {
return;
}
for (uint32_t i = 0; i < executor->exit_count; i++) {
Py_DECREF(executor->exits[i].executor);
executor->exits[i].executor = &COLD_EXITS[i];
executor->exits[i].temperature = initial_unreachable_backoff_counter();
}
_Py_CODEUNIT *instruction = &_PyCode_CODE(code)[executor->vm_data.index];
assert(instruction->op.code == ENTER_EXECUTOR);
int index = instruction->op.arg;
assert(code->co_executors->executors[index] == executor);
instruction->op.code = executor->vm_data.opcode;
instruction->op.arg = executor->vm_data.oparg;
executor->vm_data.code = NULL;
Py_CLEAR(code->co_executors->executors[index]);
code->co_executors->executors[index] = NULL;
Py_DECREF(executor);
}

static int
executor_clear(_PyExecutorObject *executor)
{
if (!executor->vm_data.valid) {
return 0;
}
assert(executor->vm_data.valid == 1);
unlink_executor(executor);
executor->vm_data.valid = 0;
/* It is possible for an executor to form a reference
* cycle with itself, so decref'ing a side exit could
* free the executor unless we hold a strong reference to it
*/
Py_INCREF(executor);
for (uint32_t i = 0; i < executor->exit_count; i++) {
const _PyExecutorObject *cold = &COLD_EXITS[i];
const _PyExecutorObject *side = executor->exits[i].executor;
executor->exits[i].temperature = initial_unreachable_backoff_counter();
if (side != cold) {
executor->exits[i].executor = cold;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like the executor owns the reference to exits[i].executor when that is not equal to the cold exit, but not when it is equal. That is potentially confusing.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It owns a reference in both cases, it is just that cold exit executors are immortal, so we can be lose with the refcounting.

Py_DECREF(side);
}
}
_Py_ExecutorDetach(executor);
Py_DECREF(executor);
return 0;
}

void
Expand All @@ -1593,17 +1614,42 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is
_Py_BloomFilter_Add(&obj_filter, obj);
/* Walk the list of executors */
/* TO DO -- Use a tree to avoid traversing as many objects */
bool no_memory = false;
PyObject *invalidate = PyList_New(0);
if (invalidate == NULL) {
PyErr_Clear();
no_memory = true;
}
/* Clearing an executor can deallocate others, so we need to make a list of
* executors to invalidate first */
for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
assert(exec->vm_data.valid);
_PyExecutorObject *next = exec->vm_data.links.next;
if (bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter)) {
_Py_ExecutorClear(exec);
unlink_executor(exec);
if (no_memory) {
exec->vm_data.valid = 0;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's really ok to just skip clearing exec in this case?

Copy link
Member Author

@markshannon markshannon May 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We never execute an executor with valid == 0, so it does what says and invalidates the dependent executor.

It might delay later re-optimizations and defer reclaiming some objects.
We've hit a memory error at this point, so the program is probably dying anyway.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We've hit a memory error at this point, so the program is probably dying anyway.

That feels like a dangerous assumption -- an app may have a gigabyte array of temp data somewhere that it can free and retry.

} else {
if (PyList_Append(invalidate, (PyObject *)exec) < 0) {
PyErr_Clear();
no_memory = true;
exec->vm_data.valid = 0;
}
}
if (is_invalidation) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see any calls where this flag is passed as 0, so maybe the flag argument is no longer needed?

OPT_STAT_INC(executors_invalidated);
}
}
exec = next;
}
if (invalidate != NULL) {
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) {
_PyExecutorObject *exec = (_PyExecutorObject *)PyList_GET_ITEM(invalidate, i);
executor_clear(exec);
}
Py_DECREF(invalidate);
}
return;
}

/* Invalidate all executors */
Expand All @@ -1612,12 +1658,13 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
{
while (interp->executor_list_head) {
_PyExecutorObject *executor = interp->executor_list_head;
assert(executor->vm_data.valid == 1 && executor->vm_data.linked == 1);
if (executor->vm_data.code) {
// Clear the entire code object so its co_executors array be freed:
_PyCode_Clear_Executors(executor->vm_data.code);
}
else {
_Py_ExecutorClear(executor);
executor_clear(executor);
}
if (is_invalidation) {
OPT_STAT_INC(executors_invalidated);
Expand Down
Loading