From 5805184689d930ac0a7b36799b25a20f9d3e1960 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 1 May 2024 08:26:09 +0100 Subject: [PATCH 1/3] Make invalidating and clearing executors memory safe --- Include/cpython/optimizer.h | 3 +- Objects/codeobject.c | 3 +- Python/optimizer.c | 114 +++++++++++++++++++++++++----------- 3 files changed, 84 insertions(+), 36 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 819251a25bb242..a169280b26a6ad 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -24,6 +24,7 @@ typedef struct { uint8_t opcode; uint8_t oparg; uint8_t valid; + uint8_t linked; int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below). _PyBloomFilter bloom; _PyExecutorLinkListNode links; @@ -135,7 +136,7 @@ PyAPI_FUNC(_PyOptimizerObject *) PyUnstable_GetOptimizer(void); PyAPI_FUNC(_PyExecutorObject *) PyUnstable_GetExecutor(PyCodeObject *code, int offset); void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *); -void _Py_ExecutorClear(_PyExecutorObject *); +void _Py_ExecutorDetach(_PyExecutorObject *); void _Py_BloomFilter_Init(_PyBloomFilter *); void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 014632962bfcf3..dfcff44b7f1d35 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1502,7 +1502,8 @@ clear_executors(PyCodeObject *co) assert(co->co_executors); for (int i = 0; i < co->co_executors->size; i++) { if (co->co_executors->executors[i]) { - _Py_ExecutorClear(co->co_executors->executors[i]); + _Py_ExecutorDetach(co->co_executors->executors[i]); + assert(co->co_executors->executors[i] == NULL); } } PyMem_Free(co->co_executors); diff --git a/Python/optimizer.c b/Python/optimizer.c index a9a35fc902018a..2211e1718139e0 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -73,7 +73,7 @@ insert_executor(PyCodeObject *code, _Py_CODEUNIT *instr, int index, _PyExecutorO Py_INCREF(executor); if (instr->op.code == ENTER_EXECUTOR) { assert(index == instr->op.arg); - _Py_ExecutorClear(code->co_executors->executors[index]); + _Py_ExecutorDetach(code->co_executors->executors[index]); } else { assert(code->co_executors->size == index); @@ -268,10 +268,14 @@ static PyMethodDef executor_methods[] = { ///////////////////// Experimental UOp Optimizer ///////////////////// +static int executor_clear(_PyExecutorObject *executor); +static void unlink_executor(_PyExecutorObject *executor); + static void uop_dealloc(_PyExecutorObject *self) { _PyObject_GC_UNTRACK(self); - _Py_ExecutorClear(self); + assert(self->vm_data.code == NULL); + unlink_executor(self); #ifdef _Py_JIT _PyJIT_Free(self); #endif @@ -377,13 +381,6 @@ PySequenceMethods uop_as_sequence = { .sq_item = (ssizeargfunc)uop_item, }; -static int -executor_clear(PyObject *o) -{ - _Py_ExecutorClear((_PyExecutorObject *)o); - return 0; -} - static int executor_traverse(PyObject *o, visitproc visit, void *arg) { @@ -410,7 +407,7 @@ PyTypeObject _PyUOpExecutor_Type = { .tp_as_sequence = &uop_as_sequence, .tp_methods = executor_methods, .tp_traverse = executor_traverse, - .tp_clear = executor_clear, + .tp_clear = (inquiry)executor_clear, .tp_is_gc = executor_is_gc, }; @@ -1188,6 +1185,7 @@ init_cold_exit_executor(_PyExecutorObject *executor, int oparg) inst->opcode = _COLD_EXIT; inst->oparg = oparg; executor->vm_data.valid = true; + executor->vm_data.linked = false; for (int i = 0; i < BLOOM_FILTER_WORDS; i++) { assert(executor->vm_data.bloom.bits[i] == 0); } @@ -1326,7 +1324,7 @@ PyTypeObject _PyCounterExecutor_Type = { .tp_dealloc = (destructor)counter_dealloc, .tp_methods = executor_methods, .tp_traverse = executor_traverse, - .tp_clear = executor_clear, + .tp_clear = (inquiry)executor_clear, }; static int @@ -1501,15 +1499,13 @@ link_executor(_PyExecutorObject *executor) links->next = NULL; } else { - _PyExecutorObject *next = head->vm_data.links.next; - links->previous = head; - links->next = next; - if (next != NULL) { - next->vm_data.links.previous = executor; - } - head->vm_data.links.next = executor; + assert(head->vm_data.links.previous == NULL); + links->previous = NULL; + links->next = head; + head->vm_data.links.previous = executor; + interp->executor_list_head = executor; } - executor->vm_data.valid = true; + executor->vm_data.linked = true; /* executor_list_head must be first in list */ assert(interp->executor_list_head->vm_data.links.previous == NULL); } @@ -1517,7 +1513,11 @@ link_executor(_PyExecutorObject *executor) static void unlink_executor(_PyExecutorObject *executor) { + if (!executor->vm_data.linked) { + return; + } _PyExecutorLinkListNode *links = &executor->vm_data.links; + assert(executor->vm_data.valid); _PyExecutorObject *next = links->next; _PyExecutorObject *prev = links->previous; if (next != NULL) { @@ -1532,7 +1532,7 @@ unlink_executor(_PyExecutorObject *executor) assert(interp->executor_list_head == executor); interp->executor_list_head = next; } - executor->vm_data.valid = false; + executor->vm_data.linked = false; } /* This must be called by optimizers before using the executor */ @@ -1546,23 +1546,15 @@ _Py_ExecutorInit(_PyExecutorObject *executor, const _PyBloomFilter *dependency_s link_executor(executor); } -/* This must be called by executors during dealloc */ +/* Detaches the executor from the code object (if any) that + * holds a reference to it */ void -_Py_ExecutorClear(_PyExecutorObject *executor) +_Py_ExecutorDetach(_PyExecutorObject *executor) { - if (!executor->vm_data.valid) { - return; - } - unlink_executor(executor); PyCodeObject *code = executor->vm_data.code; if (code == NULL) { return; } - for (uint32_t i = 0; i < executor->exit_count; i++) { - Py_DECREF(executor->exits[i].executor); - executor->exits[i].executor = &COLD_EXITS[i]; - executor->exits[i].temperature = initial_unreachable_backoff_counter(); - } _Py_CODEUNIT *instruction = &_PyCode_CODE(code)[executor->vm_data.index]; assert(instruction->op.code == ENTER_EXECUTOR); int index = instruction->op.arg; @@ -1570,7 +1562,36 @@ _Py_ExecutorClear(_PyExecutorObject *executor) instruction->op.code = executor->vm_data.opcode; instruction->op.arg = executor->vm_data.oparg; executor->vm_data.code = NULL; - Py_CLEAR(code->co_executors->executors[index]); + code->co_executors->executors[index] = NULL; + Py_DECREF(executor); +} + +static int +executor_clear(_PyExecutorObject *executor) +{ + if (!executor->vm_data.valid) { + return 0; + } + assert(executor->vm_data.valid == 1); + unlink_executor(executor); + executor->vm_data.valid = 0; + /* It is possible for an executor to form a reference + * cycle with itself, so decref'ing a side exit could + * free the executor unless we hold a strong reference to it + */ + Py_INCREF(executor); + for (uint32_t i = 0; i < executor->exit_count; i++) { + const _PyExecutorObject *cold = &COLD_EXITS[i]; + const _PyExecutorObject *side = executor->exits[i].executor; + executor->exits[i].temperature = initial_unreachable_backoff_counter(); + if (side != cold) { + executor->exits[i].executor = cold; + Py_DECREF(side); + } + } + _Py_ExecutorDetach(executor); + Py_DECREF(executor); + return 0; } void @@ -1591,17 +1612,41 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is _Py_BloomFilter_Add(&obj_filter, obj); /* Walk the list of executors */ /* TO DO -- Use a tree to avoid traversing as many objects */ + bool no_memory = false; + PyObject *invalidate = PyList_New(0); + if (invalidate == NULL) { + PyErr_Clear(); + no_memory = true; + } + /* Clearing an executor can deallocate others, so we need to make a list of + * executors to invalidate first */ for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) { assert(exec->vm_data.valid); _PyExecutorObject *next = exec->vm_data.links.next; if (bloom_filter_may_contain(&exec->vm_data.bloom, &obj_filter)) { - _Py_ExecutorClear(exec); + unlink_executor(exec); + if (no_memory) { + exec->vm_data.valid = 0; + } else { + if (PyList_Append(invalidate, (PyObject *)exec) < 0) { + PyErr_Clear(); + no_memory = true; + } + } if (is_invalidation) { OPT_STAT_INC(executors_invalidated); } } exec = next; } + if (invalidate != NULL) { + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) { + _PyExecutorObject *exec = (_PyExecutorObject *)PyList_GET_ITEM(invalidate, i); + executor_clear(exec); + } + Py_DECREF(invalidate); + } + return; } /* Invalidate all executors */ @@ -1610,12 +1655,13 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation) { while (interp->executor_list_head) { _PyExecutorObject *executor = interp->executor_list_head; + assert(executor->vm_data.valid == 1 && executor->vm_data.linked == 1); if (executor->vm_data.code) { // Clear the entire code object so its co_executors array be freed: _PyCode_Clear_Executors(executor->vm_data.code); } else { - _Py_ExecutorClear(executor); + executor_clear(executor); } if (is_invalidation) { OPT_STAT_INC(executors_invalidated); From 6a54e06735f16e305e207b7fcfc5f9ec7f682e77 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 1 May 2024 08:53:06 +0100 Subject: [PATCH 2/3] Invalidate in out of memory case --- Python/optimizer.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/optimizer.c b/Python/optimizer.c index 2211e1718139e0..84c75043cf54e9 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1631,6 +1631,7 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is if (PyList_Append(invalidate, (PyObject *)exec) < 0) { PyErr_Clear(); no_memory = true; + exec->vm_data.valid = 0; } } if (is_invalidation) { From 62def6a5422fd06ea2a251a6db574383d7f854ac Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 1 May 2024 09:34:34 +0100 Subject: [PATCH 3/3] Only specialize to RESUME_CHECK if opcode is RESUME. Respects executors and instrumentation. --- Python/bytecodes.c | 8 +++++++- Python/generated_cases.c.h | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index eee8b328a65b8c..81e6edc52dd111 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -163,7 +163,13 @@ dummy_func( if ((oparg & RESUME_OPARG_LOCATION_MASK) < RESUME_AFTER_YIELD_FROM) { CHECK_EVAL_BREAKER(); } - this_instr->op.code = RESUME_CHECK; + assert(this_instr->op.code == RESUME || + this_instr->op.code == RESUME_CHECK || + this_instr->op.code == INSTRUMENTED_RESUME || + this_instr->op.code == ENTER_EXECUTOR); + if (this_instr->op.code == RESUME) { + this_instr->op.code = RESUME_CHECK; + } } } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7c1cc147de4e1a..2bb847e9635568 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4955,7 +4955,13 @@ if ((oparg & RESUME_OPARG_LOCATION_MASK) < RESUME_AFTER_YIELD_FROM) { CHECK_EVAL_BREAKER(); } - this_instr->op.code = RESUME_CHECK; + assert(this_instr->op.code == RESUME || + this_instr->op.code == RESUME_CHECK || + this_instr->op.code == INSTRUMENTED_RESUME || + this_instr->op.code == ENTER_EXECUTOR); + if (this_instr->op.code == RESUME) { + this_instr->op.code = RESUME_CHECK; + } } DISPATCH(); }