Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 65f1237

Browse files
GH-123516: Improve JIT memory consumption by invalidating cold executors (GH-124443)
Co-authored-by: Bénédikt Tran <[email protected]>
1 parent 23e812b commit 65f1237

14 files changed

+129
-39
lines changed

Include/internal/pycore_ceval.h

+1
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,7 @@ PyAPI_FUNC(PyObject *) _PyEval_LoadName(PyThreadState *tstate, _PyInterpreterFra
283283
#define _PY_GC_SCHEDULED_BIT (1U << 4)
284284
#define _PY_EVAL_PLEASE_STOP_BIT (1U << 5)
285285
#define _PY_EVAL_EXPLICIT_MERGE_BIT (1U << 6)
286+
#define _PY_EVAL_JIT_INVALIDATE_COLD_BIT (1U << 7)
286287

287288
/* Reserve a few bits for future use */
288289
#define _PY_EVAL_EVENTS_BITS 8

Include/internal/pycore_interp.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ struct _is {
261261
struct callable_cache callable_cache;
262262
_PyOptimizerObject *optimizer;
263263
_PyExecutorObject *executor_list_head;
264-
264+
size_t trace_run_counter;
265265
_rare_events rare_events;
266266
PyDict_WatchCallback builtins_dict_watcher;
267267

Include/internal/pycore_optimizer.h

+11-3
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,10 @@ typedef struct {
2929
typedef struct {
3030
uint8_t opcode;
3131
uint8_t oparg;
32-
uint16_t valid:1;
33-
uint16_t linked:1;
34-
uint16_t chain_depth:14; // Must be big engough for MAX_CHAIN_DEPTH - 1.
32+
uint8_t valid:1;
33+
uint8_t linked:1;
34+
uint8_t chain_depth:6; // Must be big enough for MAX_CHAIN_DEPTH - 1.
35+
bool warm;
3536
int index; // Index of ENTER_EXECUTOR (if code isn't NULL, below).
3637
_PyBloomFilter bloom;
3738
_PyExecutorLinkListNode links;
@@ -123,11 +124,18 @@ PyAPI_FUNC(PyObject *) _PyOptimizer_NewUOpOptimizer(void);
123124
#ifdef _Py_TIER2
124125
PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation);
125126
PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation);
127+
PyAPI_FUNC(void) _Py_Executors_InvalidateCold(PyInterpreterState *interp);
128+
126129
#else
127130
# define _Py_Executors_InvalidateDependency(A, B, C) ((void)0)
128131
# define _Py_Executors_InvalidateAll(A, B) ((void)0)
132+
# define _Py_Executors_InvalidateCold(A) ((void)0)
133+
129134
#endif
130135

136+
// Starting value of trace_run_counter, which _MAKE_WARM counts down;
137+
// reaching zero requests an invalidation pass over cold executors.
138+
#define JIT_CLEANUP_THRESHOLD 100000
131139

132140
// This is the length of the trace we project initially.
133141
#define UOP_MAX_TRACE_LENGTH 800

Include/internal/pycore_uop_ids.h

+36-35
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_uop_metadata.h

+4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Improved JIT memory consumption by periodically freeing memory used by infrequently-executed code.
2+
This change is especially likely to improve the memory footprint of long-running programs.

Python/bytecodes.c

+8
Original file line numberDiff line numberDiff line change
@@ -4836,6 +4836,14 @@ dummy_func(
48364836
assert(((_PyExecutorObject *)executor)->vm_data.valid);
48374837
}
48384838

4839+
// Marks the currently running executor as warm and counts down the
// interpreter-wide trace_run_counter; when the counter reaches zero the
// _PY_EVAL_JIT_INVALIDATE_COLD_BIT eval-breaker bit is set.
tier2 op(_MAKE_WARM, (--)) {
4840+
current_executor->vm_data.warm = true;
4841+
// trace_run_counter is a size_t, so it cannot go negative: decrementing
// past zero wraps around. The eval-breaker handler resets it to
// JIT_CLEANUP_THRESHOLD after the invalidation pass.
4842+
if (--tstate->interp->trace_run_counter == 0) {
4843+
_Py_set_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT);
4844+
}
4845+
}
4846+
48394847
tier2 op(_FATAL_ERROR, (--)) {
48404848
assert(0);
48414849
Py_FatalError("Fatal error uop executed.");

Python/ceval_gil.c

+6
Original file line numberDiff line numberDiff line change
@@ -1289,6 +1289,12 @@ _Py_HandlePending(PyThreadState *tstate)
12891289
_Py_RunGC(tstate);
12901290
}
12911291

1292+
if ((breaker & _PY_EVAL_JIT_INVALIDATE_COLD_BIT) != 0) {
1293+
_Py_unset_eval_breaker_bit(tstate, _PY_EVAL_JIT_INVALIDATE_COLD_BIT);
1294+
_Py_Executors_InvalidateCold(tstate->interp);
1295+
tstate->interp->trace_run_counter = JIT_CLEANUP_THRESHOLD;
1296+
}
1297+
12921298
/* GIL drop request */
12931299
if ((breaker & _PY_GIL_DROP_REQUEST_BIT) != 0) {
12941300
/* Give another thread a chance */

Python/executor_cases.c.h

+9
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/optimizer.c

+42
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,7 @@ translate_bytecode_to_trace(
565565
code->co_firstlineno,
566566
2 * INSTR_IP(initial_instr, code));
567567
ADD_TO_TRACE(_START_EXECUTOR, 0, (uintptr_t)instr, INSTR_IP(instr, code));
568+
ADD_TO_TRACE(_MAKE_WARM, 0, 0, 0);
568569
uint32_t target = 0;
569570

570571
for (;;) {
@@ -1194,6 +1195,9 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil
11941195
executor->jit_code = NULL;
11951196
executor->jit_side_entry = NULL;
11961197
executor->jit_size = 0;
1198+
// This is initialized to true so we can prevent the executor
1199+
// from being immediately detected as cold and invalidated.
1200+
executor->vm_data.warm = true;
11971201
if (_PyJIT_Compile(executor, executor->trace, length)) {
11981202
Py_DECREF(executor);
11991203
return NULL;
@@ -1659,4 +1663,42 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
16591663
}
16601664
}
16611665

1666+
/* Clear every executor of `interp` whose warm flag is not set, and reset the
 * warm flag of the remaining executors so they must be marked warm again
 * before the next call to survive it.
 *
 * Returns nothing. If the temporary work list cannot be created or grown,
 * the pending error is cleared and all executors are invalidated instead. */
void
1667+
_Py_Executors_InvalidateCold(PyInterpreterState *interp)
1668+
{
1669+
/* Walk the list of executors */
1670+
/* TO DO -- Use a tree to avoid traversing as many objects */
1671+
PyObject *invalidate = PyList_New(0);
1672+
if (invalidate == NULL) {
1673+
goto error;
1674+
}
1675+
1676+
/* Clearing an executor can deallocate others, so we need to make a list of
1677+
* executors to invalidate first */
1678+
for (_PyExecutorObject *exec = interp->executor_list_head; exec != NULL;) {
1679+
assert(exec->vm_data.valid);
1680+
_PyExecutorObject *next = exec->vm_data.links.next;
1681+
1682+
/* Cold executor: queue it for clearing (a failed append aborts the pass).
 * Warm executor: short-circuit skips the append and the else branch
 * demotes it to cold. After a successful append the else branch also
 * runs, but only re-stores false into an already-false flag. */
if (!exec->vm_data.warm && PyList_Append(invalidate, (PyObject *)exec) < 0) {
1683+
goto error;
1684+
}
1685+
else {
1686+
exec->vm_data.warm = false;
1687+
}
1688+
1689+
exec = next;
1690+
}
1691+
/* Second phase: the executor list is no longer being traversed, so the
 * queued executors can be cleared. */
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(invalidate); i++) {
1692+
_PyExecutorObject *exec = (_PyExecutorObject *)PyList_GET_ITEM(invalidate, i);
1693+
executor_clear(exec);
1694+
}
1695+
Py_DECREF(invalidate);
1696+
return;
1697+
error:
1698+
/* This function returns void, so the failure from PyList_New or
 * PyList_Append cannot be reported to the caller; discard it. */
PyErr_Clear();
1699+
/* `invalidate` is NULL here when PyList_New itself failed. */
Py_XDECREF(invalidate);
1700+
// If we're truly out of memory, wiping out everything is a fine fallback
1701+
_Py_Executors_InvalidateAll(interp, 0);
1702+
}
1703+
16621704
#endif /* _Py_TIER2 */

Python/optimizer_cases.c.h

+4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/pystate.c

+1
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,7 @@ init_interpreter(PyInterpreterState *interp,
660660
#ifdef _Py_TIER2
661661
(void)_Py_SetOptimizer(interp, NULL);
662662
interp->executor_list_head = NULL;
663+
interp->trace_run_counter = JIT_CLEANUP_THRESHOLD;
663664
#endif
664665
if (interp != &runtime->_main_interpreter) {
665666
/* Fix the self-referential, statically initialized fields. */

Tools/cases_generator/analyzer.py

+1
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,7 @@ def has_error_without_pop(op: parser.InstDef) -> bool:
540540
"_PyList_FromStackRefSteal",
541541
"_PyTuple_FromArraySteal",
542542
"_PyTuple_FromStackRefSteal",
543+
"_Py_set_eval_breaker_bit"
543544
)
544545

545546
ESCAPING_FUNCTIONS = (

Tools/jit/_targets.py

+3
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,9 @@ async def _compile(
139139
"-fno-plt",
140140
# Don't call stack-smashing canaries that we can't find or patch:
141141
"-fno-stack-protector",
142+
# On aarch64 Linux, intrinsics were being emitted and this flag
143+
# was required to disable them.
144+
"-mno-outline-atomics",
142145
"-std=c11",
143146
*self.args,
144147
]

0 commit comments

Comments
 (0)