From e7e819b4637527c7cb61ffb3dd83389ceb9cd538 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 21 Mar 2024 17:33:17 -0700 Subject: [PATCH 01/42] Baby steps: reimplement thresholds using adaptive counter abstractions --- Include/internal/pycore_code.h | 5 +++ Python/bytecodes.c | 56 ++++++++++++++++------------------ Python/generated_cases.c.h | 56 ++++++++++++++++------------------ Python/optimizer.c | 17 +++++------ 4 files changed, 66 insertions(+), 68 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index e004783ee48198..29003195856ab3 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -498,6 +498,11 @@ adaptive_counter_cooldown(void) { ADAPTIVE_COOLDOWN_BACKOFF); } +static inline uint16_t +adaptive_counter_jump_init(void) { + return adaptive_counter_bits(16, 4); +} + static inline uint16_t adaptive_counter_backoff(uint16_t counter) { uint16_t backoff = counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 5cd9db97c71e37..b69facf2ab9ca6 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2348,41 +2348,39 @@ dummy_func( JUMPBY(-oparg); #if ENABLE_SPECIALIZATION uint16_t counter = this_instr[1].cache; - this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER); - /* We are using unsigned values, but we really want signed values, so - * do the 2s complement adjustment manually */ - uint32_t offset_counter = counter ^ (1 << 15); - uint32_t threshold = tstate->interp->optimizer_backedge_threshold; - assert((threshold & OPTIMIZER_BITS_MASK) == 0); - // Use '>=' not '>' so that the optimizer/backoff bits do not effect the result. - // Double-check that the opcode isn't instrumented or something: - if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) { - _Py_CODEUNIT *start = this_instr; - /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ - while (oparg > 255) { - oparg >>= 8; - start--; - } - _PyExecutorObject *executor; - int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer, &executor); - ERROR_IF(optimized < 0, error); - if (optimized) { - assert(tstate->previous_executor == NULL); - tstate->previous_executor = Py_None; - GOTO_TIER_TWO(executor); + if (ADAPTIVE_COUNTER_IS_ZERO(counter) && this_instr->op.code == JUMP_BACKWARD) { + if (counter == 0) { + // Dynamically initialize the counter + PyInterpreterState *interp = tstate->interp; + if (interp->optimizer_backedge_threshold != OPTIMIZER_UNREACHABLE_THRESHOLD) { + counter = interp->optimizer_backedge_threshold; + assert(counter != 0); + this_instr[1].cache = counter; + } } else { - int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK; - backoff++; - if (backoff < MIN_TIER2_BACKOFF) { - backoff = MIN_TIER2_BACKOFF; + _Py_CODEUNIT *start = this_instr; + /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ + while (oparg > 255) { + oparg >>= 8; + start--; } - else if (backoff > MAX_TIER2_BACKOFF) { - backoff = MAX_TIER2_BACKOFF; + _PyExecutorObject *executor; + int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer, &executor); + ERROR_IF(optimized < 0, error); + if (optimized) { + assert(tstate->previous_executor == NULL); + tstate->previous_executor = Py_None; + GOTO_TIER_TWO(executor); + } + else { + this_instr[1].cache = adaptive_counter_backoff(counter); } - this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff; } } + else { + 
DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + } #endif /* ENABLE_SPECIALIZATION */ } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c66eb678d38475..ac0fa3cb719cb4 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3416,41 +3416,39 @@ JUMPBY(-oparg); #if ENABLE_SPECIALIZATION uint16_t counter = this_instr[1].cache; - this_instr[1].cache = counter + (1 << OPTIMIZER_BITS_IN_COUNTER); - /* We are using unsigned values, but we really want signed values, so - * do the 2s complement adjustment manually */ - uint32_t offset_counter = counter ^ (1 << 15); - uint32_t threshold = tstate->interp->optimizer_backedge_threshold; - assert((threshold & OPTIMIZER_BITS_MASK) == 0); - // Use '>=' not '>' so that the optimizer/backoff bits do not effect the result. - // Double-check that the opcode isn't instrumented or something: - if (offset_counter >= threshold && this_instr->op.code == JUMP_BACKWARD) { - _Py_CODEUNIT *start = this_instr; - /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ - while (oparg > 255) { - oparg >>= 8; - start--; - } - _PyExecutorObject *executor; - int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer, &executor); - if (optimized < 0) goto error; - if (optimized) { - assert(tstate->previous_executor == NULL); - tstate->previous_executor = Py_None; - GOTO_TIER_TWO(executor); + if (ADAPTIVE_COUNTER_IS_ZERO(counter) && this_instr->op.code == JUMP_BACKWARD) { + if (counter == 0) { + // Dynamically initialize the counter + PyInterpreterState *interp = tstate->interp; + if (interp->optimizer_backedge_threshold != OPTIMIZER_UNREACHABLE_THRESHOLD) { + counter = interp->optimizer_backedge_threshold; + assert(counter != 0); + this_instr[1].cache = counter; + } } else { - int backoff = this_instr[1].cache & OPTIMIZER_BITS_MASK; - backoff++; - if (backoff < MIN_TIER2_BACKOFF) { - backoff = MIN_TIER2_BACKOFF; + _Py_CODEUNIT *start = this_instr; + /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ + while (oparg > 255) { + oparg >>= 8; + start--; } - else if (backoff > MAX_TIER2_BACKOFF) { - backoff = MAX_TIER2_BACKOFF; + _PyExecutorObject *executor; + int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer, &executor); + if (optimized < 0) goto error; + if (optimized) { + assert(tstate->previous_executor == NULL); + tstate->previous_executor = Py_None; + GOTO_TIER_TWO(executor); + } + else { + this_instr[1].cache = adaptive_counter_backoff(counter); } - this_instr[1].cache = ((UINT16_MAX << OPTIMIZER_BITS_IN_COUNTER) << backoff) | backoff; } } + else { + DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + } #endif /* ENABLE_SPECIALIZATION */ DISPATCH(); } diff --git a/Python/optimizer.c b/Python/optimizer.c index 38ab6d3cf61c72..b2045a3f7d44da 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -110,9 +110,7 @@ never_optimize( _PyExecutorObject **exec, int Py_UNUSED(stack_entries)) { - /* Although it should be benign for this to be called, - * it shouldn't happen, so fail in debug builds. 
*/ - assert(0 && "never optimize should never be called"); + // This may be called if the optimizer is reset return 0; } @@ -135,17 +133,16 @@ static _PyOptimizerObject _PyOptimizer_Default = { static uint32_t shift_and_offset_threshold(uint32_t threshold) { - return (threshold << OPTIMIZER_BITS_IN_COUNTER) + (1 << 15); + if (threshold == OPTIMIZER_UNREACHABLE_THRESHOLD) { + return threshold; + } + return adaptive_counter_bits(threshold - 1, MIN_TIER2_BACKOFF); } _PyOptimizerObject * PyUnstable_GetOptimizer(void) { PyInterpreterState *interp = _PyInterpreterState_GET(); - assert(interp->optimizer_backedge_threshold == - shift_and_offset_threshold(interp->optimizer->backedge_threshold)); - assert(interp->optimizer_resume_threshold == - shift_and_offset_threshold(interp->optimizer->resume_threshold)); if (interp->optimizer == &_PyOptimizer_Default) { return NULL; } @@ -194,8 +191,8 @@ _Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer) interp->optimizer_resume_threshold = shift_and_offset_threshold(optimizer->resume_threshold); interp->optimizer_side_threshold = optimizer->side_threshold; if (optimizer == &_PyOptimizer_Default) { - assert(interp->optimizer_backedge_threshold > (1 << 16)); - assert(interp->optimizer_resume_threshold > (1 << 16)); + assert(interp->optimizer_backedge_threshold == OPTIMIZER_UNREACHABLE_THRESHOLD); + assert(interp->optimizer_resume_threshold == OPTIMIZER_UNREACHABLE_THRESHOLD); } return old; } From 8c74dfab2617247efffce7ed546bbc442ddd8843 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 21 Mar 2024 18:09:03 -0700 Subject: [PATCH 02/42] Make temperature an adaptive counter like the rest --- Include/cpython/optimizer.h | 7 +++---- Include/internal/pycore_interp.h | 5 ++--- Python/bytecodes.c | 10 +++++----- Python/executor_cases.c.h | 10 +++++----- Python/optimizer.c | 17 ++++++++++------- 5 files changed, 25 insertions(+), 24 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index bc960c583782c5..3f7f56e68014df 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -89,7 +89,7 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst) typedef struct _exit_data { uint32_t target; - int16_t temperature; + uint16_t temperature; const struct _PyExecutorObject *executor; } _PyExitData; @@ -115,11 +115,10 @@ typedef int (*optimize_func)( struct _PyOptimizerObject { PyObject_HEAD optimize_func optimize; - /* These thresholds are treated as signed so do not exceed INT16_MAX - * Use INT16_MAX to indicate that the optimizer should never be called */ + /* Initial values for adaptive-style counters */ + uint16_t backedge_threshold; uint16_t resume_threshold; uint16_t side_threshold; - uint16_t backedge_threshold; /* Data needed by the optimizer goes here, but is opaque to the VM */ }; diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index b8d0fdcce11ba8..07f970c6509922 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -239,10 +239,9 @@ struct _is { _PyOptimizerObject *optimizer; _PyExecutorObject *executor_list_head; - /* These two values are shifted and offset to speed up check in JUMP_BACKWARD */ - uint32_t optimizer_resume_threshold; + /* These three values are shifted and offset to speed up check in JUMP_BACKWARD */ uint32_t optimizer_backedge_threshold; - + uint32_t optimizer_resume_threshold; uint16_t optimizer_side_threshold; _rare_events rare_events; diff --git a/Python/bytecodes.c 
b/Python/bytecodes.c index b69facf2ab9ca6..b53bcfb5b754f3 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4157,21 +4157,21 @@ dummy_func( tier2 op(_COLD_EXIT, (--)) { _PyExecutorObject *previous = (_PyExecutorObject *)tstate->previous_executor; _PyExitData *exit = &previous->exits[oparg]; - exit->temperature++; PyCodeObject *code = _PyFrame_GetCode(frame); _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; - if (exit->temperature < (int32_t)tstate->interp->optimizer_side_threshold) { + if (!ADAPTIVE_COUNTER_IS_ZERO(exit->temperature)) { + DECREMENT_ADAPTIVE_COUNTER(exit->temperature); GOTO_TIER_ONE(target); } _PyExecutorObject *executor; if (target->op.code == ENTER_EXECUTOR) { executor = code->co_executors->executors[target->op.arg]; Py_INCREF(executor); - } else { + } + else { int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); if (optimized <= 0) { - int32_t new_temp = -1 * tstate->interp->optimizer_side_threshold; - exit->temperature = (new_temp < INT16_MIN) ? INT16_MIN : new_temp; + exit->temperature = adaptive_counter_backoff(exit->temperature); if (optimized < 0) { Py_DECREF(previous); tstate->previous_executor = Py_None; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 224b600b8f6a4a..831737b677be27 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3695,21 +3695,21 @@ oparg = CURRENT_OPARG(); _PyExecutorObject *previous = (_PyExecutorObject *)tstate->previous_executor; _PyExitData *exit = &previous->exits[oparg]; - exit->temperature++; PyCodeObject *code = _PyFrame_GetCode(frame); _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; - if (exit->temperature < (int32_t)tstate->interp->optimizer_side_threshold) { + if (!ADAPTIVE_COUNTER_IS_ZERO(exit->temperature)) { + DECREMENT_ADAPTIVE_COUNTER(exit->temperature); GOTO_TIER_ONE(target); } _PyExecutorObject *executor; if (target->op.code == ENTER_EXECUTOR) { executor = code->co_executors->executors[target->op.arg]; Py_INCREF(executor); - } else { + } + else { int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); if (optimized <= 0) { - int32_t new_temp = -1 * tstate->interp->optimizer_side_threshold; - exit->temperature = (new_temp < INT16_MIN) ? 
INT16_MIN : new_temp; + exit->temperature = adaptive_counter_backoff(exit->temperature); if (optimized < 0) { Py_DECREF(previous); tstate->previous_executor = Py_None; diff --git a/Python/optimizer.c b/Python/optimizer.c index b2045a3f7d44da..2fd6e30f8fbc23 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -125,8 +125,8 @@ PyTypeObject _PyDefaultOptimizer_Type = { static _PyOptimizerObject _PyOptimizer_Default = { PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type) .optimize = never_optimize, - .resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD, .backedge_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD, + .resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD, .side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD, }; @@ -189,10 +189,11 @@ _Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer) interp->optimizer = optimizer; interp->optimizer_backedge_threshold = shift_and_offset_threshold(optimizer->backedge_threshold); interp->optimizer_resume_threshold = shift_and_offset_threshold(optimizer->resume_threshold); - interp->optimizer_side_threshold = optimizer->side_threshold; + interp->optimizer_side_threshold = shift_and_offset_threshold(optimizer->side_threshold); if (optimizer == &_PyOptimizer_Default) { assert(interp->optimizer_backedge_threshold == OPTIMIZER_UNREACHABLE_THRESHOLD); assert(interp->optimizer_resume_threshold == OPTIMIZER_UNREACHABLE_THRESHOLD); + assert(interp->optimizer_side_threshold == OPTIMIZER_UNREACHABLE_THRESHOLD); } return old; } @@ -1103,10 +1104,12 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil } /* Initialize exits */ + PyInterpreterState *interp = _PyInterpreterState_GET(); assert(exit_count < COLD_EXIT_COUNT); for (int i = 0; i < exit_count; i++) { executor->exits[i].executor = &COLD_EXITS[i]; - executor->exits[i].temperature = 0; + executor->exits[i].temperature = + adaptive_counter_bits(interp->optimizer_side_threshold, 4); // TODO: Constantify } int next_exit = exit_count-1; _PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length]; @@ -1288,11 +1291,11 @@ PyUnstable_Optimizer_NewUOpOptimizer(void) return NULL; } opt->optimize = uop_optimize; - opt->resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD; // Need a few iterations to settle specializations, // and to ammortize the cost of optimization. 
- opt->side_threshold = 16; opt->backedge_threshold = 16; + opt->resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD; + opt->side_threshold = 16; return (PyObject *)opt; } @@ -1382,9 +1385,9 @@ PyUnstable_Optimizer_NewCounter(void) return NULL; } opt->base.optimize = counter_optimize; + opt->base.backedge_threshold = 0; opt->base.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD; opt->base.side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD; - opt->base.backedge_threshold = 0; opt->count = 0; return (PyObject *)opt; } @@ -1551,7 +1554,7 @@ _Py_ExecutorClear(_PyExecutorObject *executor) for (uint32_t i = 0; i < executor->exit_count; i++) { Py_DECREF(executor->exits[i].executor); executor->exits[i].executor = &COLD_EXITS[i]; - executor->exits[i].temperature = INT16_MIN; + executor->exits[i].temperature = OPTIMIZER_UNREACHABLE_THRESHOLD; } _Py_CODEUNIT *instruction = &_PyCode_CODE(code)[executor->vm_data.index]; assert(instruction->op.code == ENTER_EXECUTOR); From 79036ff35934a287056d8a22306e8c19456da90d Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 21 Mar 2024 18:22:12 -0700 Subject: [PATCH 03/42] Fix tests --- Python/bytecodes.c | 4 ++-- Python/generated_cases.c.h | 4 ++-- Python/optimizer.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index b53bcfb5b754f3..edd8ce0a8185ca 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2348,7 +2348,7 @@ dummy_func( JUMPBY(-oparg); #if ENABLE_SPECIALIZATION uint16_t counter = this_instr[1].cache; - if (ADAPTIVE_COUNTER_IS_ZERO(counter) && this_instr->op.code == JUMP_BACKWARD) { + if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { if (counter == 0) { // Dynamically initialize the counter PyInterpreterState *interp = tstate->interp; @@ -2358,7 +2358,7 @@ dummy_func( this_instr[1].cache = counter; } } - else { + if (ADAPTIVE_COUNTER_IS_ZERO(counter) && this_instr->op.code == JUMP_BACKWARD) { _Py_CODEUNIT *start = this_instr; /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ while (oparg > 255) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ac0fa3cb719cb4..55262379da50e2 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3416,7 +3416,7 @@ JUMPBY(-oparg); #if ENABLE_SPECIALIZATION uint16_t counter = this_instr[1].cache; - if (ADAPTIVE_COUNTER_IS_ZERO(counter) && this_instr->op.code == JUMP_BACKWARD) { + if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { if (counter == 0) { // Dynamically initialize the counter PyInterpreterState *interp = tstate->interp; @@ -3426,7 +3426,7 @@ this_instr[1].cache = counter; } } - else { + if (ADAPTIVE_COUNTER_IS_ZERO(counter) && this_instr->op.code == JUMP_BACKWARD) { _Py_CODEUNIT *start = this_instr; /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ while (oparg > 255) { diff --git a/Python/optimizer.c b/Python/optimizer.c index 2fd6e30f8fbc23..8b83163265722c 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -136,7 +136,7 @@ shift_and_offset_threshold(uint32_t threshold) if (threshold == OPTIMIZER_UNREACHABLE_THRESHOLD) { return threshold; } - return adaptive_counter_bits(threshold - 1, MIN_TIER2_BACKOFF); + return adaptive_counter_bits(threshold, MIN_TIER2_BACKOFF); } _PyOptimizerObject * From 54c1f8e61b09484425661f11433e4daf15c7a01e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 21 Mar 2024 18:26:43 -0700 Subject: [PATCH 04/42] Remove dead adaptive_counter_jump_init() --- Include/internal/pycore_code.h | 5 ----- 1 file changed, 5 
deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 29003195856ab3..e004783ee48198 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -498,11 +498,6 @@ adaptive_counter_cooldown(void) { ADAPTIVE_COOLDOWN_BACKOFF); } -static inline uint16_t -adaptive_counter_jump_init(void) { - return adaptive_counter_bits(16, 4); -} - static inline uint16_t adaptive_counter_backoff(uint16_t counter) { uint16_t backoff = counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1); From 015bb000a4eb26f08569d24386d75b55c2e5ffcd Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 22 Mar 2024 15:57:33 -0700 Subject: [PATCH 05/42] Fix no-GIL build failure in _COLD_EXIT --- Python/bytecodes.c | 48 ++++++++++++++++++++------------------- Python/executor_cases.c.h | 48 ++++++++++++++++++++------------------- 2 files changed, 50 insertions(+), 46 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index edd8ce0a8185ca..be99e95f884361 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4159,32 +4159,34 @@ dummy_func( _PyExitData *exit = &previous->exits[oparg]; PyCodeObject *code = _PyFrame_GetCode(frame); _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; - if (!ADAPTIVE_COUNTER_IS_ZERO(exit->temperature)) { - DECREMENT_ADAPTIVE_COUNTER(exit->temperature); - GOTO_TIER_ONE(target); - } - _PyExecutorObject *executor; - if (target->op.code == ENTER_EXECUTOR) { - executor = code->co_executors->executors[target->op.arg]; - Py_INCREF(executor); - } - else { - int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); - if (optimized <= 0) { - exit->temperature = adaptive_counter_backoff(exit->temperature); - if (optimized < 0) { - Py_DECREF(previous); - tstate->previous_executor = Py_None; - GOTO_UNWIND(); + #if ENABLE_SPECIALIZATION + if (ADAPTIVE_COUNTER_IS_ZERO(exit->temperature)) { + _PyExecutorObject *executor; + if (target->op.code == ENTER_EXECUTOR) { + executor = code->co_executors->executors[target->op.arg]; + Py_INCREF(executor); + } + else { + int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); + if (optimized <= 0) { + exit->temperature = adaptive_counter_backoff(exit->temperature); + if (optimized < 0) { + Py_DECREF(previous); + tstate->previous_executor = Py_None; + GOTO_UNWIND(); + } + GOTO_TIER_ONE(target); } - GOTO_TIER_ONE(target); } + /* We need two references. One to store in exit->executor and + * one to keep the executor alive when executing. */ + Py_INCREF(executor); + exit->executor = executor; + GOTO_TIER_TWO(executor); } - /* We need two references. One to store in exit->executor and - * one to keep the executor alive when executing. 
*/ - Py_INCREF(executor); - exit->executor = executor; - GOTO_TIER_TWO(executor); + DECREMENT_ADAPTIVE_COUNTER(exit->temperature); + #endif + GOTO_TIER_ONE(target); } tier2 op(_START_EXECUTOR, (executor/4 --)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 831737b677be27..5656bab2f5ddb1 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3697,32 +3697,34 @@ _PyExitData *exit = &previous->exits[oparg]; PyCodeObject *code = _PyFrame_GetCode(frame); _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; - if (!ADAPTIVE_COUNTER_IS_ZERO(exit->temperature)) { - DECREMENT_ADAPTIVE_COUNTER(exit->temperature); - GOTO_TIER_ONE(target); - } - _PyExecutorObject *executor; - if (target->op.code == ENTER_EXECUTOR) { - executor = code->co_executors->executors[target->op.arg]; - Py_INCREF(executor); - } - else { - int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); - if (optimized <= 0) { - exit->temperature = adaptive_counter_backoff(exit->temperature); - if (optimized < 0) { - Py_DECREF(previous); - tstate->previous_executor = Py_None; - GOTO_UNWIND(); + #if ENABLE_SPECIALIZATION + if (ADAPTIVE_COUNTER_IS_ZERO(exit->temperature)) { + _PyExecutorObject *executor; + if (target->op.code == ENTER_EXECUTOR) { + executor = code->co_executors->executors[target->op.arg]; + Py_INCREF(executor); + } + else { + int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); + if (optimized <= 0) { + exit->temperature = adaptive_counter_backoff(exit->temperature); + if (optimized < 0) { + Py_DECREF(previous); + tstate->previous_executor = Py_None; + GOTO_UNWIND(); + } + GOTO_TIER_ONE(target); } - GOTO_TIER_ONE(target); } + /* We need two references. One to store in exit->executor and + * one to keep the executor alive when executing. */ + Py_INCREF(executor); + exit->executor = executor; + GOTO_TIER_TWO(executor); } - /* We need two references. One to store in exit->executor and - * one to keep the executor alive when executing. 
*/ - Py_INCREF(executor); - exit->executor = executor; - GOTO_TIER_TWO(executor); + DECREMENT_ADAPTIVE_COUNTER(exit->temperature); + #endif + GOTO_TIER_ONE(target); break; } From 173029545673ef92aa1290ae6e5e92286bb564cb Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 22 Mar 2024 20:23:20 -0700 Subject: [PATCH 06/42] Use the right named constant in initial temperature --- Python/optimizer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index 8b83163265722c..69857b3d89fa09 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1109,7 +1109,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil for (int i = 0; i < exit_count; i++) { executor->exits[i].executor = &COLD_EXITS[i]; executor->exits[i].temperature = - adaptive_counter_bits(interp->optimizer_side_threshold, 4); // TODO: Constantify + adaptive_counter_bits(interp->optimizer_side_threshold, MIN_TIER2_BACKOFF); } int next_exit = exit_count-1; _PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length]; From 95f93b71ef63a286b903c85db14fbb563ec35b4a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 26 Mar 2024 16:28:11 -0700 Subject: [PATCH 07/42] Add pycore_backoff.h, and include it, but don't use it yet --- Include/internal/pycore_backoff.h | 105 ++++++++++++++++++++++++++++++ Makefile.pre.in | 1 + PCbuild/pythoncore.vcxproj | 1 + Python/bytecodes.c | 1 + Python/ceval.c | 1 + Tools/jit/template.c | 1 + 6 files changed, 110 insertions(+) create mode 100644 Include/internal/pycore_backoff.h diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h new file mode 100644 index 00000000000000..6d75e570655951 --- /dev/null +++ b/Include/internal/pycore_backoff.h @@ -0,0 +1,105 @@ + +#ifndef Py_INTERNAL_BACKOFF_H +#define Py_INTERNAL_BACKOFF_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include +#include +#include + +/* 16-bit countdown counters using exponential backoff. + + These are used by the adaptive specializer to count down until + it is time to specialize an instruction. If specialization fails + the counter is reset using exponential backoff. + + Another use is for the Tier 2 optimizer to decide when to create + a new Tier 2 trace (executor). Again, exponential backoff is used. + + The 16-bit counter is structured as a 12-bit unsigned 'value' + and a 4-bit 'backoff' field. When resetting the counter, the + backoff field is incremented (until it reaches a limit) and the + value is set to a bit mask representing the value 2**backoff - 1. + The maximum backoff is 12 (the number of value bits). + + There is an exceptional value which must not be updated, 0xFFFF. 
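   (Worked example added for illustration; the numbers follow from
   reset_backoff_counter() below rather than from the original comment:
   a counter at backoff 2 that fails again is reset to backoff 3 with
   value 2**3 - 1 = 7, so roughly 8 more executions pass before the next
   attempt; further failures yield 15, 31, 63, ... and finally cap at
   2**12 - 1 = 4095 once backoff reaches 12.)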
+*/ + +typedef struct { + union { + uint16_t counter; + struct { + uint16_t value : 12; + uint16_t backoff : 4; + }; + }; +} backoff_counter_t; + +static_assert(sizeof(backoff_counter_t) == 2, "backoff counter size should be 2 bytes"); + +#define UNREACHABLE_BACKOFF_COUNTER ((backoff_counter_t){.counter = 0xFFFF}) + +static inline bool +is_unreachable_backoff_counter(backoff_counter_t counter) +{ + return counter.counter == 0xFFFF; +} + +static inline backoff_counter_t +make_backoff_counter(uint16_t value, uint16_t backoff) +{ + assert(backoff <= 12); + assert(value <= 0xFFF); + return (backoff_counter_t){.value = value, .backoff = backoff}; +} + +static inline backoff_counter_t +forge_backoff_counter(uint16_t counter) +{ + return (backoff_counter_t){.counter = counter}; +} + +static inline backoff_counter_t +reset_backoff_counter(backoff_counter_t counter) +{ + assert(!is_unreachable_backoff_counter(counter)); + if (counter.backoff < 12) { + return make_backoff_counter((1 << (counter.backoff + 1)) - 1, counter.backoff + 1); + } + else { + return make_backoff_counter((1 << 12) - 1, 12); + } +} + +static inline backoff_counter_t +increment_backoff_counter(backoff_counter_t counter) +{ + assert(!is_unreachable_backoff_counter(counter)); + assert(counter.value != 0xFFF); + return make_backoff_counter(counter.value + 1, counter.backoff); +} + +static inline backoff_counter_t +decrement_backoff_counter(backoff_counter_t counter) +{ + assert(!is_unreachable_backoff_counter(counter)); + assert(counter.value != 0); + return make_backoff_counter(counter.value - 1, counter.backoff); +} + +static inline bool +backoff_counter_is_zero(backoff_counter_t counter) +{ + return counter.value == 0; +} + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_BACKOFF_H */ diff --git a/Makefile.pre.in b/Makefile.pre.in index c454f31aae1e57..ea7b401a9a2898 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1117,6 +1117,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_ast.h \ $(srcdir)/Include/internal/pycore_ast_state.h \ $(srcdir)/Include/internal/pycore_atexit.h \ + $(srcdir)/Include/internal/pycore_backoff.h \ $(srcdir)/Include/internal/pycore_bitutils.h \ $(srcdir)/Include/internal/pycore_blocks_output_buffer.h \ $(srcdir)/Include/internal/pycore_brc.h \ diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index c944bbafdba7e5..2298ca807366e4 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -204,6 +204,7 @@ + diff --git a/Python/bytecodes.c b/Python/bytecodes.c index be99e95f884361..608558d50f7308 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -8,6 +8,7 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() +#include "pycore_backoff.h" #include "pycore_code.h" #include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS #include "pycore_function.h" diff --git a/Python/ceval.c b/Python/ceval.c index cd51011450c3d5..1eaf135421241b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -4,6 +4,7 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() +#include "pycore_backoff.h" #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_ceval.h" #include "pycore_code.h" diff --git a/Tools/jit/template.c b/Tools/jit/template.c index 9b4fc2af9671eb..deb216215cb4aa 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -1,5 +1,6 @@ #include "Python.h" +#include "pycore_backoff.h" #include "pycore_call.h" #include "pycore_ceval.h" #include "pycore_dict.h" From 
7df0f10d9018f12cbb0f07b0efcc87193ccc9c77 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 26 Mar 2024 16:50:59 -0700 Subject: [PATCH 08/42] Reimplement adaptive counters in terms of backoff_counter --- Include/internal/pycore_code.h | 24 +++++------------------- Python/ceval_macros.h | 10 +++------- 2 files changed, 8 insertions(+), 26 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index e004783ee48198..cf5c44b87ede50 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -448,18 +448,14 @@ write_location_entry_start(uint8_t *ptr, int code, int length) /** Counters * The first 16-bit value in each inline cache is a counter. - * When counting misses, the counter is treated as a simple unsigned value. * * When counting executions until the next specialization attempt, * exponential backoff is used to reduce the number of specialization failures. - * The high 12 bits store the counter, the low 4 bits store the backoff exponent. - * On a specialization failure, the backoff exponent is incremented and the - * counter set to (2**backoff - 1). - * Backoff == 6 -> starting counter == 63, backoff == 10 -> starting counter == 1023. + * See pycore_backoff.h for more details. + * On a specialization failure, the backoff counter is reset. */ -/* With a 16-bit counter, we have 12 bits for the counter value, and 4 bits for the backoff */ -#define ADAPTIVE_BACKOFF_BITS 4 +#include "pycore_backoff.h" // A value of 1 means that we attempt to specialize the *second* time each // instruction is executed. Executing twice is a much better indicator of @@ -477,13 +473,9 @@ write_location_entry_start(uint8_t *ptr, int code, int length) #define ADAPTIVE_COOLDOWN_VALUE 52 #define ADAPTIVE_COOLDOWN_BACKOFF 0 -#define MAX_BACKOFF_VALUE (16 - ADAPTIVE_BACKOFF_BITS) - - static inline uint16_t adaptive_counter_bits(uint16_t value, uint16_t backoff) { - return ((value << ADAPTIVE_BACKOFF_BITS) - | (backoff & ((1 << ADAPTIVE_BACKOFF_BITS) - 1))); + return make_backoff_counter(value, backoff).counter; } static inline uint16_t @@ -500,13 +492,7 @@ adaptive_counter_cooldown(void) { static inline uint16_t adaptive_counter_backoff(uint16_t counter) { - uint16_t backoff = counter & ((1 << ADAPTIVE_BACKOFF_BITS) - 1); - backoff++; - if (backoff > MAX_BACKOFF_VALUE) { - backoff = MAX_BACKOFF_VALUE; - } - uint16_t value = (uint16_t)(1 << backoff) - 1; - return adaptive_counter_bits(value, backoff); + return reset_backoff_counter(forge_backoff_counter(counter)).counter; } diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index f2536ed3602c69..d4264433ab887f 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -291,10 +291,7 @@ GETITEM(PyObject *v, Py_ssize_t i) { } #define ADAPTIVE_COUNTER_IS_ZERO(COUNTER) \ - (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == 0) - -#define ADAPTIVE_COUNTER_IS_MAX(COUNTER) \ - (((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == ((1 << MAX_BACKOFF_VALUE) - 1)) + backoff_counter_is_zero(forge_backoff_counter((COUNTER))) #ifdef Py_GIL_DISABLED #define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \ @@ -305,14 +302,13 @@ GETITEM(PyObject *v, Py_ssize_t i) { #else #define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \ do { \ - assert(!ADAPTIVE_COUNTER_IS_ZERO((COUNTER))); \ - (COUNTER) -= (1 << ADAPTIVE_BACKOFF_BITS); \ + (COUNTER) = decrement_backoff_counter(forge_backoff_counter((COUNTER))).counter; \ } while (0); #endif #define INCREMENT_ADAPTIVE_COUNTER(COUNTER) \ do { \ - (COUNTER) += (1 << ADAPTIVE_BACKOFF_BITS); \ + (COUNTER) = 
increment_backoff_counter(forge_backoff_counter((COUNTER))).counter; \ } while (0); #define UNBOUNDLOCAL_ERROR_MSG \ From 925cae76144cf6c032fa3f9f365958443d8c2ade Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 26 Mar 2024 18:30:14 -0700 Subject: [PATCH 09/42] Redefine T2 temperature as a backoff counter --- Include/cpython/optimizer.h | 8 -------- Include/internal/pycore_backoff.h | 3 +++ Python/optimizer.c | 9 ++++++--- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 3f7f56e68014df..3346511d756923 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -150,14 +150,6 @@ extern void _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_inval PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewCounter(void); PyAPI_FUNC(PyObject *)PyUnstable_Optimizer_NewUOpOptimizer(void); -#define OPTIMIZER_BITS_IN_COUNTER 4 -/* Minimum of 16 additional executions before retry */ -#define MIN_TIER2_BACKOFF 4 -#define MAX_TIER2_BACKOFF (15 - OPTIMIZER_BITS_IN_COUNTER) -#define OPTIMIZER_BITS_MASK ((1 << OPTIMIZER_BITS_IN_COUNTER) - 1) -/* A value <= UINT16_MAX but large enough that when shifted is > UINT16_MAX */ -#define OPTIMIZER_UNREACHABLE_THRESHOLD UINT16_MAX - #define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3 #define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6 diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 6d75e570655951..79af384c2ab1a6 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -45,6 +45,9 @@ static_assert(sizeof(backoff_counter_t) == 2, "backoff counter size should be 2 #define UNREACHABLE_BACKOFF_COUNTER ((backoff_counter_t){.counter = 0xFFFF}) +/* Alias used by optimizer */ +#define OPTIMIZER_UNREACHABLE_THRESHOLD UNREACHABLE_BACKOFF_COUNTER.counter + static inline bool is_unreachable_backoff_counter(backoff_counter_t counter) { diff --git a/Python/optimizer.c b/Python/optimizer.c index 69857b3d89fa09..29d9c9fa78885b 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1,6 +1,7 @@ #include "Python.h" #include "opcode.h" #include "pycore_interp.h" +#include "pycore_backoff.h" #include "pycore_bitutils.h" // _Py_popcount32() #include "pycore_object.h" // _PyObject_GC_UNTRACK() #include "pycore_opcode_metadata.h" // _PyOpcode_OpName[] @@ -14,6 +15,9 @@ #include #include +/* Minimum of 16 additional executions before retry */ +#define MIN_TIER2_BACKOFF 4 + #define NEED_OPCODE_METADATA #include "pycore_uop_metadata.h" // Uop tables #undef NEED_OPCODE_METADATA @@ -136,7 +140,7 @@ shift_and_offset_threshold(uint32_t threshold) if (threshold == OPTIMIZER_UNREACHABLE_THRESHOLD) { return threshold; } - return adaptive_counter_bits(threshold, MIN_TIER2_BACKOFF); + return make_backoff_counter(threshold, MIN_TIER2_BACKOFF).counter; } _PyOptimizerObject * @@ -1108,8 +1112,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil assert(exit_count < COLD_EXIT_COUNT); for (int i = 0; i < exit_count; i++) { executor->exits[i].executor = &COLD_EXITS[i]; - executor->exits[i].temperature = - adaptive_counter_bits(interp->optimizer_side_threshold, MIN_TIER2_BACKOFF); + executor->exits[i].temperature = interp->optimizer_side_threshold; } int next_exit = exit_count-1; _PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length]; From f0c7fb0943a661d0c3ecbca958b32f4b7e26514e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 26 Mar 2024 18:31:07 -0700 Subject: [PATCH 
10/42] Don't increment branch cache (bitmask) in INSTRUMENTED_INSTRUCTION --- Python/bytecodes.c | 4 ++-- Python/generated_cases.c.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 608558d50f7308..27c7c1b080d883 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3971,8 +3971,8 @@ dummy_func( tstate, frame, this_instr); ERROR_IF(next_opcode < 0, error); next_instr = this_instr; - if (_PyOpcode_Caches[next_opcode]) { - INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + if (_PyOpcode_Caches[next_opcode] && !OPCODE_HAS_JUMP(next_opcode)) { + INCREMENT_ADAPTIVE_COUNTER(next_instr[1].cache); } assert(next_opcode > 0 && next_opcode < 256); opcode = next_opcode; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 55262379da50e2..cfd967fee7b5e2 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3141,8 +3141,8 @@ tstate, frame, this_instr); if (next_opcode < 0) goto error; next_instr = this_instr; - if (_PyOpcode_Caches[next_opcode]) { - INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + if (_PyOpcode_Caches[next_opcode] && !OPCODE_HAS_JUMP(next_opcode)) { + INCREMENT_ADAPTIVE_COUNTER(next_instr[1].cache); } assert(next_opcode > 0 && next_opcode < 256); opcode = next_opcode; From 8f79a6061b78f2a03913578c94ec3896eb43b700 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 26 Mar 2024 18:53:01 -0700 Subject: [PATCH 11/42] Don't increment branch cache (bitmask) in INSTRUMENTED_LINE --- Python/ceval.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/ceval.c b/Python/ceval.c index 1eaf135421241b..545bca9f44780f 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -818,7 +818,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int if (next_instr != here) { DISPATCH(); } - if (_PyOpcode_Caches[original_opcode]) { + if (_PyOpcode_Caches[original_opcode] && !OPCODE_HAS_JUMP(original_opcode)) { _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(next_instr+1); /* Prevent the underlying instruction from specializing * and overwriting the instrumentation. 
*/ From 149e9c4ac71d7b884bdfa1de188bfd5d8d11ca1b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 26 Mar 2024 18:51:32 -0700 Subject: [PATCH 12/42] Don't update unreachable counters --- Include/internal/pycore_backoff.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 79af384c2ab1a6..813424b6d7bfa0 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -83,17 +83,25 @@ reset_backoff_counter(backoff_counter_t counter) static inline backoff_counter_t increment_backoff_counter(backoff_counter_t counter) { - assert(!is_unreachable_backoff_counter(counter)); - assert(counter.value != 0xFFF); - return make_backoff_counter(counter.value + 1, counter.backoff); + if (!is_unreachable_backoff_counter(counter)) { + assert(counter.value != 0xFFF); + return make_backoff_counter(counter.value + 1, counter.backoff); + } + else { + return counter; + } } static inline backoff_counter_t decrement_backoff_counter(backoff_counter_t counter) { - assert(!is_unreachable_backoff_counter(counter)); - assert(counter.value != 0); - return make_backoff_counter(counter.value - 1, counter.backoff); + if (!is_unreachable_backoff_counter(counter)) { + assert(counter.value != 0); + return make_backoff_counter(counter.value - 1, counter.backoff); + } + else { + return counter; + } } static inline bool From 1d76112a6d62e213e0f89f53b98641062e9ec757 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 26 Mar 2024 19:03:28 -0700 Subject: [PATCH 13/42] Simplify dynamic counter initialization for JUMP_BACKWARD --- Python/bytecodes.c | 7 +------ Python/generated_cases.c.h | 7 +------ 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 27c7c1b080d883..bd1e45c2512f1a 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2352,12 +2352,7 @@ dummy_func( if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { if (counter == 0) { // Dynamically initialize the counter - PyInterpreterState *interp = tstate->interp; - if (interp->optimizer_backedge_threshold != OPTIMIZER_UNREACHABLE_THRESHOLD) { - counter = interp->optimizer_backedge_threshold; - assert(counter != 0); - this_instr[1].cache = counter; - } + this_instr[1].cache = tstate->interp->optimizer_backedge_threshold; } if (ADAPTIVE_COUNTER_IS_ZERO(counter) && this_instr->op.code == JUMP_BACKWARD) { _Py_CODEUNIT *start = this_instr; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index cfd967fee7b5e2..ebdd290061df2f 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3419,12 +3419,7 @@ if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { if (counter == 0) { // Dynamically initialize the counter - PyInterpreterState *interp = tstate->interp; - if (interp->optimizer_backedge_threshold != OPTIMIZER_UNREACHABLE_THRESHOLD) { - counter = interp->optimizer_backedge_threshold; - assert(counter != 0); - this_instr[1].cache = counter; - } + this_instr[1].cache = tstate->interp->optimizer_backedge_threshold; } if (ADAPTIVE_COUNTER_IS_ZERO(counter) && this_instr->op.code == JUMP_BACKWARD) { _Py_CODEUNIT *start = this_instr; From a5ffe02cc5fe5057dd9f2c15af1d7df9d00789e0 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 26 Mar 2024 19:08:10 -0700 Subject: [PATCH 14/42] Revert "Don't increment branch cache (bitmask) in INSTRUMENTED_LINE" This reverts commit 8f79a6061b78f2a03913578c94ec3896eb43b700. 
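To make the counter changes above concrete, here is a small throwaway sketch. It is not CPython code: the type name, helpers, and starting values are invented for illustration and merely mirror the pycore_backoff.h additions from patch 07 and the guarded update from patch 12. It shows how each failed attempt roughly doubles the wait until the next one, and that the 0xFFFF sentinel is never modified.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Standalone sketch only: mirrors the 12-bit value / 4-bit backoff layout
 * from pycore_backoff.h, but is not the CPython implementation. */
typedef struct {
    union {
        uint16_t counter;
        struct {
            uint16_t value : 12;
            uint16_t backoff : 4;
        };
    };
} counter_t;

static bool is_unreachable(counter_t c) { return c.counter == 0xFFFF; }

/* Like reset_backoff_counter: bump the exponent, restart at 2**backoff - 1. */
static counter_t reset(counter_t c) {
    uint16_t b = c.backoff < 12 ? (uint16_t)(c.backoff + 1) : 12;
    return (counter_t){ .value = (uint16_t)((1u << b) - 1), .backoff = b };
}

/* Like the guarded advance from patch 12: leave the sentinel untouched. */
static counter_t advance(counter_t c) {
    if (!is_unreachable(c) && c.value != 0) {
        c.value -= 1;
    }
    return c;
}

int main(void) {
    counter_t c = { .value = 1, .backoff = 1 };
    for (int failure = 1; failure <= 12; failure++) {
        c = reset(c);
        printf("failure %2d: next attempt after ~%4u executions\n",
               failure, (unsigned)c.value + 1);
    }
    counter_t dead = { .counter = 0xFFFF };
    assert(advance(dead).counter == 0xFFFF);   /* sentinel never changes */
    return 0;
}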
--- Python/ceval.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/ceval.c b/Python/ceval.c index 545bca9f44780f..1eaf135421241b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -818,7 +818,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int if (next_instr != here) { DISPATCH(); } - if (_PyOpcode_Caches[original_opcode] && !OPCODE_HAS_JUMP(original_opcode)) { + if (_PyOpcode_Caches[original_opcode]) { _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(next_instr+1); /* Prevent the underlying instruction from specializing * and overwriting the instrumentation. */ From ce7726cb7431c6cf58c953943f55c1062ef0bcee Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 26 Mar 2024 19:40:08 -0700 Subject: [PATCH 15/42] Different approach to avoid incrementing bitmask in INSTRUMENTED_LINE --- Python/ceval.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Python/ceval.c b/Python/ceval.c index 1eaf135421241b..ab54708e6ffa78 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -819,6 +819,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int DISPATCH(); } if (_PyOpcode_Caches[original_opcode]) { + assert(original_opcode != POP_JUMP_IF_FALSE); + assert(original_opcode != POP_JUMP_IF_TRUE); + assert(original_opcode != POP_JUMP_IF_NONE); + assert(original_opcode != POP_JUMP_IF_NOT_NONE); _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(next_instr+1); /* Prevent the underlying instruction from specializing * and overwriting the instrumentation. */ From e2c39f21b5977a1807e0b83de6f7a6ac035aad8b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 26 Mar 2024 19:40:18 -0700 Subject: [PATCH 16/42] Different approach to avoid incrementing bitmask in INSTRUMENTED_INSTRUCTION --- Python/bytecodes.c | 9 +++++++-- Python/generated_cases.c.h | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index bd1e45c2512f1a..8fb1783d055a3b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3966,8 +3966,13 @@ dummy_func( tstate, frame, this_instr); ERROR_IF(next_opcode < 0, error); next_instr = this_instr; - if (_PyOpcode_Caches[next_opcode] && !OPCODE_HAS_JUMP(next_opcode)) { - INCREMENT_ADAPTIVE_COUNTER(next_instr[1].cache); + if (_PyOpcode_Caches[next_opcode]) { + if (next_opcode != POP_JUMP_IF_FALSE && + next_opcode != POP_JUMP_IF_TRUE && + next_opcode != POP_JUMP_IF_NOT_NONE && + next_opcode != POP_JUMP_IF_NONE) { + INCREMENT_ADAPTIVE_COUNTER(next_instr[1].cache); + } } assert(next_opcode > 0 && next_opcode < 256); opcode = next_opcode; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ebdd290061df2f..70c72606f0bb93 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3141,8 +3141,13 @@ tstate, frame, this_instr); if (next_opcode < 0) goto error; next_instr = this_instr; - if (_PyOpcode_Caches[next_opcode] && !OPCODE_HAS_JUMP(next_opcode)) { - INCREMENT_ADAPTIVE_COUNTER(next_instr[1].cache); + if (_PyOpcode_Caches[next_opcode]) { + if (next_opcode != POP_JUMP_IF_FALSE && + next_opcode != POP_JUMP_IF_TRUE && + next_opcode != POP_JUMP_IF_NOT_NONE && + next_opcode != POP_JUMP_IF_NONE) { + INCREMENT_ADAPTIVE_COUNTER(next_instr[1].cache); + } } assert(next_opcode > 0 && next_opcode < 256); opcode = next_opcode; From 8d227904092f14fe14d4eadaa7c7c81b36a18bed Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 26 Mar 2024 19:49:23 -0700 Subject: [PATCH 17/42] Fix dynamic counter initialization for 
JUMP_BACKWARD --- Python/bytecodes.c | 3 ++- Python/generated_cases.c.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8fb1783d055a3b..99cbac4cbf30ba 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2352,7 +2352,8 @@ dummy_func( if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { if (counter == 0) { // Dynamically initialize the counter - this_instr[1].cache = tstate->interp->optimizer_backedge_threshold; + counter = tstate->interp->optimizer_backedge_threshold; + this_instr[1].cache = counter; } if (ADAPTIVE_COUNTER_IS_ZERO(counter) && this_instr->op.code == JUMP_BACKWARD) { _Py_CODEUNIT *start = this_instr; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 70c72606f0bb93..32add8a95ce709 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3424,7 +3424,8 @@ if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { if (counter == 0) { // Dynamically initialize the counter - this_instr[1].cache = tstate->interp->optimizer_backedge_threshold; + counter = tstate->interp->optimizer_backedge_threshold; + this_instr[1].cache = counter; } if (ADAPTIVE_COUNTER_IS_ZERO(counter) && this_instr->op.code == JUMP_BACKWARD) { _Py_CODEUNIT *start = this_instr; From cd8264e85df7e9ad05a8f47a993521c8b2fae9b2 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 26 Mar 2024 19:56:37 -0700 Subject: [PATCH 18/42] Get rid of (unused) resume_threshold --- Include/cpython/optimizer.h | 1 - Include/internal/pycore_interp.h | 1 - Python/optimizer.c | 5 ----- 3 files changed, 7 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 3346511d756923..b6b071f584037f 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -117,7 +117,6 @@ struct _PyOptimizerObject { optimize_func optimize; /* Initial values for adaptive-style counters */ uint16_t backedge_threshold; - uint16_t resume_threshold; uint16_t side_threshold; /* Data needed by the optimizer goes here, but is opaque to the VM */ }; diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 07f970c6509922..6ac75d96ca8b9e 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -241,7 +241,6 @@ struct _is { /* These three values are shifted and offset to speed up check in JUMP_BACKWARD */ uint32_t optimizer_backedge_threshold; - uint32_t optimizer_resume_threshold; uint16_t optimizer_side_threshold; _rare_events rare_events; diff --git a/Python/optimizer.c b/Python/optimizer.c index 29d9c9fa78885b..e23dbcea20c360 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -130,7 +130,6 @@ static _PyOptimizerObject _PyOptimizer_Default = { PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type) .optimize = never_optimize, .backedge_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD, - .resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD, .side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD, }; @@ -192,11 +191,9 @@ _Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer) Py_INCREF(optimizer); interp->optimizer = optimizer; interp->optimizer_backedge_threshold = shift_and_offset_threshold(optimizer->backedge_threshold); - interp->optimizer_resume_threshold = shift_and_offset_threshold(optimizer->resume_threshold); interp->optimizer_side_threshold = shift_and_offset_threshold(optimizer->side_threshold); if (optimizer == &_PyOptimizer_Default) { assert(interp->optimizer_backedge_threshold == OPTIMIZER_UNREACHABLE_THRESHOLD); - 
assert(interp->optimizer_resume_threshold == OPTIMIZER_UNREACHABLE_THRESHOLD); assert(interp->optimizer_side_threshold == OPTIMIZER_UNREACHABLE_THRESHOLD); } return old; @@ -1297,7 +1294,6 @@ PyUnstable_Optimizer_NewUOpOptimizer(void) // Need a few iterations to settle specializations, // and to ammortize the cost of optimization. opt->backedge_threshold = 16; - opt->resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD; opt->side_threshold = 16; return (PyObject *)opt; } @@ -1389,7 +1385,6 @@ PyUnstable_Optimizer_NewCounter(void) } opt->base.optimize = counter_optimize; opt->base.backedge_threshold = 0; - opt->base.resume_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD; opt->base.side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD; opt->count = 0; return (PyObject *)opt; From d72c2ef02e3573c7be8c34f5c3440d2b65cd7ae3 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 26 Mar 2024 21:17:58 -0700 Subject: [PATCH 19/42] Hopeful fix for non-clang builds `((backoff_counter_t){.counter = UNREACHABLE_BACKOFF})` is not a compile-time constant. --- Include/internal/pycore_backoff.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 813424b6d7bfa0..22f084aa092c9d 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -43,10 +43,11 @@ typedef struct { static_assert(sizeof(backoff_counter_t) == 2, "backoff counter size should be 2 bytes"); -#define UNREACHABLE_BACKOFF_COUNTER ((backoff_counter_t){.counter = 0xFFFF}) +#define UNREACHABLE_BACKOFF 0xFFFF +#define UNREACHABLE_BACKOFF_COUNTER ((backoff_counter_t){.counter = UNREACHABLE_BACKOFF}) /* Alias used by optimizer */ -#define OPTIMIZER_UNREACHABLE_THRESHOLD UNREACHABLE_BACKOFF_COUNTER.counter +#define OPTIMIZER_UNREACHABLE_THRESHOLD UNREACHABLE_BACKOFF static inline bool is_unreachable_backoff_counter(backoff_counter_t counter) From f6bf19499021fd26101e88111ae7d5b032c5ea25 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 26 Mar 2024 22:01:20 -0700 Subject: [PATCH 20/42] In no-GIL mode, make INCREMENT_ADAPTIVE_COUNTER() a no-op --- Python/ceval_macros.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index d4264433ab887f..5798a2ed4b0653 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -294,23 +294,29 @@ GETITEM(PyObject *v, Py_ssize_t i) { backoff_counter_is_zero(forge_backoff_counter((COUNTER))) #ifdef Py_GIL_DISABLED + #define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \ do { \ /* gh-115999 tracks progress on addressing this. */ \ static_assert(0, "The specializing interpreter is not yet thread-safe"); \ } while (0); + +#define INCREMENT_ADAPTIVE_COUNTER(COUNTER) do { } while (0); /* Ditto */ + #else + #define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \ do { \ (COUNTER) = decrement_backoff_counter(forge_backoff_counter((COUNTER))).counter; \ } while (0); -#endif #define INCREMENT_ADAPTIVE_COUNTER(COUNTER) \ do { \ (COUNTER) = increment_backoff_counter(forge_backoff_counter((COUNTER))).counter; \ } while (0); +#endif + #define UNBOUNDLOCAL_ERROR_MSG \ "cannot access local variable '%s' where it is not associated with a value" #define UNBOUNDFREE_ERROR_MSG \ From b9918439e34dfdeb6657d4819dcc65e62a402acc Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 27 Mar 2024 08:59:59 -0700 Subject: [PATCH 21/42] Revert "In no-GIL mode, make INCREMENT_ADAPTIVE_COUNTER() a no-op" I have a better way. 
This reverts commit f6bf19499021fd26101e88111ae7d5b032c5ea25. --- Python/ceval_macros.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 5798a2ed4b0653..d4264433ab887f 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -294,29 +294,23 @@ GETITEM(PyObject *v, Py_ssize_t i) { backoff_counter_is_zero(forge_backoff_counter((COUNTER))) #ifdef Py_GIL_DISABLED - #define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \ do { \ /* gh-115999 tracks progress on addressing this. */ \ static_assert(0, "The specializing interpreter is not yet thread-safe"); \ } while (0); - -#define INCREMENT_ADAPTIVE_COUNTER(COUNTER) do { } while (0); /* Ditto */ - #else - #define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \ do { \ (COUNTER) = decrement_backoff_counter(forge_backoff_counter((COUNTER))).counter; \ } while (0); +#endif #define INCREMENT_ADAPTIVE_COUNTER(COUNTER) \ do { \ (COUNTER) = increment_backoff_counter(forge_backoff_counter((COUNTER))).counter; \ } while (0); -#endif - #define UNBOUNDLOCAL_ERROR_MSG \ "cannot access local variable '%s' where it is not associated with a value" #define UNBOUNDFREE_ERROR_MSG \ From 9b9f26af29132545170cdb5ada71abf150fc3629 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 27 Mar 2024 10:29:19 -0700 Subject: [PATCH 22/42] Fix comment and fix size of optimizer_backedge_threshold --- Include/internal/pycore_interp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 6ac75d96ca8b9e..0736b5b36558a8 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -239,8 +239,8 @@ struct _is { _PyOptimizerObject *optimizer; _PyExecutorObject *executor_list_head; - /* These three values are shifted and offset to speed up check in JUMP_BACKWARD */ - uint32_t optimizer_backedge_threshold; + /* These values are shifted and offset to speed up check in JUMP_BACKWARD */ + uint16_t optimizer_backedge_threshold; uint16_t optimizer_side_threshold; _rare_events rare_events; From 354cd81de206167faf1b0267149ac84a82464d7c Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 27 Mar 2024 10:32:33 -0700 Subject: [PATCH 23/42] _COLD_EXIT is not conditional on ENABLE_SPECIALIZATION --- Python/bytecodes.c | 2 -- Python/executor_cases.c.h | 2 -- 2 files changed, 4 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 99cbac4cbf30ba..b683bd20feab3e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4161,7 +4161,6 @@ dummy_func( _PyExitData *exit = &previous->exits[oparg]; PyCodeObject *code = _PyFrame_GetCode(frame); _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; - #if ENABLE_SPECIALIZATION if (ADAPTIVE_COUNTER_IS_ZERO(exit->temperature)) { _PyExecutorObject *executor; if (target->op.code == ENTER_EXECUTOR) { @@ -4187,7 +4186,6 @@ dummy_func( GOTO_TIER_TWO(executor); } DECREMENT_ADAPTIVE_COUNTER(exit->temperature); - #endif GOTO_TIER_ONE(target); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5656bab2f5ddb1..fdbab782aff1af 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3697,7 +3697,6 @@ _PyExitData *exit = &previous->exits[oparg]; PyCodeObject *code = _PyFrame_GetCode(frame); _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; - #if ENABLE_SPECIALIZATION if (ADAPTIVE_COUNTER_IS_ZERO(exit->temperature)) { _PyExecutorObject *executor; if (target->op.code == ENTER_EXECUTOR) { @@ -3723,7 +3722,6 @@ 
GOTO_TIER_TWO(executor); } DECREMENT_ADAPTIVE_COUNTER(exit->temperature); - #endif GOTO_TIER_ONE(target); break; } From c1de44ff5403135049c1546d077475741ed74501 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 27 Mar 2024 10:43:07 -0700 Subject: [PATCH 24/42] Rewrite _COLD_EXIT using backoff_counter_t directly This also reverts the extra indent for the code I didn't touch. --- Python/bytecodes.c | 47 ++++++++++++++++++++------------------- Python/executor_cases.c.h | 47 ++++++++++++++++++++------------------- 2 files changed, 48 insertions(+), 46 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index b683bd20feab3e..81da71f5921f93 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4161,32 +4161,33 @@ dummy_func( _PyExitData *exit = &previous->exits[oparg]; PyCodeObject *code = _PyFrame_GetCode(frame); _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; - if (ADAPTIVE_COUNTER_IS_ZERO(exit->temperature)) { - _PyExecutorObject *executor; - if (target->op.code == ENTER_EXECUTOR) { - executor = code->co_executors->executors[target->op.arg]; - Py_INCREF(executor); - } - else { - int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); - if (optimized <= 0) { - exit->temperature = adaptive_counter_backoff(exit->temperature); - if (optimized < 0) { - Py_DECREF(previous); - tstate->previous_executor = Py_None; - GOTO_UNWIND(); - } - GOTO_TIER_ONE(target); + backoff_counter_t temperature = forge_backoff_counter(exit->temperature); + if (!backoff_counter_is_zero(temperature)) { + exit->temperature = decrement_backoff_counter(temperature).counter; + GOTO_TIER_ONE(target); + } + _PyExecutorObject *executor; + if (target->op.code == ENTER_EXECUTOR) { + executor = code->co_executors->executors[target->op.arg]; + Py_INCREF(executor); + } + else { + int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); + if (optimized <= 0) { + exit->temperature = reset_backoff_counter(temperature).counter; + if (optimized < 0) { + Py_DECREF(previous); + tstate->previous_executor = Py_None; + GOTO_UNWIND(); } + GOTO_TIER_ONE(target); } - /* We need two references. One to store in exit->executor and - * one to keep the executor alive when executing. */ - Py_INCREF(executor); - exit->executor = executor; - GOTO_TIER_TWO(executor); } - DECREMENT_ADAPTIVE_COUNTER(exit->temperature); - GOTO_TIER_ONE(target); + /* We need two references. One to store in exit->executor and + * one to keep the executor alive when executing. 
*/ + Py_INCREF(executor); + exit->executor = executor; + GOTO_TIER_TWO(executor); } tier2 op(_START_EXECUTOR, (executor/4 --)) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index fdbab782aff1af..26cec6b23cc1ee 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3697,32 +3697,33 @@ _PyExitData *exit = &previous->exits[oparg]; PyCodeObject *code = _PyFrame_GetCode(frame); _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; - if (ADAPTIVE_COUNTER_IS_ZERO(exit->temperature)) { - _PyExecutorObject *executor; - if (target->op.code == ENTER_EXECUTOR) { - executor = code->co_executors->executors[target->op.arg]; - Py_INCREF(executor); - } - else { - int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); - if (optimized <= 0) { - exit->temperature = adaptive_counter_backoff(exit->temperature); - if (optimized < 0) { - Py_DECREF(previous); - tstate->previous_executor = Py_None; - GOTO_UNWIND(); - } - GOTO_TIER_ONE(target); + backoff_counter_t temperature = forge_backoff_counter(exit->temperature); + if (!backoff_counter_is_zero(temperature)) { + exit->temperature = decrement_backoff_counter(temperature).counter; + GOTO_TIER_ONE(target); + } + _PyExecutorObject *executor; + if (target->op.code == ENTER_EXECUTOR) { + executor = code->co_executors->executors[target->op.arg]; + Py_INCREF(executor); + } + else { + int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); + if (optimized <= 0) { + exit->temperature = reset_backoff_counter(temperature).counter; + if (optimized < 0) { + Py_DECREF(previous); + tstate->previous_executor = Py_None; + GOTO_UNWIND(); } + GOTO_TIER_ONE(target); } - /* We need two references. One to store in exit->executor and - * one to keep the executor alive when executing. */ - Py_INCREF(executor); - exit->executor = executor; - GOTO_TIER_TWO(executor); } - DECREMENT_ADAPTIVE_COUNTER(exit->temperature); - GOTO_TIER_ONE(target); + /* We need two references. One to store in exit->executor and + * one to keep the executor alive when executing. */ + Py_INCREF(executor); + exit->executor = executor; + GOTO_TIER_TWO(executor); break; } From 1fe27c90f4e669ff0e6cdc745f5e2e6e059d576c Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 27 Mar 2024 11:06:51 -0700 Subject: [PATCH 25/42] Rename increment,decrement to advance,pause Pause does `counter.value += 1` without any asserts. I left is_zero alone; what to call it? is_ripe? :-) There shouldn't be a need to check for POP_JUMP_IF, it could affect perf a bit but with instrumentation it's terrible anyways. 
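For reference, a stand-alone sketch of the two renamed helpers (a simplified copy of the pycore_backoff.h versions in the diff below: the union with the raw uint16_t view and the 0xFFFF "unreachable" special case are left out, and main() is made up for illustration). Note that pause sets the low bit of the value rather than literally adding one; per the comment in bytecodes.c, that is enough to cancel out the decrement ("advance") performed by the un-instrumented instruction we dispatch to.

    /* Simplified stand-ins for the renamed helpers, for illustration only.
     * The real versions keep the fields in a union with a raw uint16_t view
     * and special-case the 0xFFFF "unreachable" counter; both are omitted. */
    #include <assert.h>
    #include <stdint.h>

    typedef struct {
        uint16_t value : 12;
        uint16_t backoff : 4;
    } backoff_counter_t;

    static backoff_counter_t
    pause_backoff_counter(backoff_counter_t c)
    {
        c.value |= 1;   /* set the low bit; no assert needed */
        return c;
    }

    static backoff_counter_t
    advance_backoff_counter(backoff_counter_t c)
    {
        assert(c.value != 0);
        c.value -= 1;   /* "advance" counts down towards triggering */
        return c;
    }

    int main(void)
    {
        /* A pause followed by an advance leaves an even value unchanged,
         * so an instrumented instruction does not burn the counter. */
        backoff_counter_t c = {.value = 16, .backoff = 4};
        c = advance_backoff_counter(pause_backoff_counter(c));
        assert(c.value == 16);
        return 0;
    }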
--- Include/internal/pycore_backoff.h | 12 +++--------- Python/bytecodes.c | 13 ++++--------- Python/ceval.c | 6 +----- Python/ceval_macros.h | 6 +++--- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 11 +++-------- 6 files changed, 15 insertions(+), 35 deletions(-) diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 22f084aa092c9d..3abe670a52e31b 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -82,19 +82,13 @@ reset_backoff_counter(backoff_counter_t counter) } static inline backoff_counter_t -increment_backoff_counter(backoff_counter_t counter) +pause_backoff_counter(backoff_counter_t counter) { - if (!is_unreachable_backoff_counter(counter)) { - assert(counter.value != 0xFFF); - return make_backoff_counter(counter.value + 1, counter.backoff); - } - else { - return counter; - } + return make_backoff_counter(counter.value | 1, counter.backoff); } static inline backoff_counter_t -decrement_backoff_counter(backoff_counter_t counter) +advance_backoff_counter(backoff_counter_t counter) { if (!is_unreachable_backoff_counter(counter)) { assert(counter.value != 0); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 81da71f5921f93..5ccf68ff2ed529 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1716,7 +1716,7 @@ dummy_func( inst(INSTRUMENTED_LOAD_SUPER_ATTR, (unused/1, unused, unused, unused -- unused, unused if (oparg & 1))) { // cancel out the decrement that will happen in LOAD_SUPER_ATTR; we // don't want to specialize instrumented instructions - INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + PAUSE_ADAPTIVE_COUNTER(this_instr[1].cache); GO_TO_INSTRUCTION(LOAD_SUPER_ATTR); } @@ -3005,7 +3005,7 @@ dummy_func( tstate, PY_MONITORING_EVENT_CALL, frame, this_instr, function, arg); ERROR_IF(err, error); - INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + PAUSE_ADAPTIVE_COUNTER(this_instr[1].cache); GO_TO_INSTRUCTION(CALL); } @@ -3968,12 +3968,7 @@ dummy_func( ERROR_IF(next_opcode < 0, error); next_instr = this_instr; if (_PyOpcode_Caches[next_opcode]) { - if (next_opcode != POP_JUMP_IF_FALSE && - next_opcode != POP_JUMP_IF_TRUE && - next_opcode != POP_JUMP_IF_NOT_NONE && - next_opcode != POP_JUMP_IF_NONE) { - INCREMENT_ADAPTIVE_COUNTER(next_instr[1].cache); - } + PAUSE_ADAPTIVE_COUNTER(next_instr[1].cache); } assert(next_opcode > 0 && next_opcode < 256); opcode = next_opcode; @@ -4163,7 +4158,7 @@ dummy_func( _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; backoff_counter_t temperature = forge_backoff_counter(exit->temperature); if (!backoff_counter_is_zero(temperature)) { - exit->temperature = decrement_backoff_counter(temperature).counter; + exit->temperature = advance_backoff_counter(temperature).counter; GOTO_TIER_ONE(target); } _PyExecutorObject *executor; diff --git a/Python/ceval.c b/Python/ceval.c index ab54708e6ffa78..5827a97903a3d0 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -819,14 +819,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int DISPATCH(); } if (_PyOpcode_Caches[original_opcode]) { - assert(original_opcode != POP_JUMP_IF_FALSE); - assert(original_opcode != POP_JUMP_IF_TRUE); - assert(original_opcode != POP_JUMP_IF_NONE); - assert(original_opcode != POP_JUMP_IF_NOT_NONE); _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(next_instr+1); /* Prevent the underlying instruction from specializing * and overwriting the instrumentation. 
*/ - INCREMENT_ADAPTIVE_COUNTER(cache->counter); + PAUSE_ADAPTIVE_COUNTER(cache->counter); } opcode = original_opcode; DISPATCH_GOTO(); diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index d4264433ab887f..604f9d956dae12 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -302,13 +302,13 @@ GETITEM(PyObject *v, Py_ssize_t i) { #else #define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \ do { \ - (COUNTER) = decrement_backoff_counter(forge_backoff_counter((COUNTER))).counter; \ + (COUNTER) = advance_backoff_counter(forge_backoff_counter((COUNTER))).counter; \ } while (0); #endif -#define INCREMENT_ADAPTIVE_COUNTER(COUNTER) \ +#define PAUSE_ADAPTIVE_COUNTER(COUNTER) \ do { \ - (COUNTER) = increment_backoff_counter(forge_backoff_counter((COUNTER))).counter; \ + (COUNTER) = pause_backoff_counter(forge_backoff_counter((COUNTER))).counter; \ } while (0); #define UNBOUNDLOCAL_ERROR_MSG \ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 26cec6b23cc1ee..a7aa5a9f7ceb6e 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3699,7 +3699,7 @@ _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; backoff_counter_t temperature = forge_backoff_counter(exit->temperature); if (!backoff_counter_is_zero(temperature)) { - exit->temperature = decrement_backoff_counter(temperature).counter; + exit->temperature = advance_backoff_counter(temperature).counter; GOTO_TIER_ONE(target); } _PyExecutorObject *executor; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 32add8a95ce709..fc9921d93741b7 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3026,7 +3026,7 @@ tstate, PY_MONITORING_EVENT_CALL, frame, this_instr, function, arg); if (err) goto error; - INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + PAUSE_ADAPTIVE_COUNTER(this_instr[1].cache); GO_TO_INSTRUCTION(CALL); } @@ -3142,12 +3142,7 @@ if (next_opcode < 0) goto error; next_instr = this_instr; if (_PyOpcode_Caches[next_opcode]) { - if (next_opcode != POP_JUMP_IF_FALSE && - next_opcode != POP_JUMP_IF_TRUE && - next_opcode != POP_JUMP_IF_NOT_NONE && - next_opcode != POP_JUMP_IF_NONE) { - INCREMENT_ADAPTIVE_COUNTER(next_instr[1].cache); - } + PAUSE_ADAPTIVE_COUNTER(next_instr[1].cache); } assert(next_opcode > 0 && next_opcode < 256); opcode = next_opcode; @@ -3182,7 +3177,7 @@ /* Skip 1 cache entry */ // cancel out the decrement that will happen in LOAD_SUPER_ATTR; we // don't want to specialize instrumented instructions - INCREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + PAUSE_ADAPTIVE_COUNTER(this_instr[1].cache); GO_TO_INSTRUCTION(LOAD_SUPER_ATTR); } From 225ea17c8a6cff1d61a775f3c7440886869915b9 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 27 Mar 2024 11:16:03 -0700 Subject: [PATCH 26/42] Give up on restricting backoff to <= 12 --- Include/internal/pycore_backoff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 3abe670a52e31b..d0728ff959c52f 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -58,7 +58,7 @@ is_unreachable_backoff_counter(backoff_counter_t counter) static inline backoff_counter_t make_backoff_counter(uint16_t value, uint16_t backoff) { - assert(backoff <= 12); + assert(backoff <= 15); assert(value <= 0xFFF); return (backoff_counter_t){.value = value, .backoff = backoff}; } From 63d8bc7fafa48ddb836a5694394aa35bf8b99f3d Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 29 Mar 
2024 08:29:22 -0700 Subject: [PATCH 27/42] Rip out backoff thresholds --- Include/cpython/optimizer.h | 3 --- Include/internal/pycore_interp.h | 4 ---- Python/bytecodes.c | 39 +++++++++++++------------------- Python/generated_cases.c.h | 39 +++++++++++++------------------- Python/optimizer.c | 16 +------------ Python/specialize.c | 3 ++- 6 files changed, 35 insertions(+), 69 deletions(-) diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index b6b071f584037f..2665dd385d32d5 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -115,9 +115,6 @@ typedef int (*optimize_func)( struct _PyOptimizerObject { PyObject_HEAD optimize_func optimize; - /* Initial values for adaptive-style counters */ - uint16_t backedge_threshold; - uint16_t side_threshold; /* Data needed by the optimizer goes here, but is opaque to the VM */ }; diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 0736b5b36558a8..b5cea863ff35dc 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -239,10 +239,6 @@ struct _is { _PyOptimizerObject *optimizer; _PyExecutorObject *executor_list_head; - /* These values are shifted and offset to speed up check in JUMP_BACKWARD */ - uint16_t optimizer_backedge_threshold; - uint16_t optimizer_side_threshold; - _rare_events rare_events; PyDict_WatchCallback builtins_dict_watcher; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 5ccf68ff2ed529..5171a03a481c4d 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2349,30 +2349,23 @@ dummy_func( JUMPBY(-oparg); #if ENABLE_SPECIALIZATION uint16_t counter = this_instr[1].cache; - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { - if (counter == 0) { - // Dynamically initialize the counter - counter = tstate->interp->optimizer_backedge_threshold; - this_instr[1].cache = counter; + if (ADAPTIVE_COUNTER_IS_ZERO(counter) && this_instr->op.code == JUMP_BACKWARD) { + _Py_CODEUNIT *start = this_instr; + /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ + while (oparg > 255) { + oparg >>= 8; + start--; } - if (ADAPTIVE_COUNTER_IS_ZERO(counter) && this_instr->op.code == JUMP_BACKWARD) { - _Py_CODEUNIT *start = this_instr; - /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ - while (oparg > 255) { - oparg >>= 8; - start--; - } - _PyExecutorObject *executor; - int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer, &executor); - ERROR_IF(optimized < 0, error); - if (optimized) { - assert(tstate->previous_executor == NULL); - tstate->previous_executor = Py_None; - GOTO_TIER_TWO(executor); - } - else { - this_instr[1].cache = adaptive_counter_backoff(counter); - } + _PyExecutorObject *executor; + int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer, &executor); + ERROR_IF(optimized < 0, error); + if (optimized) { + assert(tstate->previous_executor == NULL); + tstate->previous_executor = Py_None; + GOTO_TIER_TWO(executor); + } + else { + this_instr[1].cache = adaptive_counter_backoff(counter); } } else { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index fc9921d93741b7..dab5ea891410b2 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3416,30 +3416,23 @@ JUMPBY(-oparg); #if ENABLE_SPECIALIZATION uint16_t counter = this_instr[1].cache; - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { - if (counter == 0) { - // Dynamically initialize the counter - counter = tstate->interp->optimizer_backedge_threshold; - this_instr[1].cache = counter; 
+ if (ADAPTIVE_COUNTER_IS_ZERO(counter) && this_instr->op.code == JUMP_BACKWARD) { + _Py_CODEUNIT *start = this_instr; + /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ + while (oparg > 255) { + oparg >>= 8; + start--; } - if (ADAPTIVE_COUNTER_IS_ZERO(counter) && this_instr->op.code == JUMP_BACKWARD) { - _Py_CODEUNIT *start = this_instr; - /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ - while (oparg > 255) { - oparg >>= 8; - start--; - } - _PyExecutorObject *executor; - int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer, &executor); - if (optimized < 0) goto error; - if (optimized) { - assert(tstate->previous_executor == NULL); - tstate->previous_executor = Py_None; - GOTO_TIER_TWO(executor); - } - else { - this_instr[1].cache = adaptive_counter_backoff(counter); - } + _PyExecutorObject *executor; + int optimized = _PyOptimizer_Optimize(frame, start, stack_pointer, &executor); + if (optimized < 0) goto error; + if (optimized) { + assert(tstate->previous_executor == NULL); + tstate->previous_executor = Py_None; + GOTO_TIER_TWO(executor); + } + else { + this_instr[1].cache = adaptive_counter_backoff(counter); } } else { diff --git a/Python/optimizer.c b/Python/optimizer.c index e23dbcea20c360..06d1cae1e0a5e7 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -129,8 +129,6 @@ PyTypeObject _PyDefaultOptimizer_Type = { static _PyOptimizerObject _PyOptimizer_Default = { PyObject_HEAD_INIT(&_PyDefaultOptimizer_Type) .optimize = never_optimize, - .backedge_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD, - .side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD, }; static uint32_t @@ -190,12 +188,6 @@ _Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject *optimizer) } Py_INCREF(optimizer); interp->optimizer = optimizer; - interp->optimizer_backedge_threshold = shift_and_offset_threshold(optimizer->backedge_threshold); - interp->optimizer_side_threshold = shift_and_offset_threshold(optimizer->side_threshold); - if (optimizer == &_PyOptimizer_Default) { - assert(interp->optimizer_backedge_threshold == OPTIMIZER_UNREACHABLE_THRESHOLD); - assert(interp->optimizer_side_threshold == OPTIMIZER_UNREACHABLE_THRESHOLD); - } return old; } @@ -1109,7 +1101,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil assert(exit_count < COLD_EXIT_COUNT); for (int i = 0; i < exit_count; i++) { executor->exits[i].executor = &COLD_EXITS[i]; - executor->exits[i].temperature = interp->optimizer_side_threshold; + executor->exits[i].temperature = UNREACHABLE_BACKOFF; } int next_exit = exit_count-1; _PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length]; @@ -1291,10 +1283,6 @@ PyUnstable_Optimizer_NewUOpOptimizer(void) return NULL; } opt->optimize = uop_optimize; - // Need a few iterations to settle specializations, - // and to ammortize the cost of optimization. 
- opt->backedge_threshold = 16; - opt->side_threshold = 16; return (PyObject *)opt; } @@ -1384,8 +1372,6 @@ PyUnstable_Optimizer_NewCounter(void) return NULL; } opt->base.optimize = counter_optimize; - opt->base.backedge_threshold = 0; - opt->base.side_threshold = OPTIMIZER_UNREACHABLE_THRESHOLD; opt->count = 0; return (PyObject *)opt; } diff --git a/Python/specialize.c b/Python/specialize.c index c1edf8842faf68..ae7095f822af93 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -423,7 +423,8 @@ _PyCode_Quicken(PyCodeObject *code) int initial_value; switch (opcode) { case JUMP_BACKWARD: - initial_value = 0; + // Backoff sequence 16, 32, 64, ..., 4096 + initial_value = make_backoff_counter(16, 4).counter; break; case POP_JUMP_IF_FALSE: case POP_JUMP_IF_TRUE: From 8aa2c75d5627ae06b668d2f96afa768dec4e64e0 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 29 Mar 2024 08:32:37 -0700 Subject: [PATCH 28/42] Fix tests --- Lib/test/test_capi/test_opt.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index b59f4b74a8593e..80deb4a1981d00 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -69,7 +69,8 @@ def loop(): self.assertEqual(opt.get_count(), 0) with clear_executors(loop): loop() - self.assertEqual(opt.get_count(), 1000) + # Subtract 16 because optimizer doesn't kick in until 16 + self.assertEqual(opt.get_count(), 1000 - 16) def test_long_loop(self): "Check that we aren't confused by EXTENDED_ARG" @@ -81,7 +82,7 @@ def nop(): pass def long_loop(): - for _ in range(10): + for _ in range(20): nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); nop(); @@ -96,7 +97,7 @@ def long_loop(): with temporary_optimizer(opt): self.assertEqual(opt.get_count(), 0) long_loop() - self.assertEqual(opt.get_count(), 10) + self.assertEqual(opt.get_count(), 20 - 16) # Need 16 iterations to warm up def test_code_restore_for_ENTER_EXECUTOR(self): def testfunc(x): From 7bd4daad8ed324f9a995616e35d57a8971952f74 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 29 Mar 2024 11:00:17 -0700 Subject: [PATCH 29/42] Fix initial temperature, clean up --- Include/internal/pycore_backoff.h | 13 ++++++++----- Python/optimizer.c | 16 ++-------------- Python/specialize.c | 3 +-- 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index d0728ff959c52f..c3a2e36ecaeed1 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -44,15 +44,11 @@ typedef struct { static_assert(sizeof(backoff_counter_t) == 2, "backoff counter size should be 2 bytes"); #define UNREACHABLE_BACKOFF 0xFFFF -#define UNREACHABLE_BACKOFF_COUNTER ((backoff_counter_t){.counter = UNREACHABLE_BACKOFF}) - -/* Alias used by optimizer */ -#define OPTIMIZER_UNREACHABLE_THRESHOLD UNREACHABLE_BACKOFF static inline bool is_unreachable_backoff_counter(backoff_counter_t counter) { - return counter.counter == 0xFFFF; + return counter.counter == UNREACHABLE_BACKOFF; } static inline backoff_counter_t @@ -105,6 +101,13 @@ backoff_counter_is_zero(backoff_counter_t counter) return counter.value == 0; } +static inline uint16_t +initial_backoff_counter(void) +{ + // Backoff sequence 16, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 + return make_backoff_counter(16, 3).counter; +} + #ifdef __cplusplus } #endif diff --git 
a/Python/optimizer.c b/Python/optimizer.c index 06d1cae1e0a5e7..bf3a368de10b33 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -15,9 +15,6 @@ #include #include -/* Minimum of 16 additional executions before retry */ -#define MIN_TIER2_BACKOFF 4 - #define NEED_OPCODE_METADATA #include "pycore_uop_metadata.h" // Uop tables #undef NEED_OPCODE_METADATA @@ -131,15 +128,6 @@ static _PyOptimizerObject _PyOptimizer_Default = { .optimize = never_optimize, }; -static uint32_t -shift_and_offset_threshold(uint32_t threshold) -{ - if (threshold == OPTIMIZER_UNREACHABLE_THRESHOLD) { - return threshold; - } - return make_backoff_counter(threshold, MIN_TIER2_BACKOFF).counter; -} - _PyOptimizerObject * PyUnstable_GetOptimizer(void) { @@ -1101,7 +1089,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil assert(exit_count < COLD_EXIT_COUNT); for (int i = 0; i < exit_count; i++) { executor->exits[i].executor = &COLD_EXITS[i]; - executor->exits[i].temperature = UNREACHABLE_BACKOFF; + executor->exits[i].temperature = initial_backoff_counter(); } int next_exit = exit_count-1; _PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length]; @@ -1538,7 +1526,7 @@ _Py_ExecutorClear(_PyExecutorObject *executor) for (uint32_t i = 0; i < executor->exit_count; i++) { Py_DECREF(executor->exits[i].executor); executor->exits[i].executor = &COLD_EXITS[i]; - executor->exits[i].temperature = OPTIMIZER_UNREACHABLE_THRESHOLD; + executor->exits[i].temperature = UNREACHABLE_BACKOFF; } _Py_CODEUNIT *instruction = &_PyCode_CODE(code)[executor->vm_data.index]; assert(instruction->op.code == ENTER_EXECUTOR); diff --git a/Python/specialize.c b/Python/specialize.c index ae7095f822af93..22339b070e905f 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -423,8 +423,7 @@ _PyCode_Quicken(PyCodeObject *code) int initial_value; switch (opcode) { case JUMP_BACKWARD: - // Backoff sequence 16, 32, 64, ..., 4096 - initial_value = make_backoff_counter(16, 4).counter; + initial_value = initial_backoff_counter(); break; case POP_JUMP_IF_FALSE: case POP_JUMP_IF_TRUE: From 8ce80686b04b9f27ff0f60a918c9239fb2baddc8 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 29 Mar 2024 13:07:38 -0700 Subject: [PATCH 30/42] Remove unused variable --- Python/optimizer.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Python/optimizer.c b/Python/optimizer.c index bf3a368de10b33..f5fdcd7adcfa3c 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1085,7 +1085,6 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil } /* Initialize exits */ - PyInterpreterState *interp = _PyInterpreterState_GET(); assert(exit_count < COLD_EXIT_COUNT); for (int i = 0; i < exit_count; i++) { executor->exits[i].executor = &COLD_EXITS[i]; From 7bb5618500128ad729a8baf3ac95c604ce7312bc Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 29 Mar 2024 13:30:17 -0700 Subject: [PATCH 31/42] Admit defeat: advance_backoff_counter() may be entered when value == 0 This caused test_deadlock() in test_importlib to occasionally crash. 
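For the record, the effect of the change below is plain 12-bit wrap-around: if the helper is entered with a zero value, the value becomes 0xFFF instead of failing the debug assert, so at worst the counter needs another 4095 advances before it triggers. A minimal illustration of the arithmetic (a bare uint16_t stands in for the 12-bit value field; this is not the real helper):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint16_t value = 0;   /* the state test_deadlock occasionally hit */
        uint16_t wrapped = (uint16_t)((value - 1) & 0xFFF);
        assert(wrapped == 0xFFF);   /* wraps within 12 bits instead of asserting */
        return 0;
    }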
--- Include/internal/pycore_backoff.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index c3a2e36ecaeed1..8249c4ddd6d787 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -87,8 +87,7 @@ static inline backoff_counter_t advance_backoff_counter(backoff_counter_t counter) { if (!is_unreachable_backoff_counter(counter)) { - assert(counter.value != 0); - return make_backoff_counter(counter.value - 1, counter.backoff); + return make_backoff_counter((counter.value - 1) & 0xFFF, counter.backoff); } else { return counter; From 8eee1b40bc771c935ec62e860b266e0aa0267cad Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 2 Apr 2024 07:39:39 -0700 Subject: [PATCH 32/42] Put backoff field before value --- Include/internal/pycore_backoff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 8249c4ddd6d787..ab0faee52c8b7e 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -35,8 +35,8 @@ typedef struct { union { uint16_t counter; struct { - uint16_t value : 12; uint16_t backoff : 4; + uint16_t value : 12; }; }; } backoff_counter_t; From 42c1f268cbbe8faa49405f98b6dda6064abbd848 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 3 Apr 2024 11:05:13 -0700 Subject: [PATCH 33/42] Small cleanup in .h files --- Include/internal/pycore_backoff.h | 3 ++- Include/internal/pycore_code.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index ab0faee52c8b7e..a314b161083f7e 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -41,7 +41,8 @@ typedef struct { }; } backoff_counter_t; -static_assert(sizeof(backoff_counter_t) == 2, "backoff counter size should be 2 bytes"); +static_assert(sizeof(backoff_counter_t) == sizeof(_Py_CODEUNIT), + "backoff counter size should be the same size as a code unit"); #define UNREACHABLE_BACKOFF 0xFFFF diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 936415e2b66d12..2d7cc7481f1d5b 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -455,7 +455,7 @@ write_location_entry_start(uint8_t *ptr, int code, int length) * When counting executions until the next specialization attempt, * exponential backoff is used to reduce the number of specialization failures. * See pycore_backoff.h for more details. - * On a specialization failure, the backoff counter is reset. + * On a specialization failure, the backoff counter is restarted. */ #include "pycore_backoff.h" From a80cd0a5e00d7bc0a83805cc84afa95a558152b9 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 3 Apr 2024 11:06:29 -0700 Subject: [PATCH 34/42] Rename DECREMENT_ADAPTIVE_COUNTER to ADVANCE_... 
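A purely mechanical rename: "advance" means advance towards the next specialization attempt, so the stored value still counts down. As a sanity check, a toy driver for the idiom the specializing ops follow is sketched below; the struct, the two macros and the warmup/restart values are simplified stand-ins made up for illustration, not the real pycore_backoff.h or ceval_macros.h definitions.

    /* Simplified stand-ins, for illustration only. */
    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
        uint16_t value : 12;
        uint16_t backoff : 4;
    } counter_t;

    #define ADAPTIVE_COUNTER_IS_ZERO(C)  ((C).value == 0)
    #define ADVANCE_ADAPTIVE_COUNTER(C)  do { (C).value -= 1; } while (0)

    int main(void)
    {
        counter_t c = {.value = 4, .backoff = 0};   /* made-up warmup of 4 */
        for (int run = 1; run <= 6; run++) {
            if (ADAPTIVE_COUNTER_IS_ZERO(c)) {
                /* This is where a _Py_Specialize_*() call would go. */
                printf("run %d: counter triggered, attempt to specialize\n", run);
                c.value = 8;   /* stand-in for restarting with a longer backoff */
            }
            else {
                ADVANCE_ADAPTIVE_COUNTER(c);
                printf("run %d: deferred, value now %u\n", run, (unsigned)c.value);
            }
        }
        return 0;
    }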
--- Python/bytecodes.c | 30 +++++++++++++++--------------- Python/ceval_macros.h | 14 +++++++------- Python/generated_cases.c.h | 30 +++++++++++++++--------------- 3 files changed, 37 insertions(+), 37 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d3217b4298e73a..9dd6e8dfb65e29 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -333,7 +333,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(TO_BOOL, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } @@ -558,7 +558,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(BINARY_SUBSCR, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } @@ -705,7 +705,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(STORE_SUBSCR, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } @@ -989,7 +989,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(SEND, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } @@ -1218,7 +1218,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(UNPACK_SEQUENCE, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ (void)seq; (void)counter; @@ -1288,7 +1288,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(STORE_ATTR, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } @@ -1406,7 +1406,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(LOAD_GLOBAL, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } @@ -1730,7 +1730,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(LOAD_SUPER_ATTR, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } @@ -1844,7 +1844,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(LOAD_ATTR, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } @@ -2164,7 +2164,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(COMPARE_OP, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } @@ -2261,7 +2261,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(CONTAINS_OP, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } @@ -2362,7 +2362,7 @@ dummy_func( } } else { - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); } #endif /* ENABLE_SPECIALIZATION */ } @@ -2529,7 +2529,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(FOR_ITER, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } @@ -3024,7 +3024,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(CALL, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } @@ -3927,7 +3927,7 @@ 
dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(BINARY_OP, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ assert(NB_ADD <= oparg); assert(oparg <= NB_INPLACE_XOR); diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 92182f97ef9a81..8b9b65d61bf6b2 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -294,20 +294,20 @@ GETITEM(PyObject *v, Py_ssize_t i) { backoff_counter_is_zero(forge_backoff_counter((COUNTER))) #ifdef Py_GIL_DISABLED -#define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \ - do { \ - /* gh-115999 tracks progress on addressing this. */ \ +#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \ + do { \ + /* gh-115999 tracks progress on addressing this. */ \ static_assert(0, "The specializing interpreter is not yet thread-safe"); \ } while (0); #else -#define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \ - do { \ +#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \ + do { \ (COUNTER) = advance_backoff_counter(forge_backoff_counter((COUNTER))).counter; \ } while (0); #endif -#define PAUSE_ADAPTIVE_COUNTER(COUNTER) \ - do { \ +#define PAUSE_ADAPTIVE_COUNTER(COUNTER) \ + do { \ (COUNTER) = pause_backoff_counter(forge_backoff_counter((COUNTER))).counter; \ } while (0); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 63f431d8d9c5eb..229f782868b2ee 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -121,7 +121,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(BINARY_OP, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ assert(NB_ADD <= oparg); assert(oparg <= NB_INPLACE_XOR); @@ -438,7 +438,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(BINARY_SUBSCR, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } // _BINARY_SUBSCR @@ -766,7 +766,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(CALL, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } /* Skip 2 cache entries */ @@ -2042,7 +2042,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(COMPARE_OP, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } // _COMPARE_OP @@ -2191,7 +2191,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(CONTAINS_OP, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } // _CONTAINS_OP @@ -2602,7 +2602,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(FOR_ITER, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } // _FOR_ITER @@ -3436,7 +3436,7 @@ } } else { - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); } #endif /* ENABLE_SPECIALIZATION */ DISPATCH(); @@ -3537,7 +3537,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(LOAD_ATTR, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } /* Skip 8 cache entries */ @@ -4232,7 +4232,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(LOAD_GLOBAL, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } /* Skip 1 cache entry 
*/ @@ -4435,7 +4435,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(LOAD_SUPER_ATTR, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } // _LOAD_SUPER_ATTR @@ -5076,7 +5076,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(SEND, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } // _SEND @@ -5265,7 +5265,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(STORE_ATTR, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } /* Skip 3 cache entries */ @@ -5555,7 +5555,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(STORE_SUBSCR, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } // _STORE_SUBSCR @@ -5658,7 +5658,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(TO_BOOL, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ } /* Skip 2 cache entries */ @@ -5875,7 +5875,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(UNPACK_SEQUENCE, deferred); - DECREMENT_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); #endif /* ENABLE_SPECIALIZATION */ (void)seq; (void)counter; From 545c60e0fa734278998e54e8c35174c5c7bf34d3 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 3 Apr 2024 11:09:23 -0700 Subject: [PATCH 35/42] Rename ADAPTIVE_COUNTER_IS_ZERO to ..._TRIGGERS --- Python/bytecodes.c | 30 +++++++++++++++--------------- Python/ceval_macros.h | 4 ++-- Python/generated_cases.c.h | 30 +++++++++++++++--------------- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 9dd6e8dfb65e29..37909da3cc3c7d 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -327,7 +327,7 @@ dummy_func( specializing op(_SPECIALIZE_TO_BOOL, (counter/1, value -- value)) { #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_ToBool(value, next_instr); DISPATCH_SAME_OPARG(); @@ -552,7 +552,7 @@ dummy_func( specializing op(_SPECIALIZE_BINARY_SUBSCR, (counter/1, container, sub -- container, sub)) { #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_BinarySubscr(container, sub, next_instr); DISPATCH_SAME_OPARG(); @@ -699,7 +699,7 @@ dummy_func( specializing op(_SPECIALIZE_STORE_SUBSCR, (counter/1, container, sub -- container, sub)) { #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_StoreSubscr(container, sub, next_instr); DISPATCH_SAME_OPARG(); @@ -983,7 +983,7 @@ dummy_func( specializing op(_SPECIALIZE_SEND, (counter/1, receiver, unused -- receiver, unused)) { #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_Send(receiver, next_instr); DISPATCH_SAME_OPARG(); @@ -1212,7 +1212,7 @@ dummy_func( specializing op(_SPECIALIZE_UNPACK_SEQUENCE, (counter/1, seq -- seq)) { #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; 
_Py_Specialize_UnpackSequence(seq, next_instr, oparg); DISPATCH_SAME_OPARG(); @@ -1281,7 +1281,7 @@ dummy_func( specializing op(_SPECIALIZE_STORE_ATTR, (counter/1, owner -- owner)) { #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); next_instr = this_instr; _Py_Specialize_StoreAttr(owner, next_instr, name); @@ -1399,7 +1399,7 @@ dummy_func( specializing op(_SPECIALIZE_LOAD_GLOBAL, (counter/1 -- )) { #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); next_instr = this_instr; _Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name); @@ -1724,7 +1724,7 @@ dummy_func( specializing op(_SPECIALIZE_LOAD_SUPER_ATTR, (counter/1, global_super, class, unused -- global_super, class, unused)) { #if ENABLE_SPECIALIZATION int load_method = oparg & 1; - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_LoadSuperAttr(global_super, class, next_instr, load_method); DISPATCH_SAME_OPARG(); @@ -1837,7 +1837,7 @@ dummy_func( specializing op(_SPECIALIZE_LOAD_ATTR, (counter/1, owner -- owner)) { #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); next_instr = this_instr; _Py_Specialize_LoadAttr(owner, next_instr, name); @@ -2158,7 +2158,7 @@ dummy_func( specializing op(_SPECIALIZE_COMPARE_OP, (counter/1, left, right -- left, right)) { #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_CompareOp(left, right, next_instr, oparg); DISPATCH_SAME_OPARG(); @@ -2255,7 +2255,7 @@ dummy_func( specializing op(_SPECIALIZE_CONTAINS_OP, (counter/1, left, right -- left, right)) { #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_ContainsOp(right, next_instr); DISPATCH_SAME_OPARG(); @@ -2342,7 +2342,7 @@ dummy_func( JUMPBY(-oparg); #if ENABLE_SPECIALIZATION uint16_t counter = this_instr[1].cache; - if (ADAPTIVE_COUNTER_IS_ZERO(counter) && this_instr->op.code == JUMP_BACKWARD) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter) && this_instr->op.code == JUMP_BACKWARD) { _Py_CODEUNIT *start = this_instr; /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ while (oparg > 255) { @@ -2523,7 +2523,7 @@ dummy_func( specializing op(_SPECIALIZE_FOR_ITER, (counter/1, iter -- iter)) { #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_ForIter(iter, next_instr, oparg); DISPATCH_SAME_OPARG(); @@ -3018,7 +3018,7 @@ dummy_func( specializing op(_SPECIALIZE_CALL, (counter/1, callable, self_or_null, args[oparg] -- callable, self_or_null, args[oparg])) { #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_Call(callable, next_instr, oparg + (self_or_null != NULL)); DISPATCH_SAME_OPARG(); @@ -3921,7 +3921,7 @@ dummy_func( specializing op(_SPECIALIZE_BINARY_OP, (counter/1, lhs, rhs -- lhs, rhs)) { #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = 
this_instr; _Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY); DISPATCH_SAME_OPARG(); diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 8b9b65d61bf6b2..dd265e4fe8a53c 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -262,7 +262,7 @@ GETITEM(PyObject *v, Py_ssize_t i) { STAT_INC(opcode, miss); \ STAT_INC((INSTNAME), miss); \ /* The counter is always the first cache entry: */ \ - if (ADAPTIVE_COUNTER_IS_ZERO(next_instr->cache)) { \ + if (ADAPTIVE_COUNTER_TRIGGERS(next_instr->cache)) { \ STAT_INC((INSTNAME), deopt); \ } \ } while (0) @@ -290,7 +290,7 @@ GETITEM(PyObject *v, Py_ssize_t i) { dtrace_function_entry(frame); \ } -#define ADAPTIVE_COUNTER_IS_ZERO(COUNTER) \ +#define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \ backoff_counter_is_zero(forge_backoff_counter((COUNTER))) #ifdef Py_GIL_DISABLED diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 229f782868b2ee..d4852f4cbf9b6c 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -115,7 +115,7 @@ uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY); DISPATCH_SAME_OPARG(); @@ -432,7 +432,7 @@ uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_BinarySubscr(container, sub, next_instr); DISPATCH_SAME_OPARG(); @@ -760,7 +760,7 @@ uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_Call(callable, next_instr, oparg + (self_or_null != NULL)); DISPATCH_SAME_OPARG(); @@ -2036,7 +2036,7 @@ uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_CompareOp(left, right, next_instr, oparg); DISPATCH_SAME_OPARG(); @@ -2185,7 +2185,7 @@ uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_ContainsOp(right, next_instr); DISPATCH_SAME_OPARG(); @@ -2596,7 +2596,7 @@ uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_ForIter(iter, next_instr, oparg); DISPATCH_SAME_OPARG(); @@ -3416,7 +3416,7 @@ JUMPBY(-oparg); #if ENABLE_SPECIALIZATION uint16_t counter = this_instr[1].cache; - if (ADAPTIVE_COUNTER_IS_ZERO(counter) && this_instr->op.code == JUMP_BACKWARD) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter) && this_instr->op.code == JUMP_BACKWARD) { _Py_CODEUNIT *start = this_instr; /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ while (oparg > 255) { @@ -3530,7 +3530,7 @@ uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); next_instr = this_instr; 
_Py_Specialize_LoadAttr(owner, next_instr, name); @@ -4225,7 +4225,7 @@ uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); next_instr = this_instr; _Py_Specialize_LoadGlobal(GLOBALS(), BUILTINS(), next_instr, name); @@ -4429,7 +4429,7 @@ (void)counter; #if ENABLE_SPECIALIZATION int load_method = oparg & 1; - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_LoadSuperAttr(global_super, class, next_instr, load_method); DISPATCH_SAME_OPARG(); @@ -5070,7 +5070,7 @@ uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_Send(receiver, next_instr); DISPATCH_SAME_OPARG(); @@ -5258,7 +5258,7 @@ uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); next_instr = this_instr; _Py_Specialize_StoreAttr(owner, next_instr, name); @@ -5549,7 +5549,7 @@ uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_StoreSubscr(container, sub, next_instr); DISPATCH_SAME_OPARG(); @@ -5652,7 +5652,7 @@ uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_ToBool(value, next_instr); DISPATCH_SAME_OPARG(); @@ -5869,7 +5869,7 @@ uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_UnpackSequence(seq, next_instr, oparg); DISPATCH_SAME_OPARG(); From 6c0bb305074118888e6cbe9ae83f9c807a92eaef Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 3 Apr 2024 11:11:03 -0700 Subject: [PATCH 36/42] Rename backoff_counter_is_zero to ..._triggers --- Include/internal/pycore_backoff.h | 2 +- Python/bytecodes.c | 2 +- Python/ceval_macros.h | 2 +- Python/executor_cases.c.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index a314b161083f7e..819033cfc0c5cb 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -96,7 +96,7 @@ advance_backoff_counter(backoff_counter_t counter) } static inline bool -backoff_counter_is_zero(backoff_counter_t counter) +backoff_counter_triggers(backoff_counter_t counter) { return counter.value == 0; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 37909da3cc3c7d..6c025df5892336 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4142,7 +4142,7 @@ dummy_func( PyCodeObject *code = _PyFrame_GetCode(frame); _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; backoff_counter_t temperature = forge_backoff_counter(exit->temperature); - if (!backoff_counter_is_zero(temperature)) { + if (!backoff_counter_triggers(temperature)) { exit->temperature = advance_backoff_counter(temperature).counter; 
GOTO_TIER_ONE(target); } diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index dd265e4fe8a53c..c400d67cd0e154 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -291,7 +291,7 @@ GETITEM(PyObject *v, Py_ssize_t i) { } #define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \ - backoff_counter_is_zero(forge_backoff_counter((COUNTER))) + backoff_counter_triggers(forge_backoff_counter((COUNTER))) #ifdef Py_GIL_DISABLED #define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 979cc548d6b064..e7d841ad5e635a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3689,7 +3689,7 @@ PyCodeObject *code = _PyFrame_GetCode(frame); _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; backoff_counter_t temperature = forge_backoff_counter(exit->temperature); - if (!backoff_counter_is_zero(temperature)) { + if (!backoff_counter_triggers(temperature)) { exit->temperature = advance_backoff_counter(temperature).counter; GOTO_TIER_ONE(target); } From a7c9b6d43271f56e15097f9a7d1a54585085162a Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 3 Apr 2024 11:13:34 -0700 Subject: [PATCH 37/42] Rename reset_backoff_counter to restart_... --- Include/internal/pycore_backoff.h | 2 +- Include/internal/pycore_code.h | 2 +- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 819033cfc0c5cb..d277247c1c68e2 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -67,7 +67,7 @@ forge_backoff_counter(uint16_t counter) } static inline backoff_counter_t -reset_backoff_counter(backoff_counter_t counter) +restart_backoff_counter(backoff_counter_t counter) { assert(!is_unreachable_backoff_counter(counter)); if (counter.backoff < 12) { diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 2d7cc7481f1d5b..e95b7565a282f6 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -495,7 +495,7 @@ adaptive_counter_cooldown(void) { static inline uint16_t adaptive_counter_backoff(uint16_t counter) { - return reset_backoff_counter(forge_backoff_counter(counter)).counter; + return restart_backoff_counter(forge_backoff_counter(counter)).counter; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6c025df5892336..af6bcb703d6470 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4154,7 +4154,7 @@ dummy_func( else { int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); if (optimized <= 0) { - exit->temperature = reset_backoff_counter(temperature).counter; + exit->temperature = restart_backoff_counter(temperature).counter; if (optimized < 0) { Py_DECREF(previous); tstate->previous_executor = Py_None; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index e7d841ad5e635a..efa8f6f1f30c2b 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3701,7 +3701,7 @@ else { int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); if (optimized <= 0) { - exit->temperature = reset_backoff_counter(temperature).counter; + exit->temperature = restart_backoff_counter(temperature).counter; if (optimized < 0) { Py_DECREF(previous); tstate->previous_executor = Py_None; From 3fee35f0e749feb27e104e8f212bed20c390149e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 3 Apr 2024 11:58:12 -0700 Subject: [PATCH 38/42]
Make _Py_BackoffCounter a member of _Py_CODEUNIT This changes a lot of things but the end result is arguably better. --- Include/cpython/code.h | 11 +++++++ Include/cpython/optimizer.h | 2 +- Include/internal/pycore_backoff.h | 43 ++++++++++----------------- Include/internal/pycore_code.h | 42 +++++++++++++-------------- Python/bytecodes.c | 48 +++++++++++++++---------------- Python/ceval.c | 2 +- Python/ceval_macros.h | 4 +-- Python/executor_cases.c.h | 6 ++-- Python/generated_cases.c.h | 42 +++++++++++++-------------- Python/instrumentation.c | 8 +++--- Python/optimizer.c | 2 +- Python/specialize.c | 4 +-- 12 files changed, 106 insertions(+), 108 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index d5dac1765638f9..b0e226e0e1971a 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -24,6 +24,16 @@ typedef struct _Py_GlobalMonitors { uint8_t tools[_PY_MONITORING_UNGROUPED_EVENTS]; } _Py_GlobalMonitors; +typedef struct { + union { + struct { + uint16_t backoff : 4; + uint16_t value : 12; + }; + uint16_t as_counter; // For printf("%#x", ...) + }; +} _Py_BackoffCounter; + /* Each instruction in a code object is a fixed-width value, * currently 2 bytes: 1-byte opcode + 1-byte oparg. The EXTENDED_ARG * opcode allows for larger values but the current limit is 3 uses @@ -39,6 +49,7 @@ typedef union { uint8_t code; uint8_t arg; } op; + _Py_BackoffCounter counter; // First cache entry of specializable op } _Py_CODEUNIT; diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 2665dd385d32d5..819251a25bb242 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -89,7 +89,7 @@ static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst) typedef struct _exit_data { uint32_t target; - uint16_t temperature; + _Py_BackoffCounter temperature; const struct _PyExecutorObject *executor; } _PyExitData; diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index d277247c1c68e2..ae864a97e39d4b 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -31,43 +31,30 @@ extern "C" { There is an exceptional value which must not be updated, 0xFFFF. 
*/ -typedef struct { - union { - uint16_t counter; - struct { - uint16_t backoff : 4; - uint16_t value : 12; - }; - }; -} backoff_counter_t; - -static_assert(sizeof(backoff_counter_t) == sizeof(_Py_CODEUNIT), - "backoff counter size should be the same size as a code unit"); - #define UNREACHABLE_BACKOFF 0xFFFF static inline bool -is_unreachable_backoff_counter(backoff_counter_t counter) +is_unreachable_backoff_counter(_Py_BackoffCounter counter) { - return counter.counter == UNREACHABLE_BACKOFF; + return counter.as_counter == UNREACHABLE_BACKOFF; } -static inline backoff_counter_t +static inline _Py_BackoffCounter make_backoff_counter(uint16_t value, uint16_t backoff) { assert(backoff <= 15); assert(value <= 0xFFF); - return (backoff_counter_t){.value = value, .backoff = backoff}; + return (_Py_BackoffCounter){.value = value, .backoff = backoff}; } -static inline backoff_counter_t +static inline _Py_BackoffCounter forge_backoff_counter(uint16_t counter) { - return (backoff_counter_t){.counter = counter}; + return (_Py_BackoffCounter){.as_counter = counter}; } -static inline backoff_counter_t -restart_backoff_counter(backoff_counter_t counter) +static inline _Py_BackoffCounter +restart_backoff_counter(_Py_BackoffCounter counter) { assert(!is_unreachable_backoff_counter(counter)); if (counter.backoff < 12) { @@ -78,14 +65,14 @@ restart_backoff_counter(backoff_counter_t counter) } } -static inline backoff_counter_t -pause_backoff_counter(backoff_counter_t counter) +static inline _Py_BackoffCounter +pause_backoff_counter(_Py_BackoffCounter counter) { return make_backoff_counter(counter.value | 1, counter.backoff); } -static inline backoff_counter_t -advance_backoff_counter(backoff_counter_t counter) +static inline _Py_BackoffCounter +advance_backoff_counter(_Py_BackoffCounter counter) { if (!is_unreachable_backoff_counter(counter)) { return make_backoff_counter((counter.value - 1) & 0xFFF, counter.backoff); @@ -96,16 +83,16 @@ advance_backoff_counter(backoff_counter_t counter) } static inline bool -backoff_counter_triggers(backoff_counter_t counter) +backoff_counter_triggers(_Py_BackoffCounter counter) { return counter.value == 0; } -static inline uint16_t +static inline _Py_BackoffCounter initial_backoff_counter(void) { // Backoff sequence 16, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096 - return make_backoff_counter(16, 3).counter; + return make_backoff_counter(16, 3); } #ifdef __cplusplus diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index e95b7565a282f6..21e85ddbb47d44 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -31,7 +31,7 @@ extern "C" { #define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT)) typedef struct { - uint16_t counter; + _Py_BackoffCounter counter; uint16_t module_keys_version; uint16_t builtin_keys_version; uint16_t index; @@ -40,44 +40,44 @@ typedef struct { #define INLINE_CACHE_ENTRIES_LOAD_GLOBAL CACHE_ENTRIES(_PyLoadGlobalCache) typedef struct { - uint16_t counter; + _Py_BackoffCounter counter; } _PyBinaryOpCache; #define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache) typedef struct { - uint16_t counter; + _Py_BackoffCounter counter; } _PyUnpackSequenceCache; #define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \ CACHE_ENTRIES(_PyUnpackSequenceCache) typedef struct { - uint16_t counter; + _Py_BackoffCounter counter; } _PyCompareOpCache; #define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache) typedef struct { - uint16_t counter; + _Py_BackoffCounter counter; } 
_PyBinarySubscrCache; #define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache) typedef struct { - uint16_t counter; + _Py_BackoffCounter counter; } _PySuperAttrCache; #define INLINE_CACHE_ENTRIES_LOAD_SUPER_ATTR CACHE_ENTRIES(_PySuperAttrCache) typedef struct { - uint16_t counter; + _Py_BackoffCounter counter; uint16_t version[2]; uint16_t index; } _PyAttrCache; typedef struct { - uint16_t counter; + _Py_BackoffCounter counter; uint16_t type_version[2]; union { uint16_t keys_version[2]; @@ -93,39 +93,39 @@ typedef struct { #define INLINE_CACHE_ENTRIES_STORE_ATTR CACHE_ENTRIES(_PyAttrCache) typedef struct { - uint16_t counter; + _Py_BackoffCounter counter; uint16_t func_version[2]; } _PyCallCache; #define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache) typedef struct { - uint16_t counter; + _Py_BackoffCounter counter; } _PyStoreSubscrCache; #define INLINE_CACHE_ENTRIES_STORE_SUBSCR CACHE_ENTRIES(_PyStoreSubscrCache) typedef struct { - uint16_t counter; + _Py_BackoffCounter counter; } _PyForIterCache; #define INLINE_CACHE_ENTRIES_FOR_ITER CACHE_ENTRIES(_PyForIterCache) typedef struct { - uint16_t counter; + _Py_BackoffCounter counter; } _PySendCache; #define INLINE_CACHE_ENTRIES_SEND CACHE_ENTRIES(_PySendCache) typedef struct { - uint16_t counter; + _Py_BackoffCounter counter; uint16_t version[2]; } _PyToBoolCache; #define INLINE_CACHE_ENTRIES_TO_BOOL CACHE_ENTRIES(_PyToBoolCache) typedef struct { - uint16_t counter; + _Py_BackoffCounter counter; } _PyContainsOpCache; #define INLINE_CACHE_ENTRIES_CONTAINS_OP CACHE_ENTRIES(_PyContainsOpCache) @@ -476,26 +476,26 @@ write_location_entry_start(uint8_t *ptr, int code, int length) #define ADAPTIVE_COOLDOWN_VALUE 52 #define ADAPTIVE_COOLDOWN_BACKOFF 0 -static inline uint16_t +static inline _Py_BackoffCounter adaptive_counter_bits(uint16_t value, uint16_t backoff) { - return make_backoff_counter(value, backoff).counter; + return make_backoff_counter(value, backoff); } -static inline uint16_t +static inline _Py_BackoffCounter adaptive_counter_warmup(void) { return adaptive_counter_bits(ADAPTIVE_WARMUP_VALUE, ADAPTIVE_WARMUP_BACKOFF); } -static inline uint16_t +static inline _Py_BackoffCounter adaptive_counter_cooldown(void) { return adaptive_counter_bits(ADAPTIVE_COOLDOWN_VALUE, ADAPTIVE_COOLDOWN_BACKOFF); } -static inline uint16_t -adaptive_counter_backoff(uint16_t counter) { - return restart_backoff_counter(forge_backoff_counter(counter)).counter; +static inline _Py_BackoffCounter +adaptive_counter_backoff(_Py_BackoffCounter counter) { + return restart_backoff_counter(counter); } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index af6bcb703d6470..abeab8b0cd81e5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -333,7 +333,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(TO_BOOL, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } @@ -558,7 +558,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(BINARY_SUBSCR, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } @@ -705,7 +705,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(STORE_SUBSCR, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } @@ -989,7 +989,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(SEND, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); 
+ ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } @@ -1218,7 +1218,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(UNPACK_SEQUENCE, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ (void)seq; (void)counter; @@ -1288,7 +1288,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(STORE_ATTR, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } @@ -1406,7 +1406,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(LOAD_GLOBAL, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } @@ -1712,7 +1712,7 @@ dummy_func( inst(INSTRUMENTED_LOAD_SUPER_ATTR, (unused/1, unused, unused, unused -- unused, unused if (oparg & 1))) { // cancel out the decrement that will happen in LOAD_SUPER_ATTR; we // don't want to specialize instrumented instructions - PAUSE_ADAPTIVE_COUNTER(this_instr[1].cache); + PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter); GO_TO_INSTRUCTION(LOAD_SUPER_ATTR); } @@ -1730,7 +1730,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(LOAD_SUPER_ATTR, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } @@ -1844,7 +1844,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(LOAD_ATTR, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } @@ -2164,7 +2164,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(COMPARE_OP, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } @@ -2261,7 +2261,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(CONTAINS_OP, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } @@ -2341,8 +2341,8 @@ dummy_func( assert(oparg <= INSTR_OFFSET()); JUMPBY(-oparg); #if ENABLE_SPECIALIZATION - uint16_t counter = this_instr[1].cache; - if (ADAPTIVE_COUNTER_TRIGGERS(counter) && this_instr->op.code == JUMP_BACKWARD) { + _Py_BackoffCounter counter = this_instr[1].counter; + if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD) { _Py_CODEUNIT *start = this_instr; /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ while (oparg > 255) { @@ -2358,11 +2358,11 @@ dummy_func( GOTO_TIER_TWO(executor); } else { - this_instr[1].cache = adaptive_counter_backoff(counter); + this_instr[1].counter = restart_backoff_counter(counter); } } else { - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); } #endif /* ENABLE_SPECIALIZATION */ } @@ -2529,7 +2529,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(FOR_ITER, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } @@ -2989,7 +2989,7 @@ dummy_func( tstate, PY_MONITORING_EVENT_CALL, frame, this_instr, function, arg); ERROR_IF(err, error); - PAUSE_ADAPTIVE_COUNTER(this_instr[1].cache); + PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter); GO_TO_INSTRUCTION(CALL); } @@ -3024,7 +3024,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(CALL, deferred); - 
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } @@ -3927,7 +3927,7 @@ dummy_func( DISPATCH_SAME_OPARG(); } STAT_INC(BINARY_OP, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ assert(NB_ADD <= oparg); assert(oparg <= NB_INPLACE_XOR); @@ -3953,7 +3953,7 @@ dummy_func( ERROR_IF(next_opcode < 0, error); next_instr = this_instr; if (_PyOpcode_Caches[next_opcode]) { - PAUSE_ADAPTIVE_COUNTER(next_instr[1].cache); + PAUSE_ADAPTIVE_COUNTER(next_instr[1].counter); } assert(next_opcode > 0 && next_opcode < 256); opcode = next_opcode; @@ -4141,9 +4141,9 @@ dummy_func( _PyExitData *exit = &previous->exits[oparg]; PyCodeObject *code = _PyFrame_GetCode(frame); _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; - backoff_counter_t temperature = forge_backoff_counter(exit->temperature); + _Py_BackoffCounter temperature = exit->temperature; if (!backoff_counter_triggers(temperature)) { - exit->temperature = advance_backoff_counter(temperature).counter; + exit->temperature = advance_backoff_counter(temperature); GOTO_TIER_ONE(target); } _PyExecutorObject *executor; @@ -4154,7 +4154,7 @@ dummy_func( else { int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); if (optimized <= 0) { - exit->temperature = restart_backoff_counter(temperature).counter; + exit->temperature = restart_backoff_counter(temperature); if (optimized < 0) { Py_DECREF(previous); tstate->previous_executor = Py_None; diff --git a/Python/ceval.c b/Python/ceval.c index 77d7b61778b219..316ef6fb924194 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1100,7 +1100,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int printf("SIDE EXIT: [UOp "); _PyUOpPrint(&next_uop[-1]); printf(", exit %u, temp %d, target %d -> %s]\n", - exit_index, exit->temperature, exit->target, + exit_index, exit->temperature.as_counter, exit->target, _PyOpcode_OpName[_PyCode_CODE(_PyFrame_GetCode(frame))[exit->target].op.code]); } #endif diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index c400d67cd0e154..dd50d234e30f61 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -302,13 +302,13 @@ GETITEM(PyObject *v, Py_ssize_t i) { #else #define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \ do { \ - (COUNTER) = advance_backoff_counter(forge_backoff_counter((COUNTER))).counter; \ + (COUNTER) = advance_backoff_counter((COUNTER)); \ } while (0); #endif #define PAUSE_ADAPTIVE_COUNTER(COUNTER) \ do { \ - (COUNTER) = pause_backoff_counter(forge_backoff_counter((COUNTER))).counter; \ + (COUNTER) = pause_backoff_counter((COUNTER)); \ } while (0); #define UNBOUNDLOCAL_ERROR_MSG \ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index efa8f6f1f30c2b..ff6170ab8a7124 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3688,9 +3688,9 @@ _PyExitData *exit = &previous->exits[oparg]; PyCodeObject *code = _PyFrame_GetCode(frame); _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; - backoff_counter_t temperature = forge_backoff_counter(exit->temperature); + _Py_BackoffCounter temperature = exit->temperature; if (!backoff_counter_triggers(temperature)) { - exit->temperature = advance_backoff_counter(temperature).counter; + exit->temperature = advance_backoff_counter(temperature); GOTO_TIER_ONE(target); } _PyExecutorObject *executor; @@ -3701,7 +3701,7 @@ else { int optimized = 
_PyOptimizer_Optimize(frame, target, stack_pointer, &executor); if (optimized <= 0) { - exit->temperature = restart_backoff_counter(temperature).counter; + exit->temperature = restart_backoff_counter(temperature); if (optimized < 0) { Py_DECREF(previous); tstate->previous_executor = Py_None; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index d4852f4cbf9b6c..0116acd5ae302f 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -121,7 +121,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(BINARY_OP, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ assert(NB_ADD <= oparg); assert(oparg <= NB_INPLACE_XOR); @@ -438,7 +438,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(BINARY_SUBSCR, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } // _BINARY_SUBSCR @@ -766,7 +766,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(CALL, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } /* Skip 2 cache entries */ @@ -2042,7 +2042,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(COMPARE_OP, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } // _COMPARE_OP @@ -2191,7 +2191,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(CONTAINS_OP, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } // _CONTAINS_OP @@ -2602,7 +2602,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(FOR_ITER, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } // _FOR_ITER @@ -3026,7 +3026,7 @@ tstate, PY_MONITORING_EVENT_CALL, frame, this_instr, function, arg); if (err) goto error; - PAUSE_ADAPTIVE_COUNTER(this_instr[1].cache); + PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter); GO_TO_INSTRUCTION(CALL); } @@ -3142,7 +3142,7 @@ if (next_opcode < 0) goto error; next_instr = this_instr; if (_PyOpcode_Caches[next_opcode]) { - PAUSE_ADAPTIVE_COUNTER(next_instr[1].cache); + PAUSE_ADAPTIVE_COUNTER(next_instr[1].counter); } assert(next_opcode > 0 && next_opcode < 256); opcode = next_opcode; @@ -3177,7 +3177,7 @@ /* Skip 1 cache entry */ // cancel out the decrement that will happen in LOAD_SUPER_ATTR; we // don't want to specialize instrumented instructions - PAUSE_ADAPTIVE_COUNTER(this_instr[1].cache); + PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter); GO_TO_INSTRUCTION(LOAD_SUPER_ATTR); } @@ -3415,8 +3415,8 @@ assert(oparg <= INSTR_OFFSET()); JUMPBY(-oparg); #if ENABLE_SPECIALIZATION - uint16_t counter = this_instr[1].cache; - if (ADAPTIVE_COUNTER_TRIGGERS(counter) && this_instr->op.code == JUMP_BACKWARD) { + _Py_BackoffCounter counter = this_instr[1].counter; + if (backoff_counter_triggers(counter) && this_instr->op.code == JUMP_BACKWARD) { _Py_CODEUNIT *start = this_instr; /* Back up over EXTENDED_ARGs so optimizer sees the whole instruction */ while (oparg > 255) { @@ -3432,11 +3432,11 @@ GOTO_TIER_TWO(executor); } else { - this_instr[1].cache = adaptive_counter_backoff(counter); + this_instr[1].counter = restart_backoff_counter(counter); } } else { - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); } #endif /* ENABLE_SPECIALIZATION */ DISPATCH(); @@ 
-3537,7 +3537,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(LOAD_ATTR, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } /* Skip 8 cache entries */ @@ -4232,7 +4232,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(LOAD_GLOBAL, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } /* Skip 1 cache entry */ @@ -4435,7 +4435,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(LOAD_SUPER_ATTR, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } // _LOAD_SUPER_ATTR @@ -5076,7 +5076,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(SEND, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } // _SEND @@ -5265,7 +5265,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(STORE_ATTR, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } /* Skip 3 cache entries */ @@ -5555,7 +5555,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(STORE_SUBSCR, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } // _STORE_SUBSCR @@ -5658,7 +5658,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(TO_BOOL, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ } /* Skip 2 cache entries */ @@ -5875,7 +5875,7 @@ DISPATCH_SAME_OPARG(); } STAT_INC(UNPACK_SEQUENCE, deferred); - ADVANCE_ADAPTIVE_COUNTER(this_instr[1].cache); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); #endif /* ENABLE_SPECIALIZATION */ (void)seq; (void)counter; diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 018cd662b1561a..0f60290865000c 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -590,7 +590,7 @@ de_instrument(PyCodeObject *code, int i, int event) CHECK(_PyOpcode_Deopt[deinstrumented] == deinstrumented); *opcode_ptr = deinstrumented; if (_PyOpcode_Caches[deinstrumented]) { - instr[1].cache = adaptive_counter_warmup(); + instr[1].counter = adaptive_counter_warmup(); } } @@ -611,7 +611,7 @@ de_instrument_line(PyCodeObject *code, int i) CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]); instr->op.code = original_opcode; if (_PyOpcode_Caches[original_opcode]) { - instr[1].cache = adaptive_counter_warmup(); + instr[1].counter = adaptive_counter_warmup(); } assert(instr->op.code != INSTRUMENTED_LINE); } @@ -634,7 +634,7 @@ de_instrument_per_instruction(PyCodeObject *code, int i) CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]); *opcode_ptr = original_opcode; if (_PyOpcode_Caches[original_opcode]) { - instr[1].cache = adaptive_counter_warmup(); + instr[1].counter = adaptive_counter_warmup(); } assert(*opcode_ptr != INSTRUMENTED_INSTRUCTION); assert(instr->op.code != INSTRUMENTED_INSTRUCTION); @@ -667,7 +667,7 @@ instrument(PyCodeObject *code, int i) assert(instrumented); *opcode_ptr = instrumented; if (_PyOpcode_Caches[deopt]) { - instr[1].cache = adaptive_counter_warmup(); + instr[1].counter = adaptive_counter_warmup(); } } } diff --git a/Python/optimizer.c b/Python/optimizer.c index f5fdcd7adcfa3c..c02078948cb356 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1525,7 +1525,7 @@ 
 _Py_ExecutorClear(_PyExecutorObject *executor)
     for (uint32_t i = 0; i < executor->exit_count; i++) {
         Py_DECREF(executor->exits[i].executor);
         executor->exits[i].executor = &COLD_EXITS[i];
-        executor->exits[i].temperature = UNREACHABLE_BACKOFF;
+        executor->exits[i].temperature = forge_backoff_counter(UNREACHABLE_BACKOFF);
     }
     _Py_CODEUNIT *instruction = &_PyCode_CODE(code)[executor->vm_data.index];
     assert(instruction->op.code == ENTER_EXECUTOR);
diff --git a/Python/specialize.c b/Python/specialize.c
index d73988e1325b5e..d17dcbf667b9a9 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -422,7 +422,7 @@ _PyCode_Quicken(PyCodeObject *code)
             int initial_value;
             switch (opcode) {
                 case JUMP_BACKWARD:
-                    initial_value = initial_backoff_counter();
+                    initial_value = initial_backoff_counter().as_counter;
                     break;
                 case POP_JUMP_IF_FALSE:
                 case POP_JUMP_IF_TRUE:
@@ -431,7 +431,7 @@ _PyCode_Quicken(PyCodeObject *code)
                     initial_value = 0x5555;  // Alternating 0, 1 bits
                     break;
                 default:
-                    initial_value = adaptive_counter_warmup();
+                    initial_value = adaptive_counter_warmup().as_counter;
                     break;
             }
             instructions[i + 1].cache = initial_value;

From df6f34c623d2ca1a4d4801a882855d7b4cee27f9 Mon Sep 17 00:00:00 2001
From: Guido van Rossum
Date: Wed, 3 Apr 2024 13:03:37 -0700
Subject: [PATCH 39/42] Refactor initial counter values.

- The initial exit temperature is 64; this must be greater than the specialization cooldown value (52) otherwise we might create a trace before we have re-specialized the Tier 1 bytecode
- There's now a handy helper function for every counter initialization

---
 Include/internal/pycore_backoff.h | 33 ++++++++++++++++++++++++++++---
 Include/internal/pycore_code.h    |  4 ++++
 Python/ceval_macros.h             |  3 +++
 Python/optimizer.c                |  4 ++--
 Python/specialize.c               |  8 +++-----
 5 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h
index ae864a97e39d4b..5d93c889e84976 100644
--- a/Include/internal/pycore_backoff.h
+++ b/Include/internal/pycore_backoff.h
@@ -88,11 +88,38 @@ backoff_counter_triggers(_Py_BackoffCounter counter)
     return counter.value == 0;
 }
 
+/* Initial JUMP_BACKWARD counter.
+ * This determines when we create a trace for a loop.
+ * Backoff sequence 16, 32, 64, 128, 256, 512, 1024, 2048, 4096. */
+#define JUMP_BACKWARD_INITIAL_VALUE 16
+#define JUMP_BACKWARD_INITIAL_BACKOFF 4
 static inline _Py_BackoffCounter
-initial_backoff_counter(void)
+initial_jump_backoff_counter(void)
 {
-    // Backoff sequence 16, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096
-    return make_backoff_counter(16, 3);
+    return make_backoff_counter(JUMP_BACKWARD_INITIAL_VALUE,
+                                JUMP_BACKWARD_INITIAL_BACKOFF);
+}
+
+/* Initial exit temperature.
+ * Must be larger than ADAPTIVE_COOLDOWN_VALUE,
+ * otherwise when a side exit warms up we may construct
+ * a new trace before the Tier 1 code has properly re-specialized.
+ * Backoff sequence 64, 128, 256, 512, 1024, 2048, 4096. */
+#define COLD_EXIT_INITIAL_VALUE 64
+#define COLD_EXIT_INITIAL_BACKOFF 6
+
+static inline _Py_BackoffCounter
+initial_temperature_backoff_counter(void)
+{
+    return make_backoff_counter(COLD_EXIT_INITIAL_VALUE,
+                                COLD_EXIT_INITIAL_BACKOFF);
+}
+
+/* Unreachable backoff counter.
+ */
+static inline _Py_BackoffCounter
+initial_unreachable_backoff_counter(void)
+{
+    return forge_backoff_counter(UNREACHABLE_BACKOFF);
+}
 #ifdef __cplusplus
diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h
index 21e85ddbb47d44..688051bbff7aac 100644
--- a/Include/internal/pycore_code.h
+++ b/Include/internal/pycore_code.h
@@ -476,6 +476,10 @@ write_location_entry_start(uint8_t *ptr, int code, int length)
 #define ADAPTIVE_COOLDOWN_VALUE 52
 #define ADAPTIVE_COOLDOWN_BACKOFF 0
 
+// Can't assert this in pycore_backoff.h because of header order dependencies
+static_assert(COLD_EXIT_INITIAL_VALUE > ADAPTIVE_COOLDOWN_VALUE,
+              "Cold exit value should be larger than adaptive cooldown value");
+
 static inline _Py_BackoffCounter
 adaptive_counter_bits(uint16_t value, uint16_t backoff) {
     return make_backoff_counter(value, backoff);
diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h
index dd50d234e30f61..224cd1da7d4a0e 100644
--- a/Python/ceval_macros.h
+++ b/Python/ceval_macros.h
@@ -290,6 +290,9 @@ GETITEM(PyObject *v, Py_ssize_t i) {
         dtrace_function_entry(frame); \
     }
 
+/* This takes a uint16_t instead of a _Py_BackoffCounter,
+ * because it is used directly on the cache entry in generated code,
+ * which is always an integral type. */
 #define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \
     backoff_counter_triggers(forge_backoff_counter((COUNTER)))
diff --git a/Python/optimizer.c b/Python/optimizer.c
index c02078948cb356..5c69d9d5de92eb 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -1088,7 +1088,7 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil
     assert(exit_count < COLD_EXIT_COUNT);
     for (int i = 0; i < exit_count; i++) {
         executor->exits[i].executor = &COLD_EXITS[i];
-        executor->exits[i].temperature = initial_backoff_counter();
+        executor->exits[i].temperature = initial_temperature_backoff_counter();
     }
     int next_exit = exit_count-1;
     _PyUOpInstruction *dest = (_PyUOpInstruction *)&executor->trace[length];
@@ -1525,7 +1525,7 @@ _Py_ExecutorClear(_PyExecutorObject *executor)
     for (uint32_t i = 0; i < executor->exit_count; i++) {
         Py_DECREF(executor->exits[i].executor);
         executor->exits[i].executor = &COLD_EXITS[i];
-        executor->exits[i].temperature = forge_backoff_counter(UNREACHABLE_BACKOFF);
+        executor->exits[i].temperature = initial_unreachable_backoff_counter();
     }
     _Py_CODEUNIT *instruction = &_PyCode_CODE(code)[executor->vm_data.index];
     assert(instruction->op.code == ENTER_EXECUTOR);
diff --git a/Python/specialize.c b/Python/specialize.c
index d17dcbf667b9a9..0b4b199a23e297 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -419,22 +419,20 @@ _PyCode_Quicken(PyCodeObject *code)
         int caches = _PyOpcode_Caches[opcode];
         if (caches) {
             // The initial value depends on the opcode
-            int initial_value;
             switch (opcode) {
                 case JUMP_BACKWARD:
-                    initial_value = initial_backoff_counter().as_counter;
+                    instructions[i + 1].counter = initial_jump_backoff_counter();
                     break;
                 case POP_JUMP_IF_FALSE:
                 case POP_JUMP_IF_TRUE:
                 case POP_JUMP_IF_NONE:
                 case POP_JUMP_IF_NOT_NONE:
-                    initial_value = 0x5555;  // Alternating 0, 1 bits
+                    instructions[i + 1].cache = 0x5555;  // Alternating 0, 1 bits
                     break;
                 default:
-                    initial_value = adaptive_counter_warmup().as_counter;
+                    instructions[i + 1].counter = adaptive_counter_warmup();
                     break;
             }
-            instructions[i + 1].cache = initial_value;
             i += caches;
         }
     }

From 72f6b0dd2afa0308780f787687bae7425a36dcd1 Mon Sep 17 00:00:00 2001
From: Guido van Rossum
Date: Wed, 3 Apr 2024 13:16:38 -0700
Subject: [PATCH 40/42] Export tier 2 threshold from _testinternalcapi

---
 Lib/test/test_capi/test_opt.py | 15 ++++++++-------
 Modules/_testinternalcapi.c    |  6 ++++++
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py
index 80deb4a1981d00..50b88c011afb47 100644
--- a/Lib/test/test_capi/test_opt.py
+++ b/Lib/test/test_capi/test_opt.py
@@ -10,6 +10,7 @@
 from test.support import script_helper, requires_specialization
+from _testinternalcapi import TIER2_THRESHOLD
 
 @contextlib.contextmanager
 def temporary_optimizer(opt):
@@ -69,8 +70,8 @@ def loop():
         self.assertEqual(opt.get_count(), 0)
         with clear_executors(loop):
             loop()
-        # Subtract 16 because optimizer doesn't kick in until 16
-        self.assertEqual(opt.get_count(), 1000 - 16)
+        # Subtract because optimizer doesn't kick in sooner
+        self.assertEqual(opt.get_count(), 1000 - TIER2_THRESHOLD)
 
     def test_long_loop(self):
         "Check that we aren't confused by EXTENDED_ARG"
@@ -97,7 +98,7 @@ def long_loop():
         with temporary_optimizer(opt):
             self.assertEqual(opt.get_count(), 0)
             long_loop()
-            self.assertEqual(opt.get_count(), 20 - 16)  # Need 16 iterations to warm up
+            self.assertEqual(opt.get_count(), 20 - TIER2_THRESHOLD)  # Need iterations to warm up
 
     def test_code_restore_for_ENTER_EXECUTOR(self):
         def testfunc(x):
@@ -933,10 +934,10 @@ def testfunc(n):
         exec(src, ns, ns)
         testfunc = ns['testfunc']
         ns['_test_global'] = 0
-        _, ex = self._run_with_optimizer(testfunc, 16)
+        _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
         self.assertIsNone(ex)
         ns['_test_global'] = 1
-        _, ex = self._run_with_optimizer(testfunc, 16)
+        _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
         self.assertIsNotNone(ex)
         uops = get_opnames(ex)
         self.assertNotIn("_GUARD_BOTH_INT", uops)
@@ -947,10 +948,10 @@ def testfunc(n):
         exec(src, ns, ns)
         testfunc = ns['testfunc']
         ns['_test_global'] = 0
-        _, ex = self._run_with_optimizer(testfunc, 16)
+        _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
         self.assertIsNone(ex)
         ns['_test_global'] = 3.14
-        _, ex = self._run_with_optimizer(testfunc, 16)
+        _, ex = self._run_with_optimizer(testfunc, TIER2_THRESHOLD)
         self.assertIsNone(ex)
 
     def test_many_nested(self):
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index d6d50e75b612df..04f7a59fd961d0 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -10,6 +10,7 @@
 #undef NDEBUG
 
 #include "Python.h"
+#include "pycore_backoff.h"       // JUMP_BACKWARD_INITIAL_VALUE
 #include "pycore_bitutils.h"      // _Py_bswap32()
 #include "pycore_bytesobject.h"   // _PyBytes_Find()
 #include "pycore_ceval.h"         // _PyEval_AddPendingCall()
@@ -1918,6 +1919,11 @@ module_exec(PyObject *module)
         return 1;
     }
 
+    if (PyModule_Add(module, "TIER2_THRESHOLD",
+                     PyLong_FromLong(JUMP_BACKWARD_INITIAL_VALUE)) < 0) {
+        return 1;
+    }
+
     return 0;
 }

From f38d9224b1ebb4cbab116ecbfb53e21bd612713e Mon Sep 17 00:00:00 2001
From: Guido van Rossum
Date: Wed, 3 Apr 2024 13:44:16 -0700
Subject: [PATCH 41/42] Add news

---
 .../2024-04-03-13-44-04.gh-issue-116968.zgcdG2.rst | 11 +++++++++++
 1 file changed, 11 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-04-03-13-44-04.gh-issue-116968.zgcdG2.rst

diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-03-13-44-04.gh-issue-116968.zgcdG2.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-03-13-44-04.gh-issue-116968.zgcdG2.rst
new file mode 100644
index 00000000000000..15f5f6160aae76
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-03-13-44-04.gh-issue-116968.zgcdG2.rst
@@ -0,0 +1,11 @@
+Introduce a unified 16-bit backoff counter type (``_Py_BackoffCounter``),
+shared between the Tier 1 adaptive specializer and the Tier 2 optimizer. The
+API used for adaptive specialization counters is changed but the behavior is
+(supposed to be) identical.
+
+The behavior of the Tier 2 counters is changed:
+
+- There are no longer dynamic thresholds (we never varied these). - All
+counters now use the same exponential backoff. - The counter for
+``JUMP_BACKWARD`` starts counting down from 16. - The ``temperature`` in
+side exits starts counting down from 64.

From ef6366b2f5e12a4aecf10afc6632dc70151d2ec4 Mon Sep 17 00:00:00 2001
From: Guido van Rossum
Date: Thu, 4 Apr 2024 07:32:50 -0700
Subject: [PATCH 42/42] Fix blurb formatting (I hope)

---
 .../2024-04-03-13-44-04.gh-issue-116968.zgcdG2.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-03-13-44-04.gh-issue-116968.zgcdG2.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-03-13-44-04.gh-issue-116968.zgcdG2.rst
index 15f5f6160aae76..dc5beee0022181 100644
--- a/Misc/NEWS.d/next/Core and Builtins/2024-04-03-13-44-04.gh-issue-116968.zgcdG2.rst
+++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-03-13-44-04.gh-issue-116968.zgcdG2.rst
@@ -5,7 +5,7 @@ API used for adaptive specialization counters is changed but the behavior is
 The behavior of the Tier 2 counters is changed:
 
-- There are no longer dynamic thresholds (we never varied these). - All
-counters now use the same exponential backoff. - The counter for
-``JUMP_BACKWARD`` starts counting down from 16. - The ``temperature`` in
-side exits starts counting down from 64.
+* There are no longer dynamic thresholds (we never varied these).
+* All counters now use the same exponential backoff.
+* The counter for ``JUMP_BACKWARD`` starts counting down from 16.
+* The ``temperature`` in side exits starts counting down from 64.
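
The sketch below is only an illustration of the counter semantics the patches above describe, not CPython code: a counter counts down once per event, "triggers" when it reaches zero, and each restart makes the next wait exponentially longer (16, 32, 64, ... for JUMP_BACKWARD). The demo_* names, the two-field struct layout, and the cap at 2**12 are assumptions made for this example; the real _Py_BackoffCounter packs value and backoff into a single 16-bit cache entry and is driven by make_backoff_counter(), advance_backoff_counter(), restart_backoff_counter() and backoff_counter_triggers() as shown in the pycore_backoff.h hunks above.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for _Py_BackoffCounter: value counts down,
 * backoff is the exponent used to refill it after a restart. */
typedef struct {
    uint16_t value;
    uint16_t backoff;
} demo_backoff_counter;

static demo_backoff_counter
demo_make(uint16_t value, uint16_t backoff)
{
    demo_backoff_counter c = {value, backoff};
    return c;
}

/* One event (e.g. one JUMP_BACKWARD executed): count down by one. */
static demo_backoff_counter
demo_advance(demo_backoff_counter c)
{
    if (c.value > 0) {
        c.value--;
    }
    return c;
}

/* The counter "triggers" (we try to optimize) when it hits zero. */
static int
demo_triggers(demo_backoff_counter c)
{
    return c.value == 0;
}

/* Optimization failed or was discarded: wait exponentially longer
 * next time, capped at 2**12 == 4096 (assumed cap). */
static demo_backoff_counter
demo_restart(demo_backoff_counter c)
{
    if (c.backoff < 12) {
        c.backoff++;
    }
    c.value = (uint16_t)(1 << c.backoff);
    return c;
}

int
main(void)
{
    /* JUMP_BACKWARD starts at (16, 4); a side-exit temperature would
     * start at (64, 6) per initial_temperature_backoff_counter(). */
    demo_backoff_counter c = demo_make(16, 4);
    for (int attempt = 0; attempt < 4; attempt++) {
        int events = 0;
        while (!demo_triggers(c)) {
            c = demo_advance(c);
            events++;
        }
        /* Prints 16, 32, 64, 128: the backoff sequence from the patch. */
        printf("attempt %d: triggered after %d events\n", attempt, events);
        c = demo_restart(c);
    }
    return 0;
}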