Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

GH-118095: Unify the behavior of tier 2 FOR_ITER branch micro-ops #118420

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 2, 2024
4 changes: 1 addition & 3 deletions Python/bytecodes.c
Original file line number Diff line number Diff line change
Expand Up @@ -2610,9 +2610,7 @@ dummy_func(
_PyErr_Clear(tstate);
}
/* iterator ended normally */
Py_DECREF(iter);
STACK_SHRINK(1);
/* The translator sets the deopt target just past END_FOR */
/* The translator sets the deopt target just past the matching END_FOR */
DEOPT_IF(true);
}
// Common case: no jump, leave it to the code generator
Expand Down
4 changes: 1 addition & 3 deletions Python/executor_cases.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

50 changes: 40 additions & 10 deletions Python/optimizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,19 @@

#define MAX_EXECUTORS_SIZE 256

#ifdef Py_DEBUG
/* Debug-only helper: return the opcode reported by _Py_GetBaseOpcode() for
 * the instruction at `offset` in `code`, looking through ENTER_EXECUTOR.
 *
 * When the instruction is ENTER_EXECUTOR, its oparg is used as an index into
 * code->co_executors->executors and the opcode stored in that executor's
 * vm_data is returned instead (presumably the instruction the executor
 * replaced -- confirm against the executor insertion code).
 */
static int
base_opcode(PyCodeObject *code, int offset)
{
    int op = _Py_GetBaseOpcode(code, offset);
    if (op != ENTER_EXECUTOR) {
        return op;
    }
    /* The oparg of ENTER_EXECUTOR selects the executor attached to `code`. */
    int executor_index = _PyCode_CODE(code)[offset].op.arg;
    _PyExecutorObject *executor = code->co_executors->executors[executor_index];
    return executor->vm_data.opcode;
}
#endif

static bool
has_space_for_executor(PyCodeObject *code, _Py_CODEUNIT *instr)
Expand Down Expand Up @@ -445,6 +458,14 @@ _PyUOp_Replacements[MAX_UOP_ID + 1] = {
[_FOR_ITER] = _FOR_ITER_TIER_TWO,
};

/* Lookup table, indexed by micro-op id, flagging the tier 2 micro-ops that
 * act as FOR_ITER tests.  prepare_for_execution() consults it and, for a
 * flagged micro-op, points the exit at the instruction just after the loop's
 * END_FOR rather than at the micro-op's own target.  Entries that are not
 * listed are zero-initialized.
 */
static const uint8_t
is_for_iter_test[MAX_UOP_ID + 1] = {
    [_FOR_ITER_TIER_TWO] = 1,
    [_GUARD_NOT_EXHAUSTED_TUPLE] = 1,
    [_GUARD_NOT_EXHAUSTED_LIST] = 1,
    [_GUARD_NOT_EXHAUSTED_RANGE] = 1,
};

static const uint16_t
BRANCH_TO_GUARD[4][2] = {
[POP_JUMP_IF_FALSE - POP_JUMP_IF_FALSE][0] = _GUARD_IS_TRUE_POP,
Expand Down Expand Up @@ -594,7 +615,6 @@ translate_bytecode_to_trace(

uint32_t opcode = instr->op.code;
uint32_t oparg = instr->op.arg;
uint32_t extended = 0;

DPRINTF(2, "%d: %s(%d)\n", target, _PyOpcode_OpName[opcode], oparg);

Expand All @@ -608,7 +628,6 @@ translate_bytecode_to_trace(

if (opcode == EXTENDED_ARG) {
instr++;
extended = 1;
opcode = instr->op.code;
oparg = (oparg << 8) | instr->op.arg;
if (opcode == EXTENDED_ARG) {
Expand Down Expand Up @@ -772,12 +791,15 @@ translate_bytecode_to_trace(
case OPARG_REPLACED:
uop = _PyUOp_Replacements[uop];
assert(uop != 0);
if (uop == _FOR_ITER_TIER_TWO) {
target += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 2 + extended;
assert(_PyCode_CODE(code)[target-2].op.code == END_FOR ||
_PyCode_CODE(code)[target-2].op.code == INSTRUMENTED_END_FOR);
assert(_PyCode_CODE(code)[target-1].op.code == POP_TOP);
#ifdef Py_DEBUG
{
uint32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + (oparg > 255);
uint32_t jump_target = next_inst + oparg;
assert(base_opcode(code, jump_target) == END_FOR ||
base_opcode(code, jump_target) == INSTRUMENTED_END_FOR);
assert(base_opcode(code, jump_target+1) == POP_TOP);
}
#endif
break;
default:
fprintf(stderr,
Expand Down Expand Up @@ -1000,10 +1022,18 @@ prepare_for_execution(_PyUOpInstruction *buffer, int length)
if (_PyUop_Flags[opcode] & (HAS_EXIT_FLAG | HAS_DEOPT_FLAG)) {
uint16_t exit_op = (_PyUop_Flags[opcode] & HAS_EXIT_FLAG) ?
_SIDE_EXIT : _DEOPT;
if (target != current_jump_target || current_exit_op != exit_op) {
make_exit(&buffer[next_spare], exit_op, target);
int32_t jump_target = target;
if (is_for_iter_test[opcode]) {
/* Target the POP_TOP immediately after the END_FOR,
* leaving only the iterator on the stack. */
int extended_arg = inst->oparg > 255;
int32_t next_inst = target + 1 + INLINE_CACHE_ENTRIES_FOR_ITER + extended_arg;
jump_target = next_inst + inst->oparg + 1;
}
if (jump_target != current_jump_target || current_exit_op != exit_op) {
make_exit(&buffer[next_spare], exit_op, jump_target);
current_exit_op = exit_op;
current_jump_target = target;
current_jump_target = jump_target;
current_jump = next_spare;
next_spare++;
}
Expand Down
13 changes: 10 additions & 3 deletions Python/optimizer_symbols.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,19 +164,26 @@ _Py_uop_sym_set_const(_Py_UopsSymbol *sym, PyObject *const_val)
return true;
}


bool
_Py_uop_sym_set_null(_Py_UopsSymbol *sym)
{
if (_Py_uop_sym_is_not_null(sym)) {
sym_set_bottom(sym);
return false;
}
sym_set_flag(sym, IS_NULL);
return !_Py_uop_sym_is_bottom(sym);
return true;
Comment on lines +170 to +175
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this refactoring matter? If so, why not do the same for set_non_null below?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not a refactoring.
Calling `_Py_uop_sym_set_null` on a non-NULL symbol would fail an assertion in `_Py_uop_sym_is_bottom`.

And yes, it should be applied to set_non_null as well.

}

/* Record that `sym` is known not to be NULL.
 *
 * If `sym` is already known to be NULL, the symbol is set to bottom and
 * false is returned.  Otherwise the NOT_NULL flag is set on the symbol and
 * true is returned.
 */
bool
_Py_uop_sym_set_non_null(_Py_UopsSymbol *sym)
{
    if (_Py_uop_sym_is_null(sym)) {
        sym_set_bottom(sym);
        return false;
    }
    sym_set_flag(sym, NOT_NULL);
    /* The known-NULL case was rejected above, so report success directly
     * instead of re-querying _Py_uop_sym_is_bottom(); this also removes the
     * unreachable second return statement. */
    return true;
}


Expand Down
6 changes: 4 additions & 2 deletions Python/specialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ print_gc_stats(FILE *out, GCStats *stats)
}
}

#ifdef _Py_TIER2
static void
print_histogram(FILE *out, const char *name, uint64_t hist[_Py_UOP_HIST_SIZE])
{
Expand Down Expand Up @@ -249,7 +250,6 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
stats->optimizer_failure_reason_no_memory);
fprintf(out, "Optimizer remove globals builtins changed: %" PRIu64 "\n", stats->remove_globals_builtins_changed);
fprintf(out, "Optimizer remove globals incorrect keys: %" PRIu64 "\n", stats->remove_globals_incorrect_keys);

for (int i = 0; i <= MAX_UOP_ID; i++) {
if (stats->opcode[i].execution_count) {
fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].execution_count);
Expand All @@ -258,7 +258,6 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].miss);
}
}

for (int i = 0; i < 256; i++) {
if (stats->unsupported_opcode[i]) {
fprintf(
Expand Down Expand Up @@ -289,6 +288,7 @@ print_optimization_stats(FILE *out, OptimizationStats *stats)
}
}
}
#endif

static void
print_rare_event_stats(FILE *out, RareEventStats *stats)
Expand All @@ -309,7 +309,9 @@ print_stats(FILE *out, PyStats *stats)
print_call_stats(out, &stats->call_stats);
print_object_stats(out, &stats->object_stats);
print_gc_stats(out, stats->gc_stats);
#ifdef _Py_TIER2
print_optimization_stats(out, &stats->optimization_stats);
#endif
print_rare_event_stats(out, &stats->rare_event_stats);
}

Expand Down
Loading