-
-
Notifications
You must be signed in to change notification settings - Fork 31.9k
gh-104909: Split BINARY_OP into micro-ops #104910
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4cac682
24026a0
6f7c955
fd46248
5cd8b66
17483ad
74d4eab
3259d6e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -279,73 +279,111 @@ dummy_func( | |
|
||
family(binary_op, INLINE_CACHE_ENTRIES_BINARY_OP) = { | ||
BINARY_OP, | ||
BINARY_OP_ADD_FLOAT, | ||
BINARY_OP_MULTIPLY_INT, | ||
BINARY_OP_ADD_INT, | ||
BINARY_OP_ADD_UNICODE, | ||
// BINARY_OP_INPLACE_ADD_UNICODE, // This is an odd duck. | ||
BINARY_OP_SUBTRACT_INT, | ||
BINARY_OP_MULTIPLY_FLOAT, | ||
BINARY_OP_MULTIPLY_INT, | ||
BINARY_OP_ADD_FLOAT, | ||
BINARY_OP_SUBTRACT_FLOAT, | ||
BINARY_OP_SUBTRACT_INT, | ||
BINARY_OP_ADD_UNICODE, | ||
// BINARY_OP_INPLACE_ADD_UNICODE, // See comments at that opcode. | ||
}; | ||
|
||
|
||
inst(BINARY_OP_MULTIPLY_INT, (unused/1, left, right -- prod)) { | ||
op(_GUARD_BOTH_INT, (left, right -- left, right)) { | ||
DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); | ||
DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); | ||
} | ||
|
||
op(_BINARY_OP_MULTIPLY_INT, (unused/1, left, right -- res)) { | ||
STAT_INC(BINARY_OP, hit); | ||
prod = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); | ||
res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); | ||
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); | ||
_Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); | ||
ERROR_IF(prod == NULL, error); | ||
ERROR_IF(res == NULL, error); | ||
} | ||
|
||
inst(BINARY_OP_MULTIPLY_FLOAT, (unused/1, left, right -- prod)) { | ||
DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP); | ||
DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP); | ||
op(_BINARY_OP_ADD_INT, (unused/1, left, right -- res)) { | ||
STAT_INC(BINARY_OP, hit); | ||
double dprod = ((PyFloatObject *)left)->ob_fval * | ||
((PyFloatObject *)right)->ob_fval; | ||
DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dprod, prod); | ||
res = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); | ||
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); | ||
_Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); | ||
Comment on lines +308 to +309
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This might benefit in the future from being two additional uops too, since they can be removed for known immortal values (although mechanically that might be difficult right now, given that we don't have a good way of sharing locals across uops). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, once we're over our concern for dispatch overhead in the Tier-2 interpreter, that makes a lot of sense. But I'm not quite over that (I still would like to see a Tier-2 interpreter that is not slower than Tier-1 without all conceivable optimizations). So maybe we should just postpone this (maybe we need a new Ideas issue about the granularity of uops). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's not worry about refcount stuff for now, we can always do it later. |
||
ERROR_IF(res == NULL, error); | ||
} | ||
|
||
inst(BINARY_OP_SUBTRACT_INT, (unused/1, left, right -- sub)) { | ||
DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); | ||
DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); | ||
op(_BINARY_OP_SUBTRACT_INT, (unused/1, left, right -- res)) { | ||
STAT_INC(BINARY_OP, hit); | ||
sub = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); | ||
res = _PyLong_Subtract((PyLongObject *)left, (PyLongObject *)right); | ||
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); | ||
_Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); | ||
ERROR_IF(sub == NULL, error); | ||
ERROR_IF(res == NULL, error); | ||
} | ||
|
||
inst(BINARY_OP_SUBTRACT_FLOAT, (unused/1, left, right -- sub)) { | ||
macro(BINARY_OP_MULTIPLY_INT) = | ||
_GUARD_BOTH_INT + _BINARY_OP_MULTIPLY_INT; | ||
macro(BINARY_OP_ADD_INT) = | ||
_GUARD_BOTH_INT + _BINARY_OP_ADD_INT; | ||
macro(BINARY_OP_SUBTRACT_INT) = | ||
_GUARD_BOTH_INT + _BINARY_OP_SUBTRACT_INT; | ||
|
||
op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) { | ||
DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP); | ||
DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP); | ||
} | ||
|
||
op(_BINARY_OP_MULTIPLY_FLOAT, (unused/1, left, right -- res)) { | ||
STAT_INC(BINARY_OP, hit); | ||
double dsub = ((PyFloatObject *)left)->ob_fval - ((PyFloatObject *)right)->ob_fval; | ||
DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dsub, sub); | ||
double dres = | ||
((PyFloatObject *)left)->ob_fval * | ||
((PyFloatObject *)right)->ob_fval; | ||
DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); | ||
} | ||
|
||
op(_BINARY_OP_ADD_FLOAT, (unused/1, left, right -- res)) { | ||
STAT_INC(BINARY_OP, hit); | ||
double dres = | ||
((PyFloatObject *)left)->ob_fval + | ||
((PyFloatObject *)right)->ob_fval; | ||
DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); | ||
} | ||
|
||
op(_BINARY_OP_SUBTRACT_FLOAT, (unused/1, left, right -- res)) { | ||
STAT_INC(BINARY_OP, hit); | ||
double dres = | ||
((PyFloatObject *)left)->ob_fval - | ||
((PyFloatObject *)right)->ob_fval; | ||
DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dres, res); | ||
} | ||
|
||
inst(BINARY_OP_ADD_UNICODE, (unused/1, left, right -- res)) { | ||
macro(BINARY_OP_MULTIPLY_FLOAT) = | ||
_GUARD_BOTH_FLOAT + _BINARY_OP_MULTIPLY_FLOAT; | ||
macro(BINARY_OP_ADD_FLOAT) = | ||
_GUARD_BOTH_FLOAT + _BINARY_OP_ADD_FLOAT; | ||
macro(BINARY_OP_SUBTRACT_FLOAT) = | ||
_GUARD_BOTH_FLOAT + _BINARY_OP_SUBTRACT_FLOAT; | ||
|
||
op(_GUARD_BOTH_UNICODE, (left, right -- left, right)) { | ||
DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_OP); | ||
DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); | ||
DEOPT_IF(!PyUnicode_CheckExact(right), BINARY_OP); | ||
} | ||
|
||
op(_BINARY_OP_ADD_UNICODE, (unused/1, left, right -- res)) { | ||
STAT_INC(BINARY_OP, hit); | ||
res = PyUnicode_Concat(left, right); | ||
_Py_DECREF_SPECIALIZED(left, _PyUnicode_ExactDealloc); | ||
_Py_DECREF_SPECIALIZED(right, _PyUnicode_ExactDealloc); | ||
ERROR_IF(res == NULL, error); | ||
} | ||
|
||
macro(BINARY_OP_ADD_UNICODE) = | ||
_GUARD_BOTH_UNICODE + _BINARY_OP_ADD_UNICODE; | ||
|
||
// This is a subtle one. It's a super-instruction for | ||
// BINARY_OP_ADD_UNICODE followed by STORE_FAST | ||
// where the store goes into the left argument. | ||
// So the inputs are the same as for all BINARY_OP | ||
// specializations, but there is no output. | ||
// At the end we just skip over the STORE_FAST. | ||
inst(BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) { | ||
DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_OP); | ||
DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); | ||
op(_BINARY_OP_INPLACE_ADD_UNICODE, (left, right --)) { | ||
_Py_CODEUNIT true_next = next_instr[INLINE_CACHE_ENTRIES_BINARY_OP]; | ||
assert(true_next.op.code == STORE_FAST || | ||
true_next.op.code == STORE_FAST__LOAD_FAST); | ||
|
@@ -372,24 +410,8 @@ dummy_func( | |
JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP + 1); | ||
} | ||
|
||
inst(BINARY_OP_ADD_FLOAT, (unused/1, left, right -- sum)) { | ||
DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP); | ||
DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); | ||
STAT_INC(BINARY_OP, hit); | ||
double dsum = ((PyFloatObject *)left)->ob_fval + | ||
((PyFloatObject *)right)->ob_fval; | ||
DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dsum, sum); | ||
} | ||
|
||
inst(BINARY_OP_ADD_INT, (unused/1, left, right -- sum)) { | ||
DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); | ||
DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); | ||
STAT_INC(BINARY_OP, hit); | ||
sum = _PyLong_Add((PyLongObject *)left, (PyLongObject *)right); | ||
_Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); | ||
_Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); | ||
ERROR_IF(sum == NULL, error); | ||
} | ||
macro(BINARY_OP_INPLACE_ADD_UNICODE) = | ||
_GUARD_BOTH_UNICODE + _BINARY_OP_INPLACE_ADD_UNICODE; | ||
|
||
family(binary_subscr, INLINE_CACHE_ENTRIES_BINARY_SUBSCR) = { | ||
BINARY_SUBSCR, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor nit: these were alphabetized before, and now they're not. I think it makes sense to reorder the implementations below, but I'm not sure there's also value in reordering these.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not a big fan of alphabetization any more (just search :-), and I tried to let the grouping match the ordering of the definitions below, with the exception of `BINARY_OP` itself, which is somewhere else entirely. TBH I'm not sure that there's a single organizing principle in the ordering in this file any more; I like to keep families together, but I also don't like to move code around unnecessarily.
If you insist I can undo this chunk.