-
Notifications
You must be signed in to change notification settings - Fork 5.2k
JIT: Have lowering set up IR for post-indexed addressing and make strength reduced IV updates amenable to post-indexed addressing #105185
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
This adds a transformation in lowering that tries to set up the IR to be amenable to post-indexed addressing in the backend. It does so by looking for RMW additions/subtractions of a local that was also recently used as the address to an indirection.
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -472,7 +472,10 @@ bool Lowering::IsContainableUnaryOrBinaryOp(GenTree* parentNode, GenTree* childN | |
// This involves: | ||
// - Widening small stores (on ARM). | ||
// | ||
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) | ||
// Returns: | ||
// Next node to lower. | ||
// | ||
GenTree* Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) | ||
{ | ||
#ifdef TARGET_ARM | ||
// On ARM, small stores can cost a bit more in terms of code size so we try to widen them. This is legal | ||
|
@@ -495,6 +498,17 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc) | |
} | ||
|
||
ContainCheckStoreLoc(storeLoc); | ||
|
||
GenTree* next = storeLoc->gtNext; | ||
|
||
#ifdef TARGET_ARM64 | ||
if (comp->opts.OptimizationEnabled()) | ||
{ | ||
TryMoveAddSubRMWAfterIndir(storeLoc); | ||
} | ||
#endif | ||
|
||
return next; | ||
} | ||
|
||
//------------------------------------------------------------------------ | ||
|
@@ -1053,6 +1067,203 @@ void Lowering::LowerModPow2(GenTree* node) | |
ContainCheckNode(mod); | ||
} | ||
|
||
const int POST_INDEXED_ADDRESSING_MAX_DISTANCE = 16; | ||
|
||
//------------------------------------------------------------------------ | ||
// TryMoveAddSubRMWAfterIndir: Try to move an RMW update of a local with an | ||
// ADD/SUB operand earlier to happen right after an indirection on the same | ||
// local, attempting to make these combinable intro post-indexed addressing. | ||
// | ||
// Arguments: | ||
// store - The store to a local | ||
// | ||
// Return Value: | ||
// True if the store was moved; otherwise false. | ||
// | ||
bool Lowering::TryMoveAddSubRMWAfterIndir(GenTreeLclVarCommon* store) | ||
{ | ||
if (!store->OperIs(GT_STORE_LCL_VAR)) | ||
{ | ||
return false; | ||
} | ||
|
||
unsigned lclNum = store->GetLclNum(); | ||
if (comp->lvaGetDesc(lclNum)->lvDoNotEnregister) | ||
{ | ||
return false; | ||
} | ||
|
||
GenTree* data = store->Data(); | ||
if (!data->OperIs(GT_ADD, GT_SUB) || data->gtOverflow()) | ||
{ | ||
return false; | ||
} | ||
|
||
GenTree* op1 = data->gtGetOp1(); | ||
GenTree* op2 = data->gtGetOp2(); | ||
if (!op1->OperIs(GT_LCL_VAR) || !op2->isContainedIntOrIImmed()) | ||
{ | ||
return false; | ||
} | ||
|
||
if (op1->AsLclVarCommon()->GetLclNum() != lclNum) | ||
{ | ||
return false; | ||
} | ||
|
||
int maxCount = min(m_blockIndirs.Height(), POST_INDEXED_ADDRESSING_MAX_DISTANCE / 2); | ||
for (int i = 0; i < maxCount; i++) | ||
{ | ||
SavedIndir& prev = m_blockIndirs.TopRef(i); | ||
if ((prev.AddrBase->GetLclNum() != lclNum) || (prev.Offset != 0)) | ||
{ | ||
continue; | ||
} | ||
|
||
GenTreeIndir* prevIndir = prev.Indir; | ||
if ((prevIndir == nullptr) || (prevIndir->gtNext == nullptr)) | ||
{ | ||
continue; | ||
} | ||
|
||
JITDUMP( | ||
"[%06u] is an an RMW ADD/SUB on local V%02u which is used as the address to [%06u]. Trying to make them adjacent.\n", | ||
Compiler::dspTreeID(store), lclNum, Compiler::dspTreeID(prevIndir)); | ||
|
||
if (TryMakeIndirAndStoreAdjacent(prevIndir, store)) | ||
{ | ||
prev.Indir = nullptr; | ||
return true; | ||
} | ||
} | ||
|
||
return false; | ||
} | ||
|
||
//------------------------------------------------------------------------
// TryMakeIndirAndStoreAdjacent: Try to move a store earlier, right after the
// specified indirection.
//
// Arguments:
//   prevIndir - Indirection that comes before "store"
//   store     - Store that we want to happen next to the indirection
//
// Return Value:
//   True if the store was moved; otherwise false.
//
bool Lowering::TryMakeIndirAndStoreAdjacent(GenTreeIndir* prevIndir, GenTreeLclVarCommon* store)
{
    // First check that 'store' appears within a bounded number of LIR nodes
    // after 'prevIndir'; beyond that the move is unlikely to pay off and the
    // interference checks below would get expensive.
    GenTree* cur = prevIndir;
    for (int i = 0; i < POST_INDEXED_ADDRESSING_MAX_DISTANCE; i++)
    {
        cur = cur->gtNext;
        if (cur == store)
            break;
    }

    if (cur != store)
    {
        JITDUMP("  Too far separated, giving up\n");
        return false;
    }

    JITDUMP("  They are close. Trying to move the following range (where * are nodes part of the data flow):\n\n");
#ifdef DEBUG
    bool     isClosed;
    GenTree* startDumpNode = BlockRange().GetTreeRange(prevIndir, &isClosed).FirstNode();
    GenTree* endDumpNode   = store->gtNext;

    auto dumpWithMarks = [=]() {
        if (!comp->verbose)
        {
            return;
        }

        for (GenTree* node = startDumpNode; node != endDumpNode; node = node->gtNext)
        {
            const char* prefix;
            if (node == prevIndir)
                prefix = "1. ";
            else if (node == store)
                prefix = "2. ";
            else if ((node->gtLIRFlags & LIR::Flags::Mark) != 0)
                prefix = "*  ";
            else
                prefix = "   ";

            comp->gtDispLIRNode(node, prefix);
        }
    };

#endif

    // Mark the whole dataflow tree of 'store' so we can tell which of the
    // intervening nodes feed the store and which are unrelated bystanders.
    MarkTree(store);

    INDEBUG(dumpWithMarks());
    JITDUMP("\n");

    assert((prevIndir->gtLIRFlags & LIR::Flags::Mark) == 0);
    m_scratchSideEffects.Clear();

    // Accumulate the side effects of the unrelated nodes (which will end up
    // after 'store') and check they do not interfere with the nodes of the
    // store's dataflow (which will move before them).
    for (GenTree* cur = prevIndir->gtNext; cur != store; cur = cur->gtNext)
    {
        if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0)
        {
            // 'cur' is part of data flow of 'store', so we will be moving the
            // currently recorded effects past 'cur'.
            if (m_scratchSideEffects.InterferesWith(comp, cur, true))
            {
                JITDUMP("Giving up due to interference with [%06u]\n", Compiler::dspTreeID(cur));
                UnmarkTree(store);
                return false;
            }
        }
        else
        {
            // Not part of dataflow; add its effects that will move past
            // 'store'.
            m_scratchSideEffects.AddNode(comp, cur);
        }
    }

    if (m_scratchSideEffects.InterferesWith(comp, store, true))
    {
        JITDUMP("Have interference. Giving up.\n");
        UnmarkTree(store);
        return false;
    }

    JITDUMP("Interference checks passed. Moving nodes that are not part of data flow of [%06u]\n\n",
            Compiler::dspTreeID(store));

    // Splice every marked node (the store and its operands) out of the range
    // and reinsert it directly after 'prevIndir', preserving relative order.
    GenTree* previous = prevIndir;
    for (GenTree* node = prevIndir->gtNext;;)
    {
        GenTree* next = node->gtNext;

        if ((node->gtLIRFlags & LIR::Flags::Mark) != 0)
        {
            // Part of data flow. Move it to happen right after 'previous'.
            BlockRange().Remove(node);
            BlockRange().InsertAfter(previous, node);
            previous = node;
        }

        if (node == store)
        {
            break;
        }

        node = next;
    }

    JITDUMP("Result:\n\n");
    INDEBUG(dumpWithMarks());
    JITDUMP("\n");
    UnmarkTree(store);
    return true;
}
|
||
//------------------------------------------------------------------------ | ||
// LowerAddForPossibleContainment: Tries to lower GT_ADD in such a way | ||
// that would allow one of its operands | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be more efficient to start checking with the last indir instead of the first?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This does start with the last indir (since it is using `TopRef` instead of `BottomRef`).