JIT: Have lowering set up IR for post-indexed addressing
This adds a transformation in lowering that tries to set up the IR to be
amenable to post-indexed addressing in the backend. It does so by
looking for RMW additions/subtractions of a local that was also recently
used as the address to an indirection.
jakobbotsch committed Jul 21, 2024
commit c267ef90e09b27bfc073fc448ef167bfced22b3b
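
As a hypothetical illustration of the pattern this change targets (the function below is illustrative and not part of the commit): an indirection off a pointer-sized local followed shortly by an RMW add of a constant to that same local, as in the loop body below, is what the backend can then fold into a single ARM64 post-indexed load such as "ldr w1, [x0], #4".

// Illustrative example only. The loop body lowers to LIR roughly like:
//   t1 = IND(LCL_VAR p)                  ; indirection using local 'p' as the address
//   ...
//   STORE_LCL_VAR p = ADD(LCL_VAR p, 4)  ; RMW add of a constant to the same local
// Moving the RMW store to sit right after the IND lets the ARM64 backend emit a
// post-indexed load ("ldr w1, [x0], #4") instead of a load plus a separate add.
int SumArray(const int* p, const int* end)
{
    int sum = 0;
    while (p != end)
    {
        sum += *p; // indirection on local 'p'
        p += 1;    // RMW update of 'p'
    }
    return sum;
}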
17 changes: 10 additions & 7 deletions src/coreclr/jit/lower.cpp
@@ -632,8 +632,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
FALLTHROUGH;

case GT_STORE_LCL_FLD:
LowerStoreLocCommon(node->AsLclVarCommon());
break;
return LowerStoreLocCommon(node->AsLclVarCommon());

#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
case GT_CMPXCHG:
@@ -4783,7 +4782,10 @@ void Lowering::LowerRet(GenTreeOp* ret)
// Arguments:
// lclStore - The store lcl node to lower.
//
void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
// Returns:
// Next node to lower.
//
GenTree* Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
{
assert(lclStore->OperIs(GT_STORE_LCL_FLD, GT_STORE_LCL_VAR));
JITDUMP("lowering store lcl var/field (before):\n");
@@ -4870,8 +4872,7 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
lclStore->gtOp1 = spilledCall;
src = lclStore->gtOp1;
JITDUMP("lowering store lcl var/field has to spill call src.\n");
LowerStoreLocCommon(lclStore);
return;
return LowerStoreLocCommon(lclStore);
}
#endif // !WINDOWS_AMD64_ABI
convertToStoreObj = false;
@@ -4966,7 +4967,7 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
DISPTREERANGE(BlockRange(), objStore);
JITDUMP("\n");

return;
return objStore->gtNext;
}
}

@@ -4984,11 +4985,13 @@ void Lowering::LowerStoreLocCommon(GenTreeLclVarCommon* lclStore)
ContainCheckBitCast(bitcast);
}

LowerStoreLoc(lclStore);
GenTree* next = LowerStoreLoc(lclStore);

JITDUMP("lowering store lcl var/field (after):\n");
DISPTREERANGE(BlockRange(), lclStore);
JITDUMP("\n");

return next;
}

//----------------------------------------------------------------------------------------------
14 changes: 8 additions & 6 deletions src/coreclr/jit/lower.h
@@ -157,7 +157,7 @@ class Lowering final : public Phase
GenTreeCC* LowerNodeCC(GenTree* node, GenCondition condition);
void LowerJmpMethod(GenTree* jmp);
void LowerRet(GenTreeOp* ret);
void LowerStoreLocCommon(GenTreeLclVarCommon* lclVar);
GenTree* LowerStoreLocCommon(GenTreeLclVarCommon* lclVar);
void LowerRetStruct(GenTreeUnOp* ret);
void LowerRetSingleRegStructLclVar(GenTreeUnOp* ret);
void LowerCallStruct(GenTreeCall* call);
@@ -353,6 +353,8 @@ class Lowering final : public Phase
GenTree* LowerIndir(GenTreeIndir* ind);
bool OptimizeForLdpStp(GenTreeIndir* ind);
bool TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indir);
bool TryMoveAddSubRMWAfterIndir(GenTreeLclVarCommon* store);
bool TryMakeIndirAndStoreAdjacent(GenTreeIndir* prevIndir, GenTreeLclVarCommon* store);
void MarkTree(GenTree* root);
void UnmarkTree(GenTree* root);
GenTree* LowerStoreIndir(GenTreeStoreInd* node);
@@ -401,11 +403,11 @@ class Lowering final : public Phase
bool LowerRMWMemOp(GenTreeIndir* storeInd);
#endif

void WidenSIMD12IfNecessary(GenTreeLclVarCommon* node);
bool CheckMultiRegLclVar(GenTreeLclVar* lclNode, int registerCount);
void LowerStoreLoc(GenTreeLclVarCommon* tree);
void LowerRotate(GenTree* tree);
void LowerShift(GenTreeOp* shift);
void WidenSIMD12IfNecessary(GenTreeLclVarCommon* node);
bool CheckMultiRegLclVar(GenTreeLclVar* lclNode, int registerCount);
GenTree* LowerStoreLoc(GenTreeLclVarCommon* tree);
void LowerRotate(GenTree* tree);
void LowerShift(GenTreeOp* shift);
#ifdef FEATURE_HW_INTRINSICS
GenTree* LowerHWIntrinsic(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIntrinsicId, GenCondition condition);
213 changes: 212 additions & 1 deletion src/coreclr/jit/lowerarmarch.cpp
@@ -472,7 +472,10 @@ bool Lowering::IsContainableUnaryOrBinaryOp(GenTree* parentNode, GenTree* childN
// This involves:
// - Widening small stores (on ARM).
//
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
// Returns:
// Next node to lower.
//
GenTree* Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
{
#ifdef TARGET_ARM
// On ARM, small stores can cost a bit more in terms of code size so we try to widen them. This is legal
@@ -495,6 +498,17 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
}

ContainCheckStoreLoc(storeLoc);

GenTree* next = storeLoc->gtNext;

#ifdef TARGET_ARM64
if (comp->opts.OptimizationEnabled())
{
TryMoveAddSubRMWAfterIndir(storeLoc);
}
#endif

return next;
}

//------------------------------------------------------------------------
@@ -1053,6 +1067,203 @@ void Lowering::LowerModPow2(GenTree* node)
ContainCheckNode(mod);
}

const int POST_INDEXED_ADDRESSING_MAX_DISTANCE = 16;

//------------------------------------------------------------------------
// TryMoveAddSubRMWAfterIndir: Try to move an RMW update of a local with an
// ADD/SUB operand earlier to happen right after an indirection on the same
// local, attempting to make these combinable into post-indexed addressing.
//
// Arguments:
// store - The store to a local
//
// Return Value:
// True if the store was moved; otherwise false.
//
bool Lowering::TryMoveAddSubRMWAfterIndir(GenTreeLclVarCommon* store)
{
if (!store->OperIs(GT_STORE_LCL_VAR))
{
return false;
}

unsigned lclNum = store->GetLclNum();
if (comp->lvaGetDesc(lclNum)->lvDoNotEnregister)
{
return false;
}

GenTree* data = store->Data();
if (!data->OperIs(GT_ADD, GT_SUB) || data->gtOverflow())
{
return false;
}

GenTree* op1 = data->gtGetOp1();
GenTree* op2 = data->gtGetOp2();
if (!op1->OperIs(GT_LCL_VAR) || !op2->isContainedIntOrIImmed())
{
return false;
}

if (op1->AsLclVarCommon()->GetLclNum() != lclNum)
{
return false;
}

int maxCount = min(m_blockIndirs.Height(), POST_INDEXED_ADDRESSING_MAX_DISTANCE / 2);
for (int i = 0; i < maxCount; i++)
{
SavedIndir& prev = m_blockIndirs.TopRef(i);
Review comment (reviewer):
Would it be more efficient to start checking with the last indir instead of the first?

Reply (jakobbotsch, author):
This does start with the last indir (since it is using TopRef instead of BottomRef).
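
A hypothetical analogue of the indexing convention assumed in that reply (plain C++, not the JIT's actual stack type): TopRef(i) counts back from the most recently pushed entry and BottomRef(i) counts forward from the oldest, so the loop above starts with the newest saved indirection.

#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    // Sketch only: TopRef/BottomRef-style indexing modeled on a plain vector.
    std::vector<int> saved = {10, 20, 30}; // pushed oldest -> newest
    auto topRef    = [&](std::size_t i) { return saved[saved.size() - 1 - i]; };
    auto bottomRef = [&](std::size_t i) { return saved[i]; };
    assert(topRef(0) == 30);    // newest entry, visited first by the loop above
    assert(bottomRef(0) == 10); // oldest entry
    return 0;
}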

if ((prev.AddrBase->GetLclNum() != lclNum) || (prev.Offset != 0))
{
continue;
}

GenTreeIndir* prevIndir = prev.Indir;
if ((prevIndir == nullptr) || (prevIndir->gtNext == nullptr))
{
continue;
}

JITDUMP(
"[%06u] is an an RMW ADD/SUB on local V%02u which is used as the address to [%06u]. Trying to make them adjacent.\n",
Compiler::dspTreeID(store), lclNum, Compiler::dspTreeID(prevIndir));

if (TryMakeIndirAndStoreAdjacent(prevIndir, store))
{
prev.Indir = nullptr;
return true;
}
}

return false;
}

//------------------------------------------------------------------------
// TryMakeIndirAndStoreAdjacent: Try to move a store earlier, right after the
// specified indirection.
//
// Arguments:
// prevIndir - Indirection that comes before "store"
// store - Store that we want to happen next to the indirection
//
// Return Value:
// True if the store was moved; otherwise false.
//
bool Lowering::TryMakeIndirAndStoreAdjacent(GenTreeIndir* prevIndir, GenTreeLclVarCommon* store)
{
GenTree* cur = prevIndir;
for (int i = 0; i < POST_INDEXED_ADDRESSING_MAX_DISTANCE; i++)
{
cur = cur->gtNext;
if (cur == store)
break;
}

if (cur != store)
{
JITDUMP(" Too far separated, giving up\n");
return false;
}

JITDUMP(" They are close. Trying to move the following range (where * are nodes part of the data flow):\n\n");
#ifdef DEBUG
bool isClosed;
GenTree* startDumpNode = BlockRange().GetTreeRange(prevIndir, &isClosed).FirstNode();
GenTree* endDumpNode = store->gtNext;

auto dumpWithMarks = [=]() {
if (!comp->verbose)
{
return;
}

for (GenTree* node = startDumpNode; node != endDumpNode; node = node->gtNext)
{
const char* prefix;
if (node == prevIndir)
prefix = "1. ";
else if (node == store)
prefix = "2. ";
else if ((node->gtLIRFlags & LIR::Flags::Mark) != 0)
prefix = "* ";
else
prefix = " ";

comp->gtDispLIRNode(node, prefix);
}
};

#endif

MarkTree(store);

INDEBUG(dumpWithMarks());
JITDUMP("\n");

assert((prevIndir->gtLIRFlags & LIR::Flags::Mark) == 0);
m_scratchSideEffects.Clear();

for (GenTree* cur = prevIndir->gtNext; cur != store; cur = cur->gtNext)
Review comment (reviewer):
I wonder if this could be cheaper if you computed two side effect sets and then checked for interference. But it probably doesn't make much difference.

Reply (jakobbotsch, Jul 21, 2024):
Hmm, possibly -- although it would be a bit less precise than what's here, since not all nodes that are part of store's dataflow necessarily happen after all the nodes we are moving.
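
For reference, a rough sketch (not part of the change) of the alternative being discussed here, assuming SideEffectSet offers a set-versus-set InterferesWith overload; as the reply notes, one coarse check of the two accumulated sets is less precise than the per-node check in the surrounding loop:

// Sketch only; assumes SideEffectSet has an InterferesWith(const SideEffectSet&, bool) overload.
SideEffectSet movedEffects;    // effects of nodes that would move past 'store'
SideEffectSet dataflowEffects; // effects of 'store' and the nodes in its dataflow
for (GenTree* cur = prevIndir->gtNext; cur != store; cur = cur->gtNext)
{
    if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0)
    {
        dataflowEffects.AddNode(comp, cur);
    }
    else
    {
        movedEffects.AddNode(comp, cur);
    }
}
dataflowEffects.AddNode(comp, store);
// Single coarse interference check instead of checking each marked node separately.
if (movedEffects.InterferesWith(dataflowEffects, /* strict */ true))
{
    UnmarkTree(store);
    return false;
}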

{
if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0)
{
// 'cur' is part of data flow of 'store', so we will be moving the
// currently recorded effects past 'cur'.
if (m_scratchSideEffects.InterferesWith(comp, cur, true))
{
JITDUMP("Giving up due to interference with [%06u]\n", Compiler::dspTreeID(cur));
UnmarkTree(store);
return false;
}
}
else
{
// Not part of dataflow; add its effects that will move past
// 'store'.
m_scratchSideEffects.AddNode(comp, cur);
}
}

if (m_scratchSideEffects.InterferesWith(comp, store, true))
{
JITDUMP("Have interference. Giving up.\n");
UnmarkTree(store);
return false;
}

JITDUMP("Interference checks passed. Moving nodes that are not part of data flow of [%06u]\n\n",
Compiler::dspTreeID(store));

GenTree* previous = prevIndir;
for (GenTree* node = prevIndir->gtNext;;)
{
GenTree* next = node->gtNext;

if ((node->gtLIRFlags & LIR::Flags::Mark) != 0)
{
// Part of data flow. Move it to happen right after 'previous'.
BlockRange().Remove(node);
BlockRange().InsertAfter(previous, node);
previous = node;
}

if (node == store)
{
break;
}

node = next;
}

JITDUMP("Result:\n\n");
INDEBUG(dumpWithMarks());
JITDUMP("\n");
UnmarkTree(store);
return true;
}

//------------------------------------------------------------------------
// LowerAddForPossibleContainment: Tries to lower GT_ADD in such a way
// that would allow one of its operands
6 changes: 5 additions & 1 deletion src/coreclr/jit/lowerloongarch64.cpp
@@ -249,14 +249,18 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
// This involves:
// - Widening operations of unsigneds.
//
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
// Returns:
// Next node to lower.
//
GenTree* Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
{
if (storeLoc->OperIs(GT_STORE_LCL_FLD))
{
// We should only encounter this for lclVars that are lvDoNotEnregister.
verifyLclFldDoNotEnregister(storeLoc->GetLclNum());
}
ContainCheckStoreLoc(storeLoc);
return storeLoc->gtNext;
}

//------------------------------------------------------------------------
6 changes: 5 additions & 1 deletion src/coreclr/jit/lowerriscv64.cpp
@@ -198,14 +198,18 @@ GenTree* Lowering::LowerBinaryArithmetic(GenTreeOp* binOp)
// This involves:
// - Widening operations of unsigneds.
//
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
// Returns:
// Next node to lower.
//
GenTree* Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
{
if (storeLoc->OperIs(GT_STORE_LCL_FLD))
{
// We should only encounter this for lclVars that are lvDoNotEnregister.
verifyLclFldDoNotEnregister(storeLoc->GetLclNum());
}
ContainCheckStoreLoc(storeLoc);
return storeLoc->gtNext;
}

//------------------------------------------------------------------------
6 changes: 5 additions & 1 deletion src/coreclr/jit/lowerxarch.cpp
@@ -43,7 +43,10 @@ void Lowering::LowerRotate(GenTree* tree)
// - Handling of contained immediates.
// - Widening some small stores.
//
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
// Returns:
// Next tree to lower.
//
GenTree* Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
{
// Most small locals (the exception is dependently promoted fields) have 4 byte wide stack slots, so
// we can widen the store, if profitable. The widening is only (largely) profitable for 2 byte stores.
@@ -64,6 +67,7 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
}

ContainCheckStoreLoc(storeLoc);
return storeLoc->gtNext;
}

//------------------------------------------------------------------------