Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Handle the save/restore of predicate registers
  • Loading branch information
kunalspathak committed Jun 27, 2024
commit 570583fc1eae00e5fbdf594d690ab6e2a125601c
3 changes: 3 additions & 0 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,9 @@ CodeGen::CodeGen(Compiler* theCompiler)
#endif // DEBUG

regSet.tmpInit();
#if defined(TARGET_ARM64)
predicateOffset = 0;
#endif

#ifdef LATE_DISASM
getDisAssembler().disInit(compiler);
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/codegeninterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,9 @@ class CodeGenInterface
RegState intRegState;
RegState floatRegState;
NodeInternalRegisters internalRegisters;

#if defined(TARGET_ARM64)
int predicateOffset;
#endif // TARGET_ARM64
protected:
Compiler* compiler;
bool m_genAlignLoops;
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1295,6 +1295,7 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
emitAttr emitType = emitActualTypeSize(unspillTree->TypeGet());
// Reload into the register specified by 'tree' which may be a GT_RELOAD.
regNumber dstReg = tree->GetRegNum();
// TODO: Here need to see if this is for GT_MASK, then we need to pass the #imm offset I think
GetEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitType, dstReg, t->tdTempNum(), 0);
regSet.tmpRlsTemp(t);

Expand Down
15 changes: 14 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -1430,12 +1430,18 @@ class TempDsc
int tdNum;
BYTE tdSize;
var_types tdType;
#if defined(TARGET_ARM64)
BYTE tdSeqNum;
#endif // TARGET_ARM64

public:
TempDsc(int _tdNum, unsigned _tdSize, var_types _tdType)
TempDsc(int _tdNum, unsigned _tdSize, var_types _tdType, unsigned _tdSeqNum)
: tdNum(_tdNum)
, tdSize((BYTE)_tdSize)
, tdType(_tdType)
#if defined(TARGET_ARM64)
, tdSeqNum((BYTE)_tdSeqNum)
#endif // TARGET_ARM64
{
#ifdef DEBUG
// temps must have a negative number (so they have a different number from all local variables)
Expand Down Expand Up @@ -1484,6 +1490,13 @@ class TempDsc
{
return tdType;
}
#ifdef TARGET_ARM64
unsigned tdTempSeqNum() const
{
assert(varTypeIsMask(tdType));
return tdSeqNum;
}
#endif
};

// Specify compiler data that a phase might modify
Expand Down
11 changes: 10 additions & 1 deletion src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2708,7 +2708,16 @@ inline
tmpDsc = codeGen->regSet.tmpFindNum(varNum, RegSet::TEMP_USAGE_USED);
}
assert(tmpDsc != nullptr);
varOffset = tmpDsc->tdTempOffs();
#if defined(TARGET_ARM64)
if (varTypeIsMask(tmpDsc->tdTempType()))
{
varOffset = tmpDsc->tdTempSeqNum();
}
else
#endif // TARGET_ARM64
{
varOffset = tmpDsc->tdTempOffs();
}
}
else
{
Expand Down
66 changes: 64 additions & 2 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7884,7 +7884,36 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va
isSimple = false;
size = EA_SCALABLE;
attr = size;
fmt = isVectorRegister(reg1) ? IF_SVE_IE_2A : IF_SVE_ID_2A;
if (isPredicateRegister(reg1))
{
assert(offs == 0);
// For predicate, generate based of rsGetRsvdReg()
regNumber rsvdReg = codeGen->rsGetRsvdReg();

if (varx >= 0)
{
// local

// add rsvd, fp, #imm
emitIns_R_R_I(INS_add, EA_8BYTE, rsvdReg, reg2, imm);
// str p0, [rsvd, #0, mul vl]
emitIns_R_R_I(ins, attr, reg1, rsvdReg, 0);
}
else
{
// temp

// `base` contains seqNum and offs = 0, so imm contains seqNum
// add rsvd, fp #predicateStartOffset
emitIns_R_R_I(INS_add, EA_8BYTE, rsvdReg, reg2, codeGen->predicateOffset);
// str p0, [rsvd, #imm, mul vl]
emitIns_R_R_I(ins, attr, reg1, rsvdReg, imm);
}
return;
}

assert(isVectorRegister(reg1));
fmt = IF_SVE_IE_2A;

// TODO-SVE: Don't assume 128bit vectors
// Predicate size is vector length / 8
Expand Down Expand Up @@ -8135,7 +8164,40 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va
isSimple = false;
size = EA_SCALABLE;
attr = size;
fmt = isVectorRegister(reg1) ? IF_SVE_JH_2A : IF_SVE_JG_2A;

if (isPredicateRegister(reg1))
{
assert(offs == 0);

// For predicate, generate based of rsGetRsvdReg()
regNumber rsvdReg = codeGen->rsGetRsvdReg();

if (varx >= 0)
{
// local

// add rsvd, fp, #imm
emitIns_R_R_I(INS_add, EA_8BYTE, rsvdReg, reg2, imm);
// str p0, [rsvd, #0, mul vl]
emitIns_R_R_I(ins, attr, reg1, rsvdReg, 0);
}
else
{
// temp

// `base` contains seqNum and offs = 0, so imm contains seqNum
// add rsvd, fp #predicateStartOffset
emitIns_R_R_I(INS_add, EA_8BYTE, rsvdReg, reg2, codeGen->predicateOffset);
// str p0, [rsvd, #imm, mul vl]
emitIns_R_R_I(ins, attr, reg1, rsvdReg, imm);
}

// TODO: deal with marking the local
return;
}

assert(isVectorRegister(reg1));
fmt = IF_SVE_JH_2A;

// TODO-SVE: Don't assume 128bit vectors
// Predicate size is vector length / 8
Expand Down
12 changes: 10 additions & 2 deletions src/coreclr/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5591,9 +5591,9 @@ unsigned Compiler::lvaGetMaxSpillTempSize()
*
*
* Doing this all in one pass is 'hard'. So instead we do it in 2 basic passes:
* 1. Assign all the offsets relative to the Virtual '0'. Offsets above (the
* 1. Assign all the offsets relative to the Virtual '0'. Offsets above (the
* incoming arguments) are positive. Offsets below (everything else) are
* negative. This pass also calcuates the total frame size (between Caller's
* negative. This pass also calculates the total frame size (between Caller's
* SP/return address and the Ambient SP).
* 2. Figure out where to place the frame pointer, and then adjust the offsets
* as needed for the final stack size and whether the offset is frame pointer
Expand Down Expand Up @@ -5872,6 +5872,14 @@ void Compiler::lvaFixVirtualFrameOffsets()
for (TempDsc* temp = codeGen->regSet.tmpListBeg(); temp != nullptr; temp = codeGen->regSet.tmpListNxt(temp))
{
temp->tdAdjustTempOffs(delta);
#if defined(TARGET_ARM64)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there some guarantee that all the predicate temps end up adjacent on this list? Otherwise it seems like this indexing scheme might not work out.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you look below, we iterate over all the types and call tmpPreAllocateTemps with the number of slots we need for each type.

for (int i = 0; i < TYP_COUNT; i++)
{
if (var_types(i) != RegSet::tmpNormalizeType(var_types(i)))
{
// Only normalized types should have anything in the maxSpill array.
// We assume here that if type 'i' does not normalize to itself, then
// nothing else normalizes to 'i', either.
assert(maxSpill[i] == 0);
}
if (maxSpill[i] != 0)
{
JITDUMP(" %s: %d\n", varTypeName(var_types(i)), maxSpill[i]);
compiler->codeGen->regSet.tmpPreAllocateTemps(var_types(i), maxSpill[i]);
}
}

In tmpPreAllocateTemps(), we iterate through the number of slots we want to allocate and create them:

for (unsigned i = 0; i < count; i++)
{
tmpCount++;
tmpSize += size;
#ifdef TARGET_ARM
if (type == TYP_DOUBLE)
{
// Adjust tmpSize to accommodate possible alignment padding.
// Note that at this point the offsets aren't yet finalized, so we don't yet know if it will be required.
tmpSize += TARGET_POINTER_SIZE;
}
#endif // TARGET_ARM
TempDsc* temp = new (m_rsCompiler, CMK_Unknown) TempDsc(-((int)tmpCount), size, type);

if (varTypeIsMask(temp->tdTempType()) && temp->tdTempSeqNum() == 0)
{
// For the first register, store the offset, which we will use to
// generate the offsets for subsequent temp mask registers
codeGen->predicateOffset = temp->tdTempOffs();
}
#endif
}

lvaCachedGenericContextArgOffs += delta;
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/lsrabuild.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -855,6 +855,9 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call)

#else
killMask.RemoveRegsetForType(RBM_FLT_CALLEE_TRASH.GetFloatRegSet(), FloatRegisterType);
#if defined(TARGET_ARM64)
killMask.RemoveRegsetForType(RBM_MSK_CALLEE_TRASH.GetFloatRegSet(), MaskRegisterType);
#endif // TARGET_ARM64
#endif // TARGET_XARCH
}
#ifdef TARGET_ARM
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/regset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -705,7 +705,7 @@ void RegSet::tmpPreAllocateTemps(var_types type, unsigned count)
}
#endif // TARGET_ARM

TempDsc* temp = new (m_rsCompiler, CMK_Unknown) TempDsc(-((int)tmpCount), size, type);
TempDsc* temp = new (m_rsCompiler, CMK_Unknown) TempDsc(-((int)tmpCount), size, type, i);

#ifdef DEBUG
if (m_rsCompiler->verbose)
Expand Down