73#define DEBUG_TYPE "arm-low-overhead-loops"
74#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
78 cl::desc(
"Disable tail-predication in the ARM LowOverheadLoop pass"),
83 cl::desc(
"Disable omitting 'dls lr, lr' instructions"),
88 return PIdx != -1 &&
MI->getOperand(PIdx + 1).getReg() == ARM::VPR;
92 return MI->findRegisterDefOperandIdx(ARM::VPR,
nullptr) != -1;
96 return MI.findRegisterUseOperandIdx(ARM::VPR,
nullptr) != -1;
111 if (
MI.isDebugInstr())
126 class PostOrderLoopTraversal {
128 MachineLoopInfo &MLI;
129 SmallPtrSet<MachineBasicBlock*, 4> Visited;
130 SmallVector<MachineBasicBlock*, 4> Order;
133 PostOrderLoopTraversal(MachineLoop &ML, MachineLoopInfo &MLI)
134 : ML(ML), MLI(MLI) { }
136 const SmallVectorImpl<MachineBasicBlock*> &getOrder()
const {
143 std::function<void(MachineBasicBlock *)> Search =
144 [
this, &Search](MachineBasicBlock *
MBB) ->
void {
145 if (!Visited.insert(
MBB).second)
149 if (!ML.contains(Succ))
153 Order.push_back(
MBB);
158 ML.getExitBlocks(ExitBlocks);
162 Search(ML.getHeader());
165 std::function<void(MachineBasicBlock*)> GetPredecessor =
166 [
this, &GetPredecessor] (MachineBasicBlock *
MBB) ->
void {
167 Order.push_back(
MBB);
172 if (
auto *Preheader = ML.getLoopPreheader())
173 GetPredecessor(Preheader);
174 else if (
auto *Preheader = MLI.findLoopPreheader(&ML,
true,
true))
175 GetPredecessor(Preheader);
180 SmallVector<MachineInstr *, 4> Insts;
183 VPTBlock(MachineInstr *
MI) { Insts.push_back(
MI); }
187 bool hasUniformPredicate() {
return getDivergent() ==
nullptr; }
191 MachineInstr *getDivergent() {
192 SmallVectorImpl<MachineInstr *> &Insts = getInsts();
193 for (
unsigned i = 1; i < Insts.size(); ++i) {
194 MachineInstr *
Next = Insts[i];
201 void insert(MachineInstr *
MI) {
204 assert(Insts.size() <= 5 &&
"Too many instructions in VPT block!");
209 unsigned size()
const {
return Insts.size(); }
210 SmallVectorImpl<MachineInstr *> &getInsts() {
return Insts; }
219 friend struct LowOverheadLoop;
222 SetVector<MachineInstr *> CurrentPredicates;
223 std::map<MachineInstr *, SetVector<MachineInstr *>> PredicatedInsts;
226 assert((CurrentPredicates.size() ||
MI->getParent()->isLiveIn(ARM::VPR))
227 &&
"Can't begin VPT without predicate");
228 Blocks.emplace_back(
MI);
232 PredicatedInsts[
MI] = CurrentPredicates;
235 void addInst(MachineInstr *
MI) {
236 Blocks.back().insert(
MI);
237 PredicatedInsts[
MI] = CurrentPredicates;
240 void addPredicate(MachineInstr *
MI) {
242 CurrentPredicates.insert(
MI);
245 void resetPredicate(MachineInstr *
MI) {
247 CurrentPredicates.clear();
248 CurrentPredicates.insert(
MI);
253 bool isPredicatedOnVCTP(MachineInstr *
MI,
bool Exclusive =
false) {
254 SetVector<MachineInstr *> &Predicates = PredicatedInsts[
MI];
255 if (Exclusive && Predicates.
size() != 1)
264 bool isEntryPredicatedOnVCTP(VPTBlock &
Block,
bool Exclusive =
false) {
265 SmallVectorImpl<MachineInstr *> &Insts =
Block.getInsts();
266 return isPredicatedOnVCTP(Insts.
front(), Exclusive);
272 bool hasImplicitlyValidVPT(VPTBlock &
Block, ReachingDefInfo &
RDI) {
273 SmallVectorImpl<MachineInstr *> &Insts =
Block.getInsts();
274 MachineInstr *VPT = Insts.
front();
276 "Expected VPT block to begin with VPT/VPST");
286 return !MI->mayStore() && !MI->mayLoad() &&
287 !isHorizontalReduction(*MI) && !isVCTP(MI);
291 auto IsOperandPredicated = [&](MachineInstr *
MI,
unsigned Idx) {
293 return Op && PredicatedInsts.count(
Op) && isPredicatedOnVCTP(
Op);
296 auto IsOperandInvariant = [&](MachineInstr *
MI,
unsigned Idx) {
297 MachineOperand &MO =
MI->getOperand(Idx);
301 SmallPtrSet<MachineInstr *, 2> Defs;
306 for (
auto *Def : Defs)
314 return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) &&
315 (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) &&
316 (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2));
323 for (
auto &
Block : Blocks) {
324 if (isEntryPredicatedOnVCTP(
Block,
false) &&
326 return getVPTInstrPredicate(*MI) == ARMVCC::Else;
329 if (hasImplicitlyValidVPT(
Block,
RDI))
332 SmallVectorImpl<MachineInstr *> &Insts =
Block.getInsts();
336 "Expected VPT block to start with a VPST or VPT!");
337 if (Insts.
size() == 2 && Insts.
front()->getOpcode() != ARM::MVE_VPST &&
341 for (
auto *
MI : Insts) {
351 if (!isPredicatedOnVCTP(
MI)) {
361 struct LowOverheadLoop {
364 MachineBasicBlock *Preheader =
nullptr;
365 MachineLoopInfo &MLI;
366 ReachingDefInfo &RDI;
367 const TargetRegisterInfo &TRI;
368 const ARMBaseInstrInfo &TII;
369 MachineFunction *MF =
nullptr;
371 MachineBasicBlock *StartInsertBB =
nullptr;
372 MachineInstr *Start =
nullptr;
373 MachineInstr *Dec =
nullptr;
374 MachineInstr *End =
nullptr;
375 MachineOperand TPNumElements;
376 SmallVector<MachineInstr *, 4> VCTPs;
377 SmallPtrSet<MachineInstr *, 4> ToRemove;
378 SmallPtrSet<MachineInstr *, 4> BlockMasksToRecompute;
379 SmallPtrSet<MachineInstr *, 4> DoubleWidthResultInstrs;
380 SmallPtrSet<MachineInstr *, 4> VMOVCopies;
382 bool CannotTailPredicate =
false;
385 LowOverheadLoop(MachineLoop &ML, MachineLoopInfo &MLI, ReachingDefInfo &RDI,
386 const TargetRegisterInfo &TRI,
const ARMBaseInstrInfo &TII)
387 : ML(ML), MLI(MLI), RDI(RDI), TRI(TRI), TII(TII),
388 TPNumElements(MachineOperand::CreateImm(0)) {
390 if (
auto *
MBB = ML.getLoopPreheader())
392 else if (
auto *
MBB = MLI.findLoopPreheader(&ML,
true,
true))
399 bool ValidateMVEInst(MachineInstr *
MI);
401 void AnalyseMVEInst(MachineInstr *
MI) {
402 CannotTailPredicate = !ValidateMVEInst(
MI);
405 bool IsTailPredicationLegal()
const {
408 return !Revert && FoundAllComponents() && !VCTPs.empty() &&
409 !CannotTailPredicate && ML.getNumBlocks() == 1;
414 bool AddVCTP(MachineInstr *
MI);
419 bool ValidateTailPredicate();
423 bool ValidateLiveOuts();
427 void Validate(ARMBasicBlockUtils *BBUtils);
429 bool FoundAllComponents()
const {
430 return Start && Dec && End;
433 SmallVectorImpl<VPTBlock> &getVPTBlocks() {
return VPTstate.Blocks; }
437 MachineOperand &getLoopStartOperand() {
438 if (IsTailPredicationLegal())
439 return TPNumElements;
440 return Start->getOperand(1);
443 unsigned getStartOpcode()
const {
445 if (!IsTailPredicationLegal())
446 return IsDo ? ARM::t2DLS : ARM::t2WLS;
452 if (Start)
dbgs() <<
"ARM Loops: Found Loop Start: " << *Start;
453 if (Dec)
dbgs() <<
"ARM Loops: Found Loop Dec: " << *Dec;
454 if (End)
dbgs() <<
"ARM Loops: Found Loop End: " << *End;
455 if (!VCTPs.empty()) {
456 dbgs() <<
"ARM Loops: Found VCTP(s):\n";
457 for (
auto *
MI : VCTPs)
460 if (!FoundAllComponents())
461 dbgs() <<
"ARM Loops: Not a low-overhead loop.\n";
462 else if (!(Start && Dec && End))
463 dbgs() <<
"ARM Loops: Failed to find all loop components.\n";
468 MachineFunction *MF =
nullptr;
469 MachineLoopInfo *MLI =
nullptr;
470 ReachingDefInfo *RDI =
nullptr;
471 const ARMBaseInstrInfo *TII =
nullptr;
472 MachineRegisterInfo *MRI =
nullptr;
473 const TargetRegisterInfo *TRI =
nullptr;
474 std::unique_ptr<ARMBasicBlockUtils> BBUtils =
nullptr;
479 ARMLowOverheadLoops() : MachineFunctionPass(ID) { }
481 void getAnalysisUsage(AnalysisUsage &AU)
const override {
488 bool runOnMachineFunction(MachineFunction &MF)
override;
490 MachineFunctionProperties getRequiredProperties()
const override {
491 return MachineFunctionProperties().setNoVRegs().setTracksLiveness();
494 StringRef getPassName()
const override {
499 bool ProcessLoop(MachineLoop *
ML);
501 bool RevertNonLoops();
503 void RevertWhile(MachineInstr *
MI)
const;
504 void RevertDo(MachineInstr *
MI)
const;
510 void RevertLoopEndDec(MachineInstr *
MI)
const;
512 void ConvertVPTBlocks(LowOverheadLoop &LoLoop);
514 MachineInstr *ExpandLoopStart(LowOverheadLoop &LoLoop);
516 void Expand(LowOverheadLoop &LoLoop);
518 void IterationCountDCE(LowOverheadLoop &LoLoop);
522char ARMLowOverheadLoops::ID = 0;
535 for (
auto *Dead : Killed)
536 BasicBlocks.
insert(Dead->getParent());
539 std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;
540 for (
auto *
MBB : BasicBlocks) {
541 for (
auto &
IT : *
MBB) {
542 if (
IT.getOpcode() != ARM::t2IT)
553 for (
auto *Dead : Killed) {
555 Dead->findRegisterUseOperand(ARM::ITSTATE,
nullptr)) {
558 auto &CurrentBlock = ITBlocks[
IT];
559 CurrentBlock.erase(Dead);
560 if (CurrentBlock.empty())
566 if (!ModifiedITs.
empty())
579 <<
" - can also remove:\n";
584 RDI.collectKilledOperands(
MI, Killed);
585 if (WontCorruptITs(Killed,
RDI)) {
588 dbgs() <<
" - " << *Dead);
595bool LowOverheadLoop::ValidateTailPredicate() {
596 if (!IsTailPredicationLegal()) {
598 dbgs() <<
"ARM Loops: Didn't find a VCTP instruction.\n";
599 dbgs() <<
"ARM Loops: Tail-predication is not valid.\n");
603 assert(!VCTPs.
empty() &&
"VCTP instruction expected but is not set");
604 assert(
ML.getBlocks().size() == 1 &&
605 "Shouldn't be processing a loop with more than one block");
608 LLVM_DEBUG(
dbgs() <<
"ARM Loops: tail-predication is disabled\n");
612 if (!VPTstate.isValid(
RDI)) {
617 if (!ValidateLiveOuts()) {
626 MachineInstr *VCTP = VCTPs.
back();
627 if (
Start->getOpcode() == ARM::t2DoLoopStartTP ||
628 Start->getOpcode() == ARM::t2WhileLoopStartTP) {
629 TPNumElements =
Start->getOperand(2);
630 StartInsertPt =
Start;
631 StartInsertBB =
Start->getParent();
640 LLVM_DEBUG(
dbgs() <<
"ARM Loops: VCTP operand is defined in the loop.\n");
648 if (StartInsertPt != StartInsertBB->
end() &&
653 ElemDef->removeFromParent();
654 StartInsertBB->
insert(StartInsertPt, ElemDef);
656 <<
"ARM Loops: Moved element count def: " << *ElemDef);
658 StartInsertPt->removeFromParent();
661 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Moved start past: " << *ElemDef);
666 MachineOperand Operand = ElemDef->getOperand(1);
671 TPNumElements = Operand;
672 NumElements = TPNumElements.
getReg();
675 <<
"ARM Loops: Unable to move element count to loop "
676 <<
"start instruction.\n");
686 auto CannotProvideElements = [
this](MachineBasicBlock *
MBB,
687 MCRegister NumElements) {
702 MachineBasicBlock *
MBB = Preheader;
703 while (
MBB &&
MBB != StartInsertBB) {
704 if (CannotProvideElements(
MBB, NumElements)) {
705 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unable to provide element count.\n");
722 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Instruction blocks [W|D]LSTP\n");
731 for (MachineInstr *
MI : DoubleWidthResultInstrs) {
733 if (InstrVecSize > VCTPVecSize) {
734 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Double width result larger than VCTP "
735 <<
"VecSize:\n" << *
MI);
744 auto IsValidSub = [](MachineInstr *
MI,
int ExpectedVecWidth) {
755 SmallPtrSet<MachineInstr*, 2> ElementChain;
756 SmallPtrSet<MachineInstr*, 2>
Ignore;
759 Ignore.insert_range(VCTPs);
761 if (TryRemove(Def,
RDI, ElementChain,
Ignore)) {
762 bool FoundSub =
false;
764 for (
auto *
MI : ElementChain) {
769 if (FoundSub || !IsValidSub(
MI, ExpectedVectorWidth)) {
770 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unexpected instruction in element"
776 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unexpected instruction in element"
781 ToRemove.insert_range(ElementChain);
788 if ((
Start->getOpcode() == ARM::t2DoLoopStartTP ||
789 Start->getOpcode() == ARM::t2WhileLoopStartTP) &&
790 Preheader && !Preheader->
empty() &&
836 switch (
MI.getOpcode()) {
845 case ARM::MVE_VCLZs8:
846 case ARM::MVE_VCLZs16:
847 case ARM::MVE_VCLZs32:
861 InstSet &FalseLanesZero) {
873 Def->getOpcode() == ARM::MVE_VMOVimmi32 &&
874 Def->getOperand(1).getImm() == 0;
878 for (
auto &MO :
MI.operands()) {
896 for (
auto *Def : Defs) {
897 if (Def == &
MI || FalseLanesZero.count(Def) || IsZeroInit(Def))
899 if (MO.
isUse() && isPredicated)
908bool LowOverheadLoop::ValidateLiveOuts() {
930 const TargetRegisterClass *QPRs =
TRI.getRegClass(ARM::MQPRRegClassID);
931 SetVector<MachineInstr *> FalseLanesUnknown;
934 MachineBasicBlock *Header =
ML.getHeader();
938 for (
auto &
MI : *Header) {
946 bool retainsOrReduces =
953 else if (
MI.getNumDefs() == 0)
955 else if (!isPredicated && retainsOrReduces) {
956 LLVM_DEBUG(
dbgs() <<
" Unpredicated instruction that retainsOrReduces: " <<
MI);
958 }
else if (!isPredicated &&
MI.getOpcode() != ARM::MQPRCopy)
963 dbgs() <<
" Predicated:\n";
966 dbgs() <<
" FalseLanesZero:\n";
967 for (
auto *
I : FalseLanesZero)
969 dbgs() <<
" FalseLanesUnknown:\n";
970 for (
auto *
I : FalseLanesUnknown)
974 auto HasPredicatedUsers = [
this](MachineInstr *
MI,
const MachineOperand &MO,
975 SmallPtrSetImpl<MachineInstr *> &
Predicated) {
976 SmallPtrSet<MachineInstr *, 2>
Uses;
978 for (
auto *Use :
Uses) {
991 SmallPtrSet<MachineInstr*, 2> NonPredicated;
992 for (
auto *
MI :
reverse(FalseLanesUnknown)) {
993 for (
auto &MO :
MI->operands()) {
998 <<
TRI.getRegAsmName(MO.getReg()) <<
" at " << *
MI);
1008 SmallPtrSet<MachineInstr *, 2> LiveOutMIs;
1010 ML.getExitBlocks(ExitBlocks);
1011 assert(
ML.getNumBlocks() == 1 &&
"Expected single block loop!");
1012 assert(ExitBlocks.
size() == 1 &&
"Expected a single exit block");
1013 MachineBasicBlock *ExitBB = ExitBlocks.
front();
1014 for (
const MachineBasicBlock::RegisterMaskPair &RegMask : ExitBB->
liveins()) {
1017 if (RegMask.PhysReg == ARM::VPR) {
1023 if (QPRs->
contains(RegMask.PhysReg))
1036 while (!Worklist.empty()) {
1037 MachineInstr *
MI = Worklist.pop_back_val();
1038 if (
MI->getOpcode() == ARM::MQPRCopy) {
1040 MachineInstr *CopySrc =
1043 Worklist.push_back(CopySrc);
1054void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) {
1060 auto ValidateRanges = [](MachineInstr *
Start, MachineInstr *End,
1061 ARMBasicBlockUtils *BBUtils, MachineLoop &
ML) {
1062 MachineBasicBlock *TgtBB = End->getOpcode() == ARM::t2LoopEnd
1063 ? End->getOperand(1).getMBB()
1064 : End->getOperand(2).getMBB();
1067 if (TgtBB !=
ML.getHeader()) {
1068 LLVM_DEBUG(
dbgs() <<
"ARM Loops: LoopEnd is not targeting header.\n");
1074 if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(
ML.getHeader()) ||
1075 !BBUtils->isBBInRange(End,
ML.getHeader(), 4094)) {
1082 if (BBUtils->getOffsetOf(Start) > BBUtils->getOffsetOf(TargetBB) ||
1083 !BBUtils->isBBInRange(Start, TargetBB, 4094)) {
1084 LLVM_DEBUG(
dbgs() <<
"ARM Loops: WLS offset is out-of-range!\n");
1092 StartInsertBB =
Start->getParent();
1096 Revert = !ValidateRanges(Start, End, BBUtils,
ML);
1097 CannotTailPredicate = !ValidateTailPredicate();
1100bool LowOverheadLoop::AddVCTP(MachineInstr *
MI) {
1102 if (VCTPs.
empty()) {
1109 MachineInstr *Prev = VCTPs.
back();
1112 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Found VCTP with a different reaching "
1113 "definition from the main VCTP");
1126 return FS->getFrameIndex();
1133 switch (
I->getOpcode()) {
1134 case ARM::MVE_VSTRWU32:
1135 case ARM::MVE_VLDRWU32: {
1136 return I->getOperand(1).getReg() == ARM::SP &&
1137 I->memoperands().size() == 1 &&
1138 GetFrameIndex(
I->memoperands().front()) >= 0;
1147 if (
MI->getOpcode() != ARM::MVE_VSTRWU32 || !IsStackOp(
MI))
1153 if (
MI->memoperands().size() == 0)
1155 int FI = GetFrameIndex(
MI->memoperands().front());
1157 auto &FrameInfo =
MI->getParent()->getParent()->getFrameInfo();
1158 if (FI == -1 || !FrameInfo.isSpillSlotObjectIndex(FI))
1162 ML->getExitBlocks(Frontier);
1165 while (Idx < Frontier.
size()) {
1167 bool LookAtSuccessors =
true;
1168 for (
auto &
I : *BB) {
1169 if (!IsStackOp(&
I) ||
I.memoperands().size() == 0)
1171 if (GetFrameIndex(
I.memoperands().front()) != FI)
1175 if (
I.getOpcode() == ARM::MVE_VSTRWU32) {
1176 LookAtSuccessors =
false;
1181 if (
I.getOpcode() == ARM::MVE_VLDRWU32)
1185 if (LookAtSuccessors) {
1198bool LowOverheadLoop::ValidateMVEInst(MachineInstr *
MI) {
1199 if (CannotTailPredicate)
1205 if (
MI->getOpcode() == ARM::MVE_VPSEL ||
1206 MI->getOpcode() == ARM::MVE_VPNOT) {
1224 const MCInstrDesc &MCID =
MI->getDesc();
1226 unsigned LastOpIdx =
MI->getNumOperands() - 1;
1228 const MachineOperand &MO =
MI->getOperand(LastOpIdx -
Op.index());
1233 VPTstate.addInst(
MI);
1235 }
else if (
MI->getOpcode() != ARM::MVE_VPST) {
1244 bool RequiresExplicitPredication =
1247 if (
MI->getOpcode() == ARM::MQPRCopy)
1250 DoubleWidthResultInstrs.insert(
MI);
1255 <<
"ARM Loops: Can't tail predicate: " << *
MI);
1270 VPTstate.resetPredicate(
MI);
1272 VPTstate.addPredicate(
MI);
1278 VPTstate.CreateVPTBlock(
MI);
1283bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &mf) {
1291 MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1292 RDI = &getAnalysis<ReachingDefInfoWrapperPass>().getRDI();
1295 TII =
ST.getInstrInfo();
1296 TRI =
ST.getRegisterInfo();
1297 BBUtils = std::make_unique<ARMBasicBlockUtils>(*MF);
1298 BBUtils->computeAllBlockSizes();
1299 BBUtils->adjustBBOffsetsAfter(&MF->
front());
1302 for (
auto *
ML : *MLI) {
1303 if (
ML->isOutermost())
1310bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *
ML) {
1314 for (MachineLoop *L : *
ML)
1318 dbgs() <<
"ARM Loops: Processing loop containing:\n";
1319 if (
auto *Preheader =
ML->getLoopPreheader())
1321 else if (
auto *Preheader = MLI->findLoopPreheader(
ML,
true,
true))
1323 for (
auto *
MBB :
ML->getBlocks())
1329 std::function<MachineInstr*(MachineBasicBlock*)> SearchForStart =
1330 [&SearchForStart](MachineBasicBlock *
MBB) -> MachineInstr* {
1331 for (
auto &
MI : *
MBB) {
1340 LowOverheadLoop LoLoop(*
ML, *MLI, *
RDI, *
TRI, *
TII);
1344 if (LoLoop.Preheader)
1345 LoLoop.Start = SearchForStart(LoLoop.Preheader);
1353 for (
auto &
MI : *
MBB) {
1354 if (
MI.isDebugValue())
1356 else if (
MI.getOpcode() == ARM::t2LoopDec)
1358 else if (
MI.getOpcode() == ARM::t2LoopEnd)
1360 else if (
MI.getOpcode() == ARM::t2LoopEndDec)
1361 LoLoop.End = LoLoop.Dec = &
MI;
1364 else if (
MI.getDesc().isCall()) {
1368 LoLoop.Revert =
true;
1373 LoLoop.AnalyseMVEInst(&
MI);
1379 if (!LoLoop.FoundAllComponents()) {
1380 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Didn't find loop start, update, end\n");
1384 assert(LoLoop.Start->getOpcode() != ARM::t2WhileLoopStart &&
1385 "Expected t2WhileLoopStart to be removed before regalloc!");
1390 if (LoLoop.Dec != LoLoop.End) {
1391 SmallPtrSet<MachineInstr *, 2>
Uses;
1393 if (
Uses.size() > 1 || !
Uses.count(LoLoop.End)) {
1395 LoLoop.Revert =
true;
1398 LoLoop.Validate(BBUtils.get());
1407void ARMLowOverheadLoops::RevertWhile(MachineInstr *
MI)
const {
1410 unsigned BrOpc = BBUtils->isBBInRange(
MI, DestBB, 254) ?
1411 ARM::tBcc : ARM::t2Bcc;
1416void ARMLowOverheadLoops::RevertDo(MachineInstr *
MI)
const {
1421bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *
MI)
const {
1423 MachineBasicBlock *
MBB =
MI->getParent();
1424 SmallPtrSet<MachineInstr*, 1>
Ignore;
1426 if (
I->getOpcode() == ARM::t2LoopEnd) {
1441void ARMLowOverheadLoops::RevertLoopEnd(MachineInstr *
MI,
bool SkipCmp)
const {
1444 MachineBasicBlock *DestBB =
MI->getOperand(1).getMBB();
1445 unsigned BrOpc = BBUtils->isBBInRange(
MI, DestBB, 254) ?
1446 ARM::tBcc : ARM::t2Bcc;
1452void ARMLowOverheadLoops::RevertLoopEndDec(MachineInstr *
MI)
const {
1454 assert(
MI->getOpcode() == ARM::t2LoopEndDec &&
"Expected a t2LoopEndDec!");
1455 MachineBasicBlock *
MBB =
MI->getParent();
1457 MachineInstrBuilder MIB =
1460 MIB.
add(
MI->getOperand(1));
1463 MIB.
addReg(ARM::NoRegister);
1467 MachineBasicBlock *DestBB =
MI->getOperand(2).getMBB();
1469 BBUtils->isBBInRange(
MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc;
1473 MIB.
add(
MI->getOperand(2));
1477 MI->eraseFromParent();
1504void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
1505 if (!LoLoop.IsTailPredicationLegal())
1508 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Trying DCE on loop iteration count.\n");
1512 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Couldn't find iteration count.\n");
1517 SmallPtrSet<MachineInstr*, 4> Killed = { LoLoop.Start, LoLoop.Dec,
1519 if (!TryRemove(Def, *
RDI, LoLoop.ToRemove, Killed))
1520 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unsafe to remove loop iteration count.\n");
1523MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
1527 IterationCountDCE(LoLoop);
1530 MachineInstr *
Start = LoLoop.Start;
1531 MachineBasicBlock *
MBB = LoLoop.StartInsertBB;
1532 unsigned Opc = LoLoop.getStartOpcode();
1533 MachineOperand &
Count = LoLoop.getLoopStartOperand();
1536 MachineInstr* NewStart;
1538 Count.getReg() == ARM::LR) {
1539 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Didn't insert start: DLS lr, lr");
1542 MachineInstrBuilder MIB =
1554 LoLoop.ToRemove.insert(Start);
1558void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
1559 auto RemovePredicate = [](MachineInstr *
MI) {
1560 if (
MI->isDebugInstr())
1564 assert(PIdx >= 1 &&
"Trying to unpredicate a non-predicated instruction");
1566 "Expected Then predicate!");
1568 MI->getOperand(PIdx + 1).setReg(0);
1571 for (
auto &
Block : LoLoop.getVPTBlocks()) {
1572 SmallVectorImpl<MachineInstr *> &Insts =
Block.getInsts();
1574 auto ReplaceVCMPWithVPT = [&](MachineInstr *&TheVCMP, MachineInstr *At) {
1575 assert(TheVCMP &&
"Replacing a removed or non-existent VCMP");
1577 MachineInstrBuilder MIB =
1578 BuildMI(*At->getParent(), At, At->getDebugLoc(),
1587 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Combining with VCMP to VPT: " << *MIB);
1588 LoLoop.BlockMasksToRecompute.insert(MIB.
getInstr());
1589 LoLoop.ToRemove.insert(TheVCMP);
1593 if (LoLoop.VPTstate.isEntryPredicatedOnVCTP(
Block,
true)) {
1594 MachineInstr *VPST = Insts.
front();
1595 if (
Block.hasUniformPredicate()) {
1601 for (
unsigned i = 1; i < Insts.
size(); ++i)
1602 RemovePredicate(Insts[i]);
1611 MachineInstr *Divergent =
Block.getDivergent();
1614 while (DivergentNext !=
MBB->
end() && DivergentNext->isDebugInstr())
1617 bool DivergentNextIsPredicated =
1618 DivergentNext !=
MBB->
end() &&
1623 RemovePredicate(&*
I);
1627 MachineInstr *
VCMP =
1630 if (DivergentNextIsPredicated) {
1637 MachineInstrBuilder MIB =
1642 LoLoop.BlockMasksToRecompute.insert(MIB.
getInstr());
1646 ReplaceVCMPWithVPT(VCMP, VCMP);
1651 LoLoop.ToRemove.insert(VPST);
1652 }
else if (
Block.containsVCTP()) {
1655 MachineInstr *VPST = Insts.
front();
1656 if (
Block.size() == 2) {
1658 "Found a VPST in an otherwise empty vpt block");
1659 LoLoop.ToRemove.insert(VPST);
1661 LoLoop.BlockMasksToRecompute.insert(VPST);
1662 }
else if (Insts.
front()->getOpcode() == ARM::MVE_VPST) {
1666 MachineInstr *VPST = Insts.
front();
1669 "The instruction after a VPST must be predicated");
1673 !LoLoop.ToRemove.contains(VprDef)) {
1674 MachineInstr *
VCMP = VprDef;
1684 ReplaceVCMPWithVPT(VCMP, VPST);
1686 LoLoop.ToRemove.insert(VPST);
1692 LoLoop.ToRemove.insert_range(LoLoop.VCTPs);
1695void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
1698 auto ExpandLoopEnd = [
this](LowOverheadLoop &LoLoop) {
1699 MachineInstr *End = LoLoop.End;
1701 unsigned Opc = LoLoop.IsTailPredicationLegal() ?
1702 ARM::MVE_LETP : ARM::t2LEUpdate;
1706 unsigned Off = LoLoop.Dec == LoLoop.End ? 1 : 0;
1710 LoLoop.ToRemove.insert(LoLoop.Dec);
1711 LoLoop.ToRemove.insert(End);
1720 auto RemoveDeadBranch = [](MachineInstr *
I) {
1721 MachineBasicBlock *BB =
I->getParent();
1723 if (
Terminator->isUnconditionalBranch() &&
I != Terminator) {
1724 MachineBasicBlock *Succ =
Terminator->getOperand(0).getMBB();
1726 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Removing branch: " << *Terminator);
1734 auto ExpandVMOVCopies = [
this](SmallPtrSet<MachineInstr *, 4> &VMOVCopies) {
1735 for (
auto *
MI : VMOVCopies) {
1737 assert(
MI->getOpcode() == ARM::MQPRCopy &&
"Only expected MQPRCOPY!");
1738 MachineBasicBlock *
MBB =
MI->getParent();
1742 ARM::D0 + (Dst - ARM::Q0) * 2)
1743 .
addReg(ARM::D0 + (Src - ARM::Q0) * 2)
1748 ARM::D0 + (Dst - ARM::Q0) * 2 + 1)
1749 .
addReg(ARM::D0 + (Src - ARM::Q0) * 2 + 1)
1753 MI->eraseFromParent();
1757 if (LoLoop.Revert) {
1759 RevertWhile(LoLoop.Start);
1761 RevertDo(LoLoop.Start);
1762 if (LoLoop.Dec == LoLoop.End)
1763 RevertLoopEndDec(LoLoop.End);
1767 ExpandVMOVCopies(LoLoop.VMOVCopies);
1768 LoLoop.Start = ExpandLoopStart(LoLoop);
1770 RemoveDeadBranch(LoLoop.Start);
1771 LoLoop.End = ExpandLoopEnd(LoLoop);
1772 RemoveDeadBranch(LoLoop.End);
1773 if (LoLoop.IsTailPredicationLegal())
1774 ConvertVPTBlocks(LoLoop);
1775 for (
auto *
I : LoLoop.ToRemove) {
1777 I->eraseFromParent();
1779 for (
auto *
I : LoLoop.BlockMasksToRecompute) {
1780 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Recomputing VPT/VPST Block Mask: " << *
I);
1786 PostOrderLoopTraversal DFS(LoLoop.ML, *MLI);
1788 const SmallVectorImpl<MachineBasicBlock*> &PostOrder = DFS.getOrder();
1798bool ARMLowOverheadLoops::RevertNonLoops() {
1799 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Reverting any remaining pseudos...\n");
1802 for (
auto &
MBB : *MF) {
1803 SmallVector<MachineInstr*, 4> Starts;
1804 SmallVector<MachineInstr*, 4> Decs;
1805 SmallVector<MachineInstr*, 4> Ends;
1806 SmallVector<MachineInstr *, 4> EndDecs;
1807 SmallVector<MachineInstr *, 4> MQPRCopies;
1809 for (
auto &
I :
MBB) {
1812 else if (
I.getOpcode() == ARM::t2LoopDec)
1814 else if (
I.getOpcode() == ARM::t2LoopEnd)
1816 else if (
I.getOpcode() == ARM::t2LoopEndDec)
1818 else if (
I.getOpcode() == ARM::MQPRCopy)
1828 for (
auto *Start : Starts) {
1834 for (
auto *Dec : Decs)
1837 for (
auto *End : Ends)
1839 for (
auto *End : EndDecs)
1840 RevertLoopEndDec(End);
1841 for (
auto *
MI : MQPRCopies) {
1843 assert(
MI->getOpcode() == ARM::MQPRCopy &&
"Only expected MQPRCOPY!");
1844 MachineBasicBlock *
MBB =
MI->getParent();
1846 MI->getOperand(0).getReg())
1847 .
add(
MI->getOperand(1))
1848 .
add(
MI->getOperand(1));
1850 MI->eraseFromParent();
1857 return new ARMLowOverheadLoops();
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static int getVecSize(const AMDGPULibFunc &FInfo)
static bool isDomainMVE(MachineInstr *MI)
static bool isVectorPredicated(MachineInstr *MI)
static bool canGenerateNonZeros(const MachineInstr &MI)
static bool isHorizontalReduction(const MachineInstr &MI)
static bool producesDoubleWidthResult(const MachineInstr &MI)
static bool hasVPRUse(MachineInstr &MI)
static bool isRegInClass(const MachineOperand &MO, const TargetRegisterClass *Class)
static bool ValidateMVEStore(MachineInstr *MI, MachineLoop *ML)
static bool isVectorPredicate(MachineInstr *MI)
static bool retainsPreviousHalfElement(const MachineInstr &MI)
static bool shouldInspect(MachineInstr &MI)
static bool producesFalseLanesZero(MachineInstr &MI, const TargetRegisterClass *QPRs, const ReachingDefInfo &RDI, InstSet &FalseLanesZero)
static cl::opt< bool > DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden, cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"), cl::init(false))
ReachingDefInfo InstSet InstSet & Ignore
if(!RDI.isSafeToRemove(MI, Uses, Ignore)) return false
static int getVecSize(const MachineInstr &MI)
#define ARM_LOW_OVERHEAD_LOOPS_NAME
static cl::opt< bool > DisableOmitDLS("arm-disable-omit-dls", cl::Hidden, cl::desc("Disable omitting 'dls lr, lr' instructions"), cl::init(false))
ReachingDefInfo InstSet & ToRemove
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
static const Function * getParent(const Value *V)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const HexagonInstrInfo * TII
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static ARM::PredBlockMask CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, SmallVectorImpl< MachineInstr * > &DeadInstructions)
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file implements a set that has insertion order iteration characteristics.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
FunctionPass class - This class is used to implement most global optimizations.
Describe properties that are true of each instruction in the target description file.
ArrayRef< MCOperandInfo > operands() const
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
unsigned pred_size() const
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
MachineInstr & instr_back()
pred_iterator pred_begin()
LLVM_ABI bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
iterator_range< succ_iterator > successors()
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
LLVM_ABI unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
LLVM_ABI void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
Special value supplied for machine level alias analysis.
This class provides the reaching def analysis.
MachineInstr * getUniqueReachingMIDef(MachineInstr *MI, Register Reg) const
If a single MachineInstr creates the reaching definition, then return it.
bool isReachingDefLiveOut(MachineInstr *MI, Register Reg) const
Return whether the reaching def for MI also is live out of its parent block.
bool isSafeToMoveForwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved forwards to just before To.
void getReachingLocalUses(MachineInstr *MI, Register Reg, InstSet &Uses) const
Provides the uses, in the same block as MI, of register that MI defines.
bool hasLocalDefBefore(MachineInstr *MI, Register Reg) const
Provide whether the register has been defined in the same basic block as, and before,...
void reset()
Re-run the analysis.
void getGlobalUses(MachineInstr *MI, Register Reg, InstSet &Uses) const
Collect the users of the value stored in Reg, which is defined by MI.
MachineInstr * getMIOperand(MachineInstr *MI, unsigned Idx) const
If a single MachineInstr creates the reaching definition, for MIs operand at Idx, then return it.
bool isSafeToMoveBackwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved backwards to just after To.
bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, Register Reg) const
Return whether A and B use the same def of Reg.
void getGlobalReachingDefs(MachineInstr *MI, Register Reg, InstSet &Defs) const
Collect all possible definitions of the value stored in Reg, which is used by MI.
MachineInstr * getLocalLiveOutMIDef(MachineBasicBlock *MBB, Register Reg) const
Return the local MI that produces the live out value for Reg, or nullptr for a non-live out or non-local def.
bool isSafeToDefRegAt(MachineInstr *MI, Register Reg) const
Return whether a MachineInstr could be inserted at MI and safely define the given register without affecting any uses.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across all small sizes.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
A Use represents the edge between a Value definition and its users.
@ ValidForTailPredication
@ RetainsPreviousHalfElement
bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII)
bool isVpred(OperandType op)
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
static bool isDoLoopStart(const MachineInstr &MI)
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isVCTP(const MachineInstr *MI)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
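Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence, and B, C, ..., are the values from the original input ranges.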
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
constexpr from_range_t from_range
static bool isVPTOpcode(int Opc)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
static unsigned getTailPredVectorWidth(unsigned Opcode)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
FunctionPass * createARMLowOverheadLoopsPass()
static bool isMovRegOpcode(int Opc)
static bool isSubImmOpcode(int Opc)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
static bool isLoopStart(const MachineInstr &MI)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
static unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop)
void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, Register DestReg)
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
MachineBasicBlock * getWhileLoopStartTargetBB(const MachineInstr &MI)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static bool isWhileLoopStart(const MachineInstr &MI)
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
int getAddSubImmediate(MachineInstr &MI)
void recomputeVPTBlockMask(MachineInstr &Instr)
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.