41#define DEBUG_TYPE "aarch64-ccmp"
47 cl::desc(
"Maximum number of instructions per speculated block."));
// Pass statistics, declared through the STATISTIC(VARNAME, DESC) macro.
// Each counter's DESC string states what it tallies: the candidates
// considered, one counter per rejection reason, and the conversions made.
53STATISTIC(NumConsidered,
"Number of ccmps considered");
// Rejection counters; the reason is given in parentheses in each DESC.
54STATISTIC(NumPhiRejs,
"Number of ccmps rejected (PHI)");
55STATISTIC(NumPhysRejs,
"Number of ccmps rejected (Physregs)");
56STATISTIC(NumPhi2Rejs,
"Number of ccmps rejected (PHI2)");
57STATISTIC(NumHeadBranchRejs,
"Number of ccmps rejected (Head branch)");
58STATISTIC(NumCmpBranchRejs,
"Number of ccmps rejected (CmpBB branch)");
59STATISTIC(NumCmpTermRejs,
"Number of ccmps rejected (CmpBB is cbz...)");
60STATISTIC(NumImmRangeRejs,
"Number of ccmps rejected (Imm out of range)");
61STATISTIC(NumLiveDstRejs,
"Number of ccmps rejected (Cmp dest live)");
62STATISTIC(NumMultNZCVUses,
"Number of ccmps rejected (NZCV used)");
63STATISTIC(NumUnknNZCVDefs,
"Number of ccmps rejected (NZCV def unknown)");
65STATISTIC(NumSpeculateRejs,
"Number of ccmps rejected (Can't speculate)");
// Counters for conversions that were carried out.
67STATISTIC(NumConverted,
"Number of ccmp instructions created");
68STATISTIC(NumCompBranches,
"Number of cbz/cbnz branches converted");
171 bool trivialTailPHIs();
174 void updateTailPHIs();
177 bool isDeadDef(
unsigned DstReg);
200 bool canConvert(MachineBasicBlock *
MBB);
204 void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
208 int expectedCodeSizeDelta()
const;
214bool SSACCmpConv::trivialTailPHIs() {
215 for (
auto &
I : *
Tail) {
218 unsigned HeadReg = 0, CmpBBReg = 0;
220 for (
unsigned oi = 1, oe =
I.getNumOperands(); oi != oe; oi += 2) {
221 MachineBasicBlock *
MBB =
I.getOperand(oi + 1).getMBB();
224 assert((!HeadReg || HeadReg ==
Reg) &&
"Inconsistent PHI operands");
228 assert((!CmpBBReg || CmpBBReg ==
Reg) &&
"Inconsistent PHI operands");
232 if (HeadReg != CmpBBReg)
240void SSACCmpConv::updateTailPHIs() {
241 for (
auto &
I : *
Tail) {
245 for (
unsigned oi =
I.getNumOperands(); oi > 2; oi -= 2) {
247 if (
I.getOperand(oi - 1).getMBB() == CmpBB) {
248 I.removeOperand(oi - 1);
249 I.removeOperand(oi - 2);
257bool SSACCmpConv::isDeadDef(
unsigned DstReg) {
259 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
261 if (!Register::isVirtualRegister(DstReg))
265 return MRI->use_nodbg_empty(DstReg);
274 assert(
Cond.size() == 1 &&
"Unknown Cond array format");
286 assert(
Cond.size() == 3 &&
"Unknown Cond array format");
291 assert(
Cond.size() == 3 &&
"Unknown Cond array format");
297MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *
MBB) {
302 if (!
I->readsRegister(AArch64::NZCV,
nullptr)) {
303 switch (
I->getOpcode()) {
319 assert(!
I->isTerminator() &&
"Spurious terminator");
320 switch (
I->getOpcode()) {
322 case AArch64::SUBSWri:
323 case AArch64::SUBSXri:
325 case AArch64::ADDSWri:
326 case AArch64::ADDSXri:
329 if (
I->getOperand(3).getImm() || !
isUInt<5>(
I->getOperand(2).getImm())) {
335 case AArch64::SUBSWrr:
336 case AArch64::SUBSXrr:
337 case AArch64::ADDSWrr:
338 case AArch64::ADDSXrr:
339 if (isDeadDef(
I->getOperand(0).getReg()))
341 LLVM_DEBUG(
dbgs() <<
"Can't convert compare with live destination: "
345 case AArch64::FCMPSrr:
346 case AArch64::FCMPDrr:
347 case AArch64::FCMPESrr:
348 case AArch64::FCMPEDrr:
380bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *
MBB,
381 const MachineInstr *CmpMI) {
394 if (
I.isDebugInstr())
418 bool DontMoveAcrossStore =
true;
419 if (!
I.isSafeToMove(DontMoveAcrossStore)) {
425 if (&
I != CmpMI &&
I.modifiesRegister(AArch64::NZCV,
TRI)) {
436bool SSACCmpConv::canConvert(MachineBasicBlock *
MBB) {
438 Tail = CmpBB =
nullptr;
442 MachineBasicBlock *Succ0 = Head->
succ_begin()[0];
443 MachineBasicBlock *Succ1 = Head->
succ_begin()[1];
471 if (!trivialTailPHIs()) {
477 if (!
Tail->livein_empty()) {
492 LLVM_DEBUG(
dbgs() <<
"Can't handle live-in physregs in CmpBB.\n");
499 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
510 dbgs() <<
"analyzeBranch didn't find conditional branch in Head.\n");
537 dbgs() <<
"analyzeBranch didn't find conditional branch in CmpBB.\n");
542 if (!
parseCond(CmpBBCond, CmpBBTailCC)) {
553 <<
", CmpBB->Tail on "
556 CmpMI = findConvertibleCompare(CmpBB);
560 if (!canSpeculateInstrs(CmpBB, CmpMI)) {
567void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
596 Head2Tail + Head2CmpBB * CmpBB2Tail);
615 if (HeadCond[0].
getImm() == -1) {
618 switch (HeadCond[1].
getImm()) {
621 Opc = AArch64::SUBSWri;
625 Opc = AArch64::SUBSXri;
630 const MCInstrDesc &MCID =
TII->get(
Opc);
633 MRI->createVirtualRegister(
TII->getRegClass(MCID, 0,
TRI));
641 MRI->constrainRegClass(HeadCond[2].
getReg(),
642 TII->getRegClass(MCID, 1,
TRI));
650 unsigned FirstOp = 1;
651 bool isZBranch =
false;
655 case AArch64::SUBSWri:
Opc = AArch64::CCMPWi;
break;
656 case AArch64::SUBSWrr:
Opc = AArch64::CCMPWr;
break;
657 case AArch64::SUBSXri:
Opc = AArch64::CCMPXi;
break;
658 case AArch64::SUBSXrr:
Opc = AArch64::CCMPXr;
break;
659 case AArch64::ADDSWri:
Opc = AArch64::CCMNWi;
break;
660 case AArch64::ADDSWrr:
Opc = AArch64::CCMNWr;
break;
661 case AArch64::ADDSXri:
Opc = AArch64::CCMNXi;
break;
662 case AArch64::ADDSXrr:
Opc = AArch64::CCMNXr;
break;
663 case AArch64::FCMPSrr:
Opc = AArch64::FCCMPSrr; FirstOp = 0;
break;
664 case AArch64::FCMPDrr:
Opc = AArch64::FCCMPDrr; FirstOp = 0;
break;
665 case AArch64::FCMPESrr:
Opc = AArch64::FCCMPESrr; FirstOp = 0;
break;
666 case AArch64::FCMPEDrr:
Opc = AArch64::FCCMPEDrr; FirstOp = 0;
break;
669 Opc = AArch64::CCMPWi;
675 Opc = AArch64::CCMPXi;
687 const MCInstrDesc &MCID =
TII->get(
Opc);
689 TII->getRegClass(MCID, 0,
TRI));
692 TII->getRegClass(MCID, 1,
TRI));
704 bool isNZ = CmpMI->
getOpcode() == AArch64::CBNZW ||
718int SSACCmpConv::expectedCodeSizeDelta()
const {
723 if (HeadCond[0].
getImm() == -1) {
724 switch (HeadCond[1].
getImm()) {
758class AArch64ConditionalCompares :
public MachineFunctionPass {
759 const MachineBranchProbabilityInfo *MBPI;
760 const TargetInstrInfo *
TII;
761 const TargetRegisterInfo *
TRI;
762 MCSchedModel SchedModel;
765 MachineRegisterInfo *
MRI;
766 MachineDominatorTree *DomTree;
767 MachineLoopInfo *
Loops;
768 MachineTraceMetrics *Traces;
774 AArch64ConditionalCompares() : MachineFunctionPass(
ID) {}
775 void getAnalysisUsage(AnalysisUsage &AU)
const override;
776 bool runOnMachineFunction(MachineFunction &MF)
override;
777 StringRef getPassName()
const override {
778 return "AArch64 Conditional Compares";
782 bool tryConvert(MachineBasicBlock *);
785 void invalidateTraces();
790char AArch64ConditionalCompares::ID = 0;
793 "AArch64 CCMP Pass",
false,
false)
801 return new AArch64ConditionalCompares();
804void AArch64ConditionalCompares::getAnalysisUsage(
AnalysisUsage &AU)
const {
816void AArch64ConditionalCompares::updateDomTree(
821 for (MachineBasicBlock *RemovedMBB : Removed) {
823 assert(Node != HeadNode &&
"Cannot erase the head node");
824 assert(
Node->getIDom() == HeadNode &&
"CmpBB should be dominated by Head");
825 while (
Node->getNumChildren())
836 for (MachineBasicBlock *RemovedMBB : Removed)
837 Loops->removeBlock(RemovedMBB);
841void AArch64ConditionalCompares::invalidateTraces() {
849bool AArch64ConditionalCompares::shouldConvert() {
854 MinInstr = Traces->
getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
861 int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
862 LLVM_DEBUG(
dbgs() <<
"Code size delta: " << CodeSizeDelta <<
'\n');
865 if (CodeSizeDelta < 0)
867 if (CodeSizeDelta > 0) {
868 LLVM_DEBUG(
dbgs() <<
"Code size is increasing, give up on this one.\n");
885 unsigned CmpBBDepth =
888 <<
"\nCmpBB depth: " << CmpBBDepth <<
'\n');
889 if (CmpBBDepth > HeadDepth + DelayLimit) {
890 LLVM_DEBUG(
dbgs() <<
"Branch delay would be larger than " << DelayLimit
903 if (ResDepth > HeadDepth) {
910bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *
MBB) {
914 SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
915 CmpConv.convert(RemovedBlocks);
917 updateDomTree(RemovedBlocks);
918 for (MachineBasicBlock *
MBB : RemovedBlocks)
920 updateLoops(RemovedBlocks);
925bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
926 LLVM_DEBUG(
dbgs() <<
"********** AArch64 Conditional Compares **********\n"
927 <<
"********** Function: " << MF.
getName() <<
'\n');
935 DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
936 Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
937 MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
938 Traces = &getAnalysis<MachineTraceMetricsWrapperPass>().getMTM();
943 CmpConv.runOnMachineFunction(MF, MBPI);
951 if (tryConvert(
I->getBlock()))
unsigned const MachineRegisterInfo * MRI
static cl::opt< bool > Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11"))
static cl::opt< unsigned > BlockInstrLimit("aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
static bool parseCond(ArrayRef< MachineOperand > Cond, AArch64CC::CondCode &CC)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static unsigned InstrCount
This file builds on the ADT/GraphTraits.h file to build a generic depth-first graph iterator.
static cl::opt< bool > Stress("stress-early-ifcvt", cl::Hidden, cl::desc("Turn all knobs to 11"))
static cl::opt< unsigned > BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
void eraseNode(NodeT *BB)
eraseNode - Removes a node from the dominator tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
FunctionPass class - This class is used to implement most global optimizations.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
unsigned pred_size() const
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
LLVM_ABI void setSuccProbability(succ_iterator I, BranchProbability Prob)
Set successor probability of a given iterator.
succ_iterator succ_begin()
bool livein_empty() const
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
LLVM_ABI void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
bool hasSuccessorProbabilities() const
Return true if any of the successors have probabilities attached to them.
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
Analysis pass which computes a MachineDominatorTree.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Trace getTrace(const MachineBasicBlock *MBB)
Get the trace that passes through MBB.
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
Ensemble * getEnsemble(MachineTraceStrategy)
Get the trace ensemble representing the given trace selection strategy.
void invalidate(const MachineBasicBlock *MBB)
Invalidate cached information about MBB.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static const char * getCondCodeName(CondCode Code)
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ Define
Register definition.
initializer< Ty > init(const Ty &Val)
NodeAddr< NodeBase * > Node
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI PhysRegInfo AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg, const TargetRegisterInfo *TRI)
AnalyzePhysRegInBundle - Analyze how the current instruction or bundle uses a physical register.
FunctionPass * createAArch64ConditionalCompares()
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
DomTreeNodeBase< MachineBasicBlock > MachineDomTreeNode
ArrayRef(const T &OneElt) -> ArrayRef< T >
iterator_range< df_iterator< T > > depth_first(const T &G)
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
unsigned MispredictPenalty
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
bool Read
Reg or one of its aliases is read.
bool Defined
Reg or one of its aliases is defined.
bool Clobbered
There is a regmask operand indicating Reg is clobbered.