Thanks to visit codestin.com
Credit goes to llvm.org

LLVM 22.0.0git
GCNILPSched.cpp
Go to the documentation of this file.
1//===---------------------------- GCNILPSched.cpp - -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10//
11//===----------------------------------------------------------------------===//
12
14
15using namespace llvm;
16
17#define DEBUG_TYPE "machine-scheduler"
18
19namespace {
20
21class GCNILPScheduler {
22 struct Candidate : ilist_node<Candidate> {
23 SUnit *SU;
24
25 Candidate(SUnit *SU_)
26 : SU(SU_) {}
27 };
28
30 using Queue = simple_ilist<Candidate>;
31 Queue PendingQueue;
32 Queue AvailQueue;
33 unsigned CurQueueId = 0;
34
35 std::vector<unsigned> SUNumbers;
36
37 /// CurCycle - The current scheduler state corresponds to this cycle.
38 unsigned CurCycle = 0;
39
40 unsigned getNodePriority(const SUnit *SU) const;
41
42 const SUnit *pickBest(const SUnit *left, const SUnit *right);
43 Candidate* pickCandidate();
44
45 void releasePending();
46 void advanceToCycle(unsigned NextCycle);
47 void releasePredecessors(const SUnit* SU);
48
49public:
50 std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
51 const ScheduleDAG &DAG);
52};
53} // namespace
54
55/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
56/// Smaller number is the higher priority.
57static unsigned
58CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
59 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
60 if (SethiUllmanNumber != 0)
61 return SethiUllmanNumber;
62
63 unsigned Extra = 0;
64 for (const SDep &Pred : SU->Preds) {
65 if (Pred.isCtrl()) continue; // ignore chain preds
66 SUnit *PredSU = Pred.getSUnit();
67 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
68 if (PredSethiUllman > SethiUllmanNumber) {
69 SethiUllmanNumber = PredSethiUllman;
70 Extra = 0;
71 }
72 else if (PredSethiUllman == SethiUllmanNumber)
73 ++Extra;
74 }
75
76 SethiUllmanNumber += Extra;
77
78 if (SethiUllmanNumber == 0)
79 SethiUllmanNumber = 1;
80
81 return SethiUllmanNumber;
82}
83
84// Lower priority means schedule further down. For bottom-up scheduling, lower
85// priority SUs are scheduled before higher priority SUs.
86unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
87 assert(SU->NodeNum < SUNumbers.size());
88 if (SU->NumSuccs == 0 && SU->NumPreds != 0)
89 // If SU does not have a register use, i.e. it doesn't produce a value
90 // that would be consumed (e.g. store), then it terminates a chain of
91 // computation. Give it a large SethiUllman number so it will be
92 // scheduled right before its predecessors that it doesn't lengthen
93 // their live ranges.
94 return 0xffff;
95
96 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
97 // If SU does not have a register def, schedule it close to its uses
98 // because it does not lengthen any live ranges.
99 return 0;
100
101 return SUNumbers[SU->NodeNum];
102}
103
104/// closestSucc - Returns the scheduled cycle of the successor which is
105/// closest to the current cycle.
106static unsigned closestSucc(const SUnit *SU) {
107 unsigned MaxHeight = 0;
108 for (const SDep &Succ : SU->Succs) {
109 if (Succ.isCtrl()) continue; // ignore chain succs
110 unsigned Height = Succ.getSUnit()->getHeight();
111 // If there are bunch of CopyToRegs stacked up, they should be considered
112 // to be at the same position.
113 if (Height > MaxHeight)
114 MaxHeight = Height;
115 }
116 return MaxHeight;
117}
118
119/// calcMaxScratches - Returns an cost estimate of the worse case requirement
120/// for scratch registers, i.e. number of data dependencies.
121static unsigned calcMaxScratches(const SUnit *SU) {
122 unsigned Scratches = 0;
123 for (const SDep &Pred : SU->Preds) {
124 if (Pred.isCtrl()) continue; // ignore chain preds
125 Scratches++;
126 }
127 return Scratches;
128}
129
130// Return -1 if left has higher priority, 1 if right has higher priority.
131// Return 0 if latency-based priority is equivalent.
132static int BUCompareLatency(const SUnit *left, const SUnit *right) {
133 // Scheduling an instruction that uses a VReg whose postincrement has not yet
134 // been scheduled will induce a copy. Model this as an extra cycle of latency.
135 int LHeight = (int)left->getHeight();
136 int RHeight = (int)right->getHeight();
137
138 // If either node is scheduling for latency, sort them by height/depth
139 // and latency.
140
141 // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
142 // is enabled, grouping instructions by cycle, then its height is already
143 // covered so only its depth matters. We also reach this point if both stall
144 // but have the same height.
145 if (LHeight != RHeight)
146 return LHeight > RHeight ? 1 : -1;
147
148 int LDepth = left->getDepth();
149 int RDepth = right->getDepth();
150 if (LDepth != RDepth) {
151 LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
152 << ") depth " << LDepth << " vs SU (" << right->NodeNum
153 << ") depth " << RDepth << "\n");
154 return LDepth < RDepth ? 1 : -1;
155 }
156 if (left->Latency != right->Latency)
157 return left->Latency > right->Latency ? 1 : -1;
158
159 return 0;
160}
161
162const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
163{
164 // TODO: add register pressure lowering checks
165
166 bool const DisableSchedCriticalPath = false;
167 int MaxReorderWindow = 6;
169 int spread = (int)left->getDepth() - (int)right->getDepth();
170 if (std::abs(spread) > MaxReorderWindow) {
171 LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
172 << left->getDepth() << " != SU(" << right->NodeNum
173 << "): " << right->getDepth() << "\n");
174 return left->getDepth() < right->getDepth() ? right : left;
175 }
176 }
177
178 bool const DisableSchedHeight = false;
179 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
180 int spread = (int)left->getHeight() - (int)right->getHeight();
181 if (std::abs(spread) > MaxReorderWindow)
182 return left->getHeight() > right->getHeight() ? right : left;
183 }
184
185 // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
186 unsigned LPriority = getNodePriority(left);
187 unsigned RPriority = getNodePriority(right);
188
189 if (LPriority != RPriority)
190 return LPriority > RPriority ? right : left;
191
192 // Try schedule def + use closer when Sethi-Ullman numbers are the same.
193 // e.g.
194 // t1 = op t2, c1
195 // t3 = op t4, c2
196 //
197 // and the following instructions are both ready.
198 // t2 = op c3
199 // t4 = op c4
200 //
201 // Then schedule t2 = op first.
202 // i.e.
203 // t4 = op c4
204 // t2 = op c3
205 // t1 = op t2, c1
206 // t3 = op t4, c2
207 //
208 // This creates more short live intervals.
209 unsigned LDist = closestSucc(left);
210 unsigned RDist = closestSucc(right);
211 if (LDist != RDist)
212 return LDist < RDist ? right : left;
213
214 // How many registers becomes live when the node is scheduled.
215 unsigned LScratch = calcMaxScratches(left);
216 unsigned RScratch = calcMaxScratches(right);
217 if (LScratch != RScratch)
218 return LScratch > RScratch ? right : left;
219
220 bool const DisableSchedCycles = false;
221 if (!DisableSchedCycles) {
222 int result = BUCompareLatency(left, right);
223 if (result != 0)
224 return result > 0 ? right : left;
225 return left;
226 }
227 if (left->getHeight() != right->getHeight())
228 return (left->getHeight() > right->getHeight()) ? right : left;
229
230 if (left->getDepth() != right->getDepth())
231 return (left->getDepth() < right->getDepth()) ? right : left;
232
233 assert(left->NodeQueueId && right->NodeQueueId &&
234 "NodeQueueId cannot be zero");
235 return (left->NodeQueueId > right->NodeQueueId) ? right : left;
236}
237
238GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
239 if (AvailQueue.empty())
240 return nullptr;
241 auto Best = AvailQueue.begin();
242 for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
243 const auto *NewBestSU = pickBest(Best->SU, I->SU);
244 if (NewBestSU != Best->SU) {
245 assert(NewBestSU == I->SU);
246 Best = I;
247 }
248 }
249 return &*Best;
250}
251
252void GCNILPScheduler::releasePending() {
253 // Check to see if any of the pending instructions are ready to issue. If
254 // so, add them to the available queue.
255 for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
256 auto &C = *I++;
257 if (C.SU->getHeight() <= CurCycle) {
258 PendingQueue.remove(C);
259 AvailQueue.push_back(C);
260 C.SU->NodeQueueId = CurQueueId++;
261 }
262 }
263}
264
265/// Move the scheduler state forward by the specified number of Cycles.
266void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
267 if (NextCycle <= CurCycle)
268 return;
269 CurCycle = NextCycle;
270 releasePending();
271}
272
273void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
274 for (const auto &PredEdge : SU->Preds) {
275 auto *PredSU = PredEdge.getSUnit();
276 if (PredEdge.isWeak())
277 continue;
278 assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
279
280 PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
281
282 if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
283 PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
284 }
285}
286
287std::vector<const SUnit*>
288GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
289 const ScheduleDAG &DAG) {
290 auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
291
292 std::vector<SUnit> SUSavedCopy;
293 SUSavedCopy.resize(SUnits.size());
294
295 // we cannot save only those fields we touch: some of them are private
296 // so save units verbatim: this assumes SUnit should have value semantics
297 for (const SUnit &SU : SUnits)
298 SUSavedCopy[SU.NodeNum] = SU;
299
300 SUNumbers.assign(SUnits.size(), 0);
301 for (const SUnit &SU : SUnits)
302 CalcNodeSethiUllmanNumber(&SU, SUNumbers);
303
304 for (const auto *SU : BotRoots) {
305 AvailQueue.push_back(
306 *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
307 }
308 releasePredecessors(&DAG.ExitSU);
309
310 std::vector<const SUnit*> Schedule;
311 Schedule.reserve(SUnits.size());
312 while (true) {
313 if (AvailQueue.empty() && !PendingQueue.empty()) {
314 auto *EarliestSU =
315 llvm::min_element(PendingQueue, [=](const Candidate &C1,
316 const Candidate &C2) {
317 return C1.SU->getHeight() < C2.SU->getHeight();
318 })->SU;
319 advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
320 }
321 if (AvailQueue.empty())
322 break;
323
324 LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
325 "Ready queue:";
326 for (auto &C
327 : AvailQueue) dbgs()
328 << ' ' << C.SU->NodeNum;
329 dbgs() << '\n';);
330
331 auto *C = pickCandidate();
332 assert(C);
333 AvailQueue.remove(*C);
334 auto *SU = C->SU;
335 LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
336
337 advanceToCycle(SU->getHeight());
338
339 releasePredecessors(SU);
340 Schedule.push_back(SU);
341 SU->isScheduled = true;
342 }
343 assert(SUnits.size() == Schedule.size());
344
345 std::reverse(Schedule.begin(), Schedule.end());
346
347 // restore units
348 for (auto &SU : SUnits)
349 SU = SUSavedCopy[SU.NodeNum];
350
351 return Schedule;
352}
353
354namespace llvm {
355std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
356 const ScheduleDAG &DAG) {
357 GCNILPScheduler S;
358 return S.schedule(BotRoots, DAG);
359}
360} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Prepare AGPR Alloc
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static unsigned CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector< unsigned > &SUNumbers)
CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
#define I(x, y, z)
Definition MD5.cpp:58
static cl::opt< bool > DisableSchedCycles("disable-sched-cycles", cl::Hidden, cl::init(false), cl::desc("Disable cycle-level precision during preRA scheduling"))
static cl::opt< bool > DisableSchedCriticalPath("disable-sched-critical-path", cl::Hidden, cl::init(false), cl::desc("Disable critical path priority in sched=list-ilp"))
static cl::opt< int > MaxReorderWindow("max-sched-reorder", cl::Hidden, cl::init(6), cl::desc("Number of instructions to allow ahead of the critical path " "in sched=list-ilp"))
static unsigned closestSucc(const SUnit *SU)
closestSucc - Returns the scheduled cycle of the successor which is closest to the current cycle.
static cl::opt< bool > DisableSchedHeight("disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp"))
static unsigned calcMaxScratches(const SUnit *SU)
calcMaxScratches - Returns an cost estimate of the worse case requirement for scratch registers,...
static unsigned CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector< unsigned > &SUNumbers)
CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, RegReductionPQBase *SPQ)
#define LLVM_DEBUG(...)
Definition Debug.h:114
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
Scheduling dependency.
Definition ScheduleDAG.h:51
SUnit * getSUnit() const
bool isCtrl() const
Shorthand for getKind() != SDep::Data.
Scheduling unit. This is a node in the scheduling DAG.
unsigned NumSuccs
unsigned NumPreds
unsigned NodeQueueId
Queue id of node.
unsigned NodeNum
Entry # of node in the node vector.
unsigned getHeight() const
Returns the height of this node, which is the length of the maximum path down to any node which has n...
unsigned short Latency
Node latency.
unsigned getDepth() const
Returns the depth of this node, which is the length of the maximum path up to any node which has no p...
bool isScheduled
True once scheduled.
SmallVector< SDep, 4 > Succs
All sunit successors.
SmallVector< SDep, 4 > Preds
All sunit predecessors.
virtual void dumpNode(const SUnit &SU) const =0
SUnit ExitSU
Special node for the region exit.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
Definition Allocator.h:390
A simple intrusive list implementation.
void push_front(reference Node)
Insert a node at the front; never copies.
bool empty() const
Check if the list is empty in constant time.
void push_back(reference Node)
Insert a node at the back; never copies.
void remove(reference N)
Remove a node by reference; never deletes.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2000
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
std::vector< const SUnit * > makeGCNILPScheduler(ArrayRef< const SUnit * > BotRoots, const ScheduleDAG &DAG)