LLVM 22.0.0git
ModuloSchedule.cpp
1//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
18#include "llvm/MC/MCContext.h"
19#include "llvm/Support/Debug.h"
22
23#define DEBUG_TYPE "pipeliner"
24using namespace llvm;
25
26static cl::opt<bool> SwapBranchTargetsMVE(
27 "pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false),
28 cl::desc("Swap target blocks of a conditional branch for MVE expander"));
29
30void ModuloSchedule::print(raw_ostream &OS) {
31 for (MachineInstr *MI : ScheduledInstrs)
32 OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
33}
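// Example of the dump format produced above (illustrative only; the actual
// opcodes and virtual registers depend on the target and schedule):
//   [stage 0 @0c] %10:gpr = LOAD %7:gpr, 0
//   [stage 1 @3c] %11:gpr = ADD %10:gpr, 1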
34
35//===----------------------------------------------------------------------===//
36// ModuloScheduleExpander implementation
37//===----------------------------------------------------------------------===//
38
39/// Return the register values for the operands of a Phi instruction.
40/// This function assumes the instruction is a Phi.
41static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
42 Register &InitVal, Register &LoopVal) {
43 assert(Phi.isPHI() && "Expecting a Phi.");
44
45 InitVal = Register();
46 LoopVal = Register();
47 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
48 if (Phi.getOperand(i + 1).getMBB() != Loop)
49 InitVal = Phi.getOperand(i).getReg();
50 else
51 LoopVal = Phi.getOperand(i).getReg();
52
53 assert(InitVal && LoopVal && "Unexpected Phi structure.");
54}
55
56/// Return the Phi register value that comes from the incoming block.
57static Register getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
58 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
59 if (Phi.getOperand(i + 1).getMBB() != LoopBB)
60 return Phi.getOperand(i).getReg();
61 return Register();
62}
63
64/// Return the Phi register value that comes from the loop block.
65static Register getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
66 for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
67 if (Phi.getOperand(i + 1).getMBB() == LoopBB)
68 return Phi.getOperand(i).getReg();
69 return Register();
70}
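// Illustrative example for the three helpers above: given a loop-header phi
//   %v = PHI %init, %preheader, %next, %loop
// getPhiRegs() returns both %init and %next, getInitPhiReg() returns %init,
// and getLoopPhiReg() returns %next.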
71
72void ModuloScheduleExpander::expand() {
73 BB = Schedule.getLoop()->getTopBlock();
74 Preheader = *BB->pred_begin();
75 if (Preheader == BB)
76 Preheader = *std::next(BB->pred_begin());
77
78 // Iterate over the definitions in each instruction, and compute the
79 // stage difference for each use. Keep the maximum value.
80 for (MachineInstr *MI : Schedule.getInstructions()) {
81 int DefStage = Schedule.getStage(MI);
82 for (const MachineOperand &Op : MI->all_defs()) {
83 Register Reg = Op.getReg();
84 unsigned MaxDiff = 0;
85 bool PhiIsSwapped = false;
86 for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
87 MachineInstr *UseMI = UseOp.getParent();
88 int UseStage = Schedule.getStage(UseMI);
89 unsigned Diff = 0;
90 if (UseStage != -1 && UseStage >= DefStage)
91 Diff = UseStage - DefStage;
92 if (MI->isPHI()) {
93 if (isLoopCarried(*MI))
94 ++Diff;
95 else
96 PhiIsSwapped = true;
97 }
98 MaxDiff = std::max(Diff, MaxDiff);
99 }
100 RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
101 }
102 }
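// Illustrative example of the bookkeeping above: if a register is defined in
// stage 0 and its furthest use is in stage 2, RegToStageDiff records a
// distance of 2 for it, meaning the value must stay live across two extra
// stages of the expanded loop.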
103
104 generatePipelinedLoop();
105}
106
107void ModuloScheduleExpander::generatePipelinedLoop() {
108 LoopInfo = TII->analyzeLoopForPipelining(BB);
109 assert(LoopInfo && "Must be able to analyze loop!");
110
111 // Create a new basic block for the kernel and add it to the CFG.
112 MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
113
114 unsigned MaxStageCount = Schedule.getNumStages() - 1;
115
116 // Remember the registers that are used in different stages. The index is
117 // the iteration, or stage, that the instruction is scheduled in. This is
118 // a map between register names in the original block and the names created
119 // in each stage of the pipelined loop.
120 ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
121
122 // The renaming destination by Phis for the registers across stages.
123 // This map is updated during Phis generation to point to the most recent
124 // renaming destination.
125 ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
126
127 InstrMapTy InstrMap;
128
129 SmallVector<MachineBasicBlock *, 4> PrologBBs;
130
131 // Generate the prolog instructions that set up the pipeline.
132 generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
133 MF.insert(BB->getIterator(), KernelBB);
134 LIS.insertMBBInMaps(KernelBB);
135
136 // Rearrange the instructions to generate the new, pipelined loop,
137 // and update register names as needed.
138 for (MachineInstr *CI : Schedule.getInstructions()) {
139 if (CI->isPHI())
140 continue;
141 unsigned StageNum = Schedule.getStage(CI);
142 MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
143 updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
144 KernelBB->push_back(NewMI);
145 LIS.InsertMachineInstrInMaps(*NewMI);
146 InstrMap[NewMI] = CI;
147 }
148
149 // Copy any terminator instructions to the new kernel, and update
150 // names as needed.
151 for (MachineInstr &MI : BB->terminators()) {
152 MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
153 updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
154 KernelBB->push_back(NewMI);
155 LIS.InsertMachineInstrInMaps(*NewMI);
156 InstrMap[NewMI] = &MI;
157 }
158
159 NewKernel = KernelBB;
160 KernelBB->transferSuccessors(BB);
161 KernelBB->replaceSuccessor(BB, KernelBB);
162
163 generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
164 InstrMap, MaxStageCount, MaxStageCount, false);
165 generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
166 InstrMap, MaxStageCount, MaxStageCount, false);
167
168 LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
169
170 SmallVector<MachineBasicBlock *, 4> EpilogBBs;
171 // Generate the epilog instructions to complete the pipeline.
172 generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
173 PrologBBs);
174
175 // We need this step because the register allocation doesn't handle some
176 // situations well, so we insert copies to help out.
177 splitLifetimes(KernelBB, EpilogBBs);
178
179 // Remove dead instructions due to loop induction variables.
180 removeDeadInstructions(KernelBB, EpilogBBs);
181
182 // Add branches between prolog and epilog blocks.
183 addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
184
185 delete[] VRMap;
186 delete[] VRMapPhi;
187}
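// After expansion the control flow is roughly (illustrative, for a schedule
// with three stages):
//   Preheader -> Prolog0 -> Prolog1 -> Kernel -> Epilog0 -> Epilog1 -> Exit
// addBranches() later adds the early exits from each prolog to its matching
// epilog for trip counts that never reach the kernel.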
188
189void ModuloScheduleExpander::cleanup() {
190 // Remove the original loop since it's no longer referenced.
191 for (auto &I : *BB)
192 LIS.RemoveMachineInstrFromMaps(I);
193 BB->clear();
194 BB->eraseFromParent();
195}
196
197/// Generate the pipeline prolog code.
198void ModuloScheduleExpander::generateProlog(unsigned LastStage,
199 MachineBasicBlock *KernelBB,
200 ValueMapTy *VRMap,
201 MBBVectorTy &PrologBBs) {
202 MachineBasicBlock *PredBB = Preheader;
203 InstrMapTy InstrMap;
204
205 // Generate a basic block for each stage, not including the last stage,
206 // which will be generated in the kernel. Each basic block may contain
207 // instructions from multiple stages/iterations.
208 for (unsigned i = 0; i < LastStage; ++i) {
209 // Create and insert the prolog basic block prior to the original loop
210 // basic block. The original loop is removed later.
211 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
212 PrologBBs.push_back(NewBB);
213 MF.insert(BB->getIterator(), NewBB);
214 NewBB->transferSuccessors(PredBB);
215 PredBB->addSuccessor(NewBB);
216 PredBB = NewBB;
217 LIS.insertMBBInMaps(NewBB);
218
219 // Generate instructions for each appropriate stage. Process instructions
220 // in original program order.
221 for (int StageNum = i; StageNum >= 0; --StageNum) {
222 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
223 BBE = BB->getFirstTerminator();
224 BBI != BBE; ++BBI) {
225 if (Schedule.getStage(&*BBI) == StageNum) {
226 if (BBI->isPHI())
227 continue;
228 MachineInstr *NewMI =
229 cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
230 updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
231 NewBB->push_back(NewMI);
232 LIS.InsertMachineInstrInMaps(*NewMI);
233 InstrMap[NewMI] = &*BBI;
234 }
235 }
236 }
237 rewritePhiValues(NewBB, i, VRMap, InstrMap);
238 LLVM_DEBUG({
239 dbgs() << "prolog:\n";
240 NewBB->dump();
241 });
242 }
243
244 PredBB->replaceSuccessor(BB, KernelBB);
245
246 // Check if we need to remove the branch from the preheader to the original
247 // loop, and replace it with a branch to the new loop.
248 unsigned numBranches = TII->removeBranch(*Preheader);
249 if (numBranches) {
250 SmallVector<MachineOperand, 0> Cond;
251 TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
252 }
253}
254
255/// Generate the pipeline epilog code. The epilog code finishes the iterations
256/// that were started in either the prolog or the kernel. We create a basic
257/// block for each stage that needs to complete.
258void ModuloScheduleExpander::generateEpilog(
259 unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
260 ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
261 MBBVectorTy &PrologBBs) {
262 // We need to change the branch from the kernel to the first epilog block, so
263 // this call to analyze branch uses the kernel rather than the original BB.
264 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
265 SmallVector<MachineOperand, 4> Cond;
266 bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
267 assert(!checkBranch && "generateEpilog must be able to analyze the branch");
268 if (checkBranch)
269 return;
270
271 MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
272 if (*LoopExitI == KernelBB)
273 ++LoopExitI;
274 assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
275 MachineBasicBlock *LoopExitBB = *LoopExitI;
276
277 MachineBasicBlock *PredBB = KernelBB;
278 MachineBasicBlock *EpilogStart = LoopExitBB;
279 InstrMapTy InstrMap;
280
281 // Generate a basic block for each stage, not including the last stage,
282 // which was generated for the kernel. Each basic block may contain
283 // instructions from multiple stages/iterations.
284 int EpilogStage = LastStage + 1;
285 for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
286 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
287 EpilogBBs.push_back(NewBB);
288 MF.insert(BB->getIterator(), NewBB);
289
290 PredBB->replaceSuccessor(LoopExitBB, NewBB);
291 NewBB->addSuccessor(LoopExitBB);
292 LIS.insertMBBInMaps(NewBB);
293
294 if (EpilogStart == LoopExitBB)
295 EpilogStart = NewBB;
296
297 // Add instructions to the epilog depending on the current block.
298 // Process instructions in original program order.
299 for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
300 for (auto &BBI : *BB) {
301 if (BBI.isPHI())
302 continue;
303 MachineInstr *In = &BBI;
304 if ((unsigned)Schedule.getStage(In) == StageNum) {
305 // Instructions with memoperands in the epilog are updated with
306 // conservative values.
307 MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
308 updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
309 NewBB->push_back(NewMI);
310 LIS.InsertMachineInstrInMaps(*NewMI);
311 InstrMap[NewMI] = In;
312 }
313 }
314 }
315 generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
316 InstrMap, LastStage, EpilogStage, i == 1);
317 generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
318 InstrMap, LastStage, EpilogStage, i == 1);
319 PredBB = NewBB;
320
321 LLVM_DEBUG({
322 dbgs() << "epilog:\n";
323 NewBB->dump();
324 });
325 }
326
327 // Fix any Phi nodes in the loop exit block.
328 LoopExitBB->replacePhiUsesWith(BB, PredBB);
329
330 // Create a branch to the new epilog from the kernel.
331 // Remove the original branch and add a new branch to the epilog.
332 TII->removeBranch(*KernelBB);
333 assert((OrigBB == TBB || OrigBB == FBB) &&
334 "Unable to determine looping branch direction");
335 if (OrigBB != TBB)
336 TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
337 else
338 TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
339 // Add a branch to the loop exit.
340 if (EpilogBBs.size() > 0) {
341 MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
342 SmallVector<MachineOperand, 4> Cond1;
343 TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
344 }
345}
346
347/// Replace all uses of FromReg that appear outside the specified
348/// basic block with ToReg.
349static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg,
350 MachineBasicBlock *MBB,
351 MachineRegisterInfo &MRI) {
352 for (MachineOperand &O :
353 llvm::make_early_inc_range(MRI.use_operands(FromReg)))
354 if (O.getParent()->getParent() != MBB)
355 O.setReg(ToReg);
356}
357
358/// Return true if the register has a use that occurs outside the
359/// specified loop.
360static bool hasUseAfterLoop(Register Reg, MachineBasicBlock *BB,
361 MachineRegisterInfo &MRI) {
362 for (const MachineOperand &MO : MRI.use_operands(Reg))
363 if (MO.getParent()->getParent() != BB)
364 return true;
365 return false;
366}
367
368/// Generate Phis for the specific block in the generated pipelined code.
369/// This function looks at the Phis from the original code to guide the
370/// creation of new Phis.
371void ModuloScheduleExpander::generateExistingPhis(
372 MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
373 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
374 unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
375 // Compute the stage number for the initial value of the Phi, which
376 // comes from the prolog. The prolog to use depends on which kernel/
377 // epilog we're adding the Phi to.
378 unsigned PrologStage = 0;
379 unsigned PrevStage = 0;
380 bool InKernel = (LastStageNum == CurStageNum);
381 if (InKernel) {
382 PrologStage = LastStageNum - 1;
383 PrevStage = CurStageNum;
384 } else {
385 PrologStage = LastStageNum - (CurStageNum - LastStageNum);
386 PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
387 }
388
389 for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
390 BBE = BB->getFirstNonPHI();
391 BBI != BBE; ++BBI) {
392 Register Def = BBI->getOperand(0).getReg();
393
394 Register InitVal;
395 Register LoopVal;
396 getPhiRegs(*BBI, BB, InitVal, LoopVal);
397
398 Register PhiOp1;
399 // The Phi value from the loop body typically is defined in the loop, but
400 // not always. So, we need to check if the value is defined in the loop.
401 Register PhiOp2 = LoopVal;
402 if (auto It = VRMap[LastStageNum].find(LoopVal);
403 It != VRMap[LastStageNum].end())
404 PhiOp2 = It->second;
405
406 int StageScheduled = Schedule.getStage(&*BBI);
407 int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
408 unsigned NumStages = getStagesForReg(Def, CurStageNum);
409 if (NumStages == 0) {
410 // We don't need to generate a Phi anymore, but we need to rename any uses
411 // of the Phi value.
412 Register NewReg = VRMap[PrevStage][LoopVal];
413 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
414 InitVal, NewReg);
415 auto It = VRMap[CurStageNum].find(LoopVal);
416 if (It != VRMap[CurStageNum].end()) {
417 Register Reg = It->second;
418 VRMap[CurStageNum][Def] = Reg;
419 }
420 }
421 // Adjust the number of Phis needed depending on the number of prologs left,
422 // and the distance from where the Phi is first scheduled. The number of
423 // Phis cannot exceed the number of prolog stages. Each stage can
424 // potentially define two values.
425 unsigned MaxPhis = PrologStage + 2;
426 if (!InKernel && (int)PrologStage <= LoopValStage)
427 MaxPhis = std::max((int)MaxPhis - LoopValStage, 1);
428 unsigned NumPhis = std::min(NumStages, MaxPhis);
429
430 Register NewReg;
431 unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
432 // In the epilog, we may need to look back one stage to get the correct
433 // Phi name, because the epilog and prolog blocks execute the same stage.
434 // The correct name is from the previous block only when the Phi has
435 // been completely scheduled prior to the epilog, and the Phi value is not
436 // needed in multiple stages.
437 int StageDiff = 0;
438 if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
439 NumPhis == 1)
440 StageDiff = 1;
441 // Adjust the computations below when the phi and the loop definition
442 // are scheduled in different stages.
443 if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
444 StageDiff = StageScheduled - LoopValStage;
445 for (unsigned np = 0; np < NumPhis; ++np) {
446 // If the Phi hasn't been scheduled, then use the initial Phi operand
447 // value. Otherwise, use the scheduled version of the instruction. This
448 // is a little complicated when a Phi references another Phi.
449 if (np > PrologStage || StageScheduled >= (int)LastStageNum)
450 PhiOp1 = InitVal;
451 // Check if the Phi has already been scheduled in a prolog stage.
452 else if (PrologStage >= AccessStage + StageDiff + np &&
453 VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
454 PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
455 // Check if the Phi has already been scheduled, but the loop instruction
456 // is either another Phi, or doesn't occur in the loop.
457 else if (PrologStage >= AccessStage + StageDiff + np) {
458 // If the Phi references another Phi, we need to examine the other
459 // Phi to get the correct value.
460 PhiOp1 = LoopVal;
461 MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
462 int Indirects = 1;
463 while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
464 int PhiStage = Schedule.getStage(InstOp1);
465 if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
466 PhiOp1 = getInitPhiReg(*InstOp1, BB);
467 else
468 PhiOp1 = getLoopPhiReg(*InstOp1, BB);
469 InstOp1 = MRI.getVRegDef(PhiOp1);
470 int PhiOpStage = Schedule.getStage(InstOp1);
471 int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
472 if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np) {
473 auto &M = VRMap[PrologStage - StageAdj - Indirects - np];
474 if (auto It = M.find(PhiOp1); It != M.end()) {
475 PhiOp1 = It->second;
476 break;
477 }
478 }
479 ++Indirects;
480 }
481 } else
482 PhiOp1 = InitVal;
483 // If this references a generated Phi in the kernel, get the Phi operand
484 // from the incoming block.
485 if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
486 if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
487 PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
488
489 MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
490 bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
491 // In the epilog, a map lookup is needed to get the value from the kernel,
492 // or previous epilog block. How this is done depends on whether the
493 // instruction is scheduled in the previous block.
494 if (!InKernel) {
495 int StageDiffAdj = 0;
496 if (LoopValStage != -1 && StageScheduled > LoopValStage)
497 StageDiffAdj = StageScheduled - LoopValStage;
498 // Use the loop value defined in the kernel, unless the kernel
499 // contains the last definition of the Phi.
500 if (np == 0 && PrevStage == LastStageNum &&
501 (StageScheduled != 0 || LoopValStage != 0) &&
502 VRMap[PrevStage - StageDiffAdj].count(LoopVal))
503 PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
504 // Use the value defined by the Phi. We add one because we switch
505 // from looking at the loop value to the Phi definition.
506 else if (np > 0 && PrevStage == LastStageNum &&
507 VRMap[PrevStage - np + 1].count(Def))
508 PhiOp2 = VRMap[PrevStage - np + 1][Def];
509 // Use the loop value defined in the kernel.
510 else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
511 VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
512 PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
513 // Use the value defined by the Phi, unless we're generating the first
514 // epilog and the Phi refers to a Phi in a different stage.
515 else if (VRMap[PrevStage - np].count(Def) &&
516 (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
517 (LoopValStage == StageScheduled)))
518 PhiOp2 = VRMap[PrevStage - np][Def];
519 }
520
521 // Check if we can reuse an existing Phi. This occurs when a Phi
522 // references another Phi, and the other Phi is scheduled in an
523 // earlier stage. We can try to reuse an existing Phi up until the last
524 // stage of the current Phi.
525 if (LoopDefIsPhi) {
526 if (static_cast<int>(PrologStage - np) >= StageScheduled) {
527 int LVNumStages = getStagesForPhi(LoopVal);
528 int StageDiff = (StageScheduled - LoopValStage);
529 LVNumStages -= StageDiff;
530 // Make sure the loop value Phi has been processed already.
531 if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
532 NewReg = PhiOp2;
533 unsigned ReuseStage = CurStageNum;
534 if (isLoopCarried(*PhiInst))
535 ReuseStage -= LVNumStages;
536 // Check if the Phi to reuse has been generated yet. If not, then
537 // there is nothing to reuse.
538 if (VRMap[ReuseStage - np].count(LoopVal)) {
539 NewReg = VRMap[ReuseStage - np][LoopVal];
540
541 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
542 Def, NewReg);
543 // Update the map with the new Phi name.
544 VRMap[CurStageNum - np][Def] = NewReg;
545 PhiOp2 = NewReg;
546 if (VRMap[LastStageNum - np - 1].count(LoopVal))
547 PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
548
549 if (IsLast && np == NumPhis - 1)
550 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
551 continue;
552 }
553 }
554 }
555 if (InKernel && StageDiff > 0 &&
556 VRMap[CurStageNum - StageDiff - np].count(LoopVal))
557 PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
558 }
559
560 const TargetRegisterClass *RC = MRI.getRegClass(Def);
561 NewReg = MRI.createVirtualRegister(RC);
562
563 MachineInstrBuilder NewPhi =
564 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
565 TII->get(TargetOpcode::PHI), NewReg);
566 NewPhi.addReg(PhiOp1).addMBB(BB1);
567 NewPhi.addReg(PhiOp2).addMBB(BB2);
568 LIS.InsertMachineInstrInMaps(*NewPhi);
569 if (np == 0)
570 InstrMap[NewPhi] = &*BBI;
571
572 // We define the Phis after creating the new pipelined code, so
573 // we need to rename the Phi values in scheduled instructions.
574
575 Register PrevReg;
576 if (InKernel && VRMap[PrevStage - np].count(LoopVal))
577 PrevReg = VRMap[PrevStage - np][LoopVal];
578 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
579 NewReg, PrevReg);
580 // If the Phi has been scheduled, use the new name for rewriting.
581 if (VRMap[CurStageNum - np].count(Def)) {
582 Register R = VRMap[CurStageNum - np][Def];
583 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
584 NewReg);
585 }
586
587 // Check if we need to rename any uses that occurs after the loop. The
588 // register to replace depends on whether the Phi is scheduled in the
589 // epilog.
590 if (IsLast && np == NumPhis - 1)
591 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
592
593 // In the kernel, a dependent Phi uses the value from this Phi.
594 if (InKernel)
595 PhiOp2 = NewReg;
596
597 // Update the map with the new Phi name.
598 VRMap[CurStageNum - np][Def] = NewReg;
599 }
600
601 while (NumPhis++ < NumStages) {
602 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
603 NewReg, 0);
604 }
605
606 // Check if we need to rename a Phi that has been eliminated due to
607 // scheduling.
608 if (NumStages == 0 && IsLast) {
609 auto &CurStageMap = VRMap[CurStageNum];
610 auto It = CurStageMap.find(LoopVal);
611 if (It != CurStageMap.end())
612 replaceRegUsesAfterLoop(Def, It->second, BB, MRI);
613 }
614 }
615}
616
617/// Generate Phis for the specified block in the generated pipelined code.
618/// These are new Phis needed because the definition is scheduled after the
619/// use in the pipelined sequence.
620void ModuloScheduleExpander::generatePhis(
621 MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
622 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
623 InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
624 bool IsLast) {
625 // Compute the stage number that contains the initial Phi value, and
626 // the Phi from the previous stage.
627 unsigned PrologStage = 0;
628 unsigned PrevStage = 0;
629 unsigned StageDiff = CurStageNum - LastStageNum;
630 bool InKernel = (StageDiff == 0);
631 if (InKernel) {
632 PrologStage = LastStageNum - 1;
633 PrevStage = CurStageNum;
634 } else {
635 PrologStage = LastStageNum - StageDiff;
636 PrevStage = LastStageNum + StageDiff - 1;
637 }
638
639 for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
640 BBE = BB->instr_end();
641 BBI != BBE; ++BBI) {
642 for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
643 MachineOperand &MO = BBI->getOperand(i);
644 if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
645 continue;
646
647 int StageScheduled = Schedule.getStage(&*BBI);
648 assert(StageScheduled != -1 && "Expecting scheduled instruction.");
649 Register Def = MO.getReg();
650 unsigned NumPhis = getStagesForReg(Def, CurStageNum);
651 // An instruction that is scheduled in stage 0 and used after the loop
652 // requires a phi in the epilog for the last definition from either
653 // the kernel or prolog.
654 if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
655 hasUseAfterLoop(Def, BB, MRI))
656 NumPhis = 1;
657 if (!InKernel && (unsigned)StageScheduled > PrologStage)
658 continue;
659
660 Register PhiOp2;
661 if (InKernel) {
662 PhiOp2 = VRMap[PrevStage][Def];
663 if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
664 if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
665 PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
666 }
667 // The number of Phis can't exceed the number of prolog stages. The
668 // prolog stage number is zero based.
669 if (NumPhis > PrologStage + 1 - StageScheduled)
670 NumPhis = PrologStage + 1 - StageScheduled;
671 for (unsigned np = 0; np < NumPhis; ++np) {
672 // Example for
673 // Org:
674 // %Org = ... (Scheduled at Stage#0, NumPhi = 2)
675 //
676 // Prolog0 (Stage0):
677 // %Clone0 = ...
678 // Prolog1 (Stage1):
679 // %Clone1 = ...
680 // Kernel (Stage2):
681 // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
682 // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
683 // %Clone2 = ...
684 // Epilog0 (Stage3):
685 // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
686 // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
687 // Epilog1 (Stage4):
688 // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
689 //
690 // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
691 // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
692 // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
693
694 Register PhiOp1 = VRMap[PrologStage][Def];
695 if (np <= PrologStage)
696 PhiOp1 = VRMap[PrologStage - np][Def];
697 if (!InKernel) {
698 if (PrevStage == LastStageNum && np == 0)
699 PhiOp2 = VRMap[LastStageNum][Def];
700 else
701 PhiOp2 = VRMapPhi[PrevStage - np][Def];
702 }
703
704 const TargetRegisterClass *RC = MRI.getRegClass(Def);
705 Register NewReg = MRI.createVirtualRegister(RC);
706
707 MachineInstrBuilder NewPhi =
708 BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
709 TII->get(TargetOpcode::PHI), NewReg);
710 NewPhi.addReg(PhiOp1).addMBB(BB1);
711 NewPhi.addReg(PhiOp2).addMBB(BB2);
712 LIS.InsertMachineInstrInMaps(*NewPhi);
713 if (np == 0)
714 InstrMap[NewPhi] = &*BBI;
715
716 // Rewrite uses and update the map. The actions depend upon whether
717 // we are generating code for the kernel or epilog blocks.
718 if (InKernel) {
719 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
720 NewReg);
721 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
722 NewReg);
723
724 PhiOp2 = NewReg;
725 VRMapPhi[PrevStage - np - 1][Def] = NewReg;
726 } else {
727 VRMapPhi[CurStageNum - np][Def] = NewReg;
728 if (np == NumPhis - 1)
729 rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
730 NewReg);
731 }
732 if (IsLast && np == NumPhis - 1)
733 replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
734 }
735 }
736 }
737}
738
739/// Remove instructions that generate values with no uses.
740/// Typically, these are induction variable operations that generate values
741/// used in the loop itself. A dead instruction has a definition with
742/// no uses, or uses that occur in the original loop only.
743void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
744 MBBVectorTy &EpilogBBs) {
745 // For each epilog block, check that the value defined by each instruction
746 // is used. If not, delete it.
747 for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
748 for (MachineBasicBlock::reverse_instr_iterator MI = MBB->instr_rbegin(),
749 ME = MBB->instr_rend();
750 MI != ME;) {
751 // From DeadMachineInstructionElim. Don't delete inline assembly.
752 if (MI->isInlineAsm()) {
753 ++MI;
754 continue;
755 }
756 bool SawStore = false;
757 // Check if it's safe to remove the instruction due to side effects.
758 // We can, and want to, remove Phis here.
759 if (!MI->isSafeToMove(SawStore) && !MI->isPHI()) {
760 ++MI;
761 continue;
762 }
763 bool used = true;
764 for (const MachineOperand &MO : MI->all_defs()) {
765 Register reg = MO.getReg();
766 // Assume physical registers are used, unless they are marked dead.
767 if (reg.isPhysical()) {
768 used = !MO.isDead();
769 if (used)
770 break;
771 continue;
772 }
773 unsigned realUses = 0;
774 for (const MachineOperand &U : MRI.use_operands(reg)) {
775 // Check if there are any uses that occur only in the original
776 // loop. If so, that's not a real use.
777 if (U.getParent()->getParent() != BB) {
778 realUses++;
779 used = true;
780 break;
781 }
782 }
783 if (realUses > 0)
784 break;
785 used = false;
786 }
787 if (!used) {
788 LIS.RemoveMachineInstrFromMaps(*MI);
789 MI++->eraseFromParent();
790 continue;
791 }
792 ++MI;
793 }
794 // In the kernel block, check if we can remove a Phi that generates a value
795 // used in an instruction removed in the epilog block.
796 for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
797 Register reg = MI.getOperand(0).getReg();
798 if (MRI.use_begin(reg) == MRI.use_end()) {
799 LIS.RemoveMachineInstrFromMaps(MI);
800 MI.eraseFromParent();
801 }
802 }
803}
804
805/// For loop carried definitions, we split the lifetime of a virtual register
806/// that has uses past the definition in the next iteration. A copy with a new
807/// virtual register is inserted before the definition, which helps with
808/// generating a better register assignment.
809///
810/// v1 = phi(a, v2) v1 = phi(a, v2)
811/// v2 = phi(b, v3) v2 = phi(b, v3)
812/// v3 = .. v4 = copy v1
813/// .. = V1 v3 = ..
814/// .. = v4
815void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
816 MBBVectorTy &EpilogBBs) {
817 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
818 for (auto &PHI : KernelBB->phis()) {
819 Register Def = PHI.getOperand(0).getReg();
820 // Check for any Phi definition that is used as an operand of another Phi
821 // in the same block.
822 for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
823 E = MRI.use_instr_end();
824 I != E; ++I) {
825 if (I->isPHI() && I->getParent() == KernelBB) {
826 // Get the loop carried definition.
827 Register LCDef = getLoopPhiReg(PHI, KernelBB);
828 if (!LCDef)
829 continue;
830 MachineInstr *MI = MRI.getVRegDef(LCDef);
831 if (!MI || MI->getParent() != KernelBB || MI->isPHI())
832 continue;
833 // Search through the rest of the block looking for uses of the Phi
834 // definition. If one occurs, then split the lifetime.
835 Register SplitReg;
836 for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
837 KernelBB->instr_end()))
838 if (BBJ.readsRegister(Def, /*TRI=*/nullptr)) {
839 // We split the lifetime when we find the first use.
840 if (!SplitReg) {
841 SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
842 MachineInstr *newCopy =
843 BuildMI(*KernelBB, MI, MI->getDebugLoc(),
844 TII->get(TargetOpcode::COPY), SplitReg)
845 .addReg(Def);
846 LIS.InsertMachineInstrInMaps(*newCopy);
847 }
848 BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
849 }
850 if (!SplitReg)
851 continue;
852 // Search through each of the epilog blocks for any uses to be renamed.
853 for (auto &Epilog : EpilogBBs)
854 for (auto &I : *Epilog)
855 if (I.readsRegister(Def, /*TRI=*/nullptr))
856 I.substituteRegister(Def, SplitReg, 0, *TRI);
857 break;
858 }
859 }
860 }
861}
862
863/// Create branches from each prolog basic block to the appropriate epilog
864/// block. These edges are needed if the loop ends before reaching the
865/// kernel.
866void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
867 MBBVectorTy &PrologBBs,
868 MachineBasicBlock *KernelBB,
869 MBBVectorTy &EpilogBBs,
870 ValueMapTy *VRMap) {
871 assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
872 MachineBasicBlock *LastPro = KernelBB;
873 MachineBasicBlock *LastEpi = KernelBB;
874
875 // Start from the blocks connected to the kernel and work "out"
876 // to the first prolog and the last epilog blocks.
877 unsigned MaxIter = PrologBBs.size() - 1;
878 for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
879 // Add branches to the prolog that go to the corresponding
880 // epilog, and the fall-thru prolog/kernel block.
881 MachineBasicBlock *Prolog = PrologBBs[j];
882 MachineBasicBlock *Epilog = EpilogBBs[i];
883
885 std::optional<bool> StaticallyGreater =
886 LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
887 unsigned numAdded = 0;
888 if (!StaticallyGreater) {
889 Prolog->addSuccessor(Epilog);
890 numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
891 } else if (*StaticallyGreater == false) {
892 Prolog->addSuccessor(Epilog);
893 Prolog->removeSuccessor(LastPro);
894 LastEpi->removeSuccessor(Epilog);
895 numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
896 Epilog->removePHIsIncomingValuesForPredecessor(*LastEpi);
897 // Remove the blocks that are no longer referenced.
898 if (LastPro != LastEpi) {
899 for (auto &MI : *LastEpi)
900 LIS.RemoveMachineInstrFromMaps(MI);
901 LastEpi->clear();
902 LastEpi->eraseFromParent();
903 }
904 if (LastPro == KernelBB) {
905 LoopInfo->disposed(&LIS);
906 NewKernel = nullptr;
907 }
908 for (auto &MI : *LastPro)
909 LIS.RemoveMachineInstrFromMaps(MI);
910 LastPro->clear();
911 LastPro->eraseFromParent();
912 } else {
913 numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
914 Epilog->removePHIsIncomingValuesForPredecessor(*Prolog);
915 }
916 LastPro = Prolog;
917 LastEpi = Epilog;
918 for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
919 E = Prolog->instr_rend();
920 I != E && numAdded > 0; ++I, --numAdded)
921 updateInstruction(&*I, false, j, 0, VRMap);
922 }
923
924 if (NewKernel) {
925 LoopInfo->setPreheader(PrologBBs[MaxIter]);
926 LoopInfo->adjustTripCount(-(MaxIter + 1));
927 }
928}
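// Illustrative example for addBranches: with two prologs, the prolog next to
// the kernel gets a branch guarded by "trip count > 2" and the outer prolog
// by "trip count > 1"; when such a comparison is statically known to be
// false, the fall-through block (kernel or inner prolog) and its matching
// epilog are deleted rather than branched around.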
929
930/// Return true if we can compute the amount the instruction changes
931/// during each iteration. Set Delta to the amount of the change.
932bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
933 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
934 const MachineOperand *BaseOp;
935 int64_t Offset;
936 bool OffsetIsScalable;
937 if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
938 return false;
939
940 // FIXME: This algorithm assumes instructions have fixed-size offsets.
941 if (OffsetIsScalable)
942 return false;
943
944 if (!BaseOp->isReg())
945 return false;
946
947 Register BaseReg = BaseOp->getReg();
948
949 MachineRegisterInfo &MRI = MF.getRegInfo();
950 // Check if there is a Phi. If so, get the definition in the loop.
951 MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
952 if (BaseDef && BaseDef->isPHI()) {
953 BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
954 BaseDef = MRI.getVRegDef(BaseReg);
955 }
956 if (!BaseDef)
957 return false;
958
959 int D = 0;
960 if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
961 return false;
962
963 Delta = D;
964 return true;
965}
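// Illustrative example for computeDelta: in a loop of the form
//   %addr = PHI %base, %preheader, %addr.next, %loop
//   ...        = load %addr + 0
//   %addr.next = add %addr, 4
// the base register grows by 4 each iteration, so Delta = 4 and copies of the
// load cloned into later stages have their memory operands offset by 4 per
// stage of separation (see updateMemOperands below).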
966
967/// Update the memory operand with a new offset when the pipeliner
968/// generates a new copy of the instruction that refers to a
969/// different memory location.
970void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
971 MachineInstr &OldMI,
972 unsigned Num) {
973 if (Num == 0)
974 return;
975 // If the instruction has memory operands, then adjust the offset
976 // when the instruction appears in different stages.
977 if (NewMI.memoperands_empty())
978 return;
979 SmallVector<MachineMemOperand *, 2> NewMMOs;
980 for (MachineMemOperand *MMO : NewMI.memoperands()) {
981 // TODO: Figure out whether isAtomic is really necessary (see D57601).
982 if (MMO->isVolatile() || MMO->isAtomic() ||
983 (MMO->isInvariant() && MMO->isDereferenceable()) ||
984 (!MMO->getValue())) {
985 NewMMOs.push_back(MMO);
986 continue;
987 }
988 unsigned Delta;
989 if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
990 int64_t AdjOffset = Delta * Num;
991 NewMMOs.push_back(
992 MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
993 } else {
994 NewMMOs.push_back(MF.getMachineMemOperand(
995 MMO, 0, LocationSize::beforeOrAfterPointer()));
996 }
997 }
998 NewMI.setMemRefs(MF, NewMMOs);
999}
1000
1001/// Clone the instruction for the new pipelined loop and update the
1002/// memory operands, if needed.
1003MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
1004 unsigned CurStageNum,
1005 unsigned InstStageNum) {
1006 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1007 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1008 return NewMI;
1009}
1010
1011/// Clone the instruction for the new pipelined loop. If needed, this
1012/// function updates the instruction using the values saved in the
1013/// InstrChanges structure.
1014MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
1015 MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
1016 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
1017 auto It = InstrChanges.find(OldMI);
1018 if (It != InstrChanges.end()) {
1019 std::pair<Register, int64_t> RegAndOffset = It->second;
1020 unsigned BasePos, OffsetPos;
1021 if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
1022 return nullptr;
1023 int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
1024 MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
1025 if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
1026 NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
1027 NewMI->getOperand(OffsetPos).setImm(NewOffset);
1028 }
1029 updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
1030 return NewMI;
1031}
1032
1033/// Update the machine instruction with new virtual registers. This
1034/// function may change the definitions and/or uses.
1035void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
1036 bool LastDef,
1037 unsigned CurStageNum,
1038 unsigned InstrStageNum,
1039 ValueMapTy *VRMap) {
1040 for (MachineOperand &MO : NewMI->operands()) {
1041 if (!MO.isReg() || !MO.getReg().isVirtual())
1042 continue;
1043 Register reg = MO.getReg();
1044 if (MO.isDef()) {
1045 // Create a new virtual register for the definition.
1046 const TargetRegisterClass *RC = MRI.getRegClass(reg);
1047 Register NewReg = MRI.createVirtualRegister(RC);
1048 MO.setReg(NewReg);
1049 VRMap[CurStageNum][reg] = NewReg;
1050 if (LastDef)
1051 replaceRegUsesAfterLoop(reg, NewReg, BB, MRI);
1052 } else if (MO.isUse()) {
1053 MachineInstr *Def = MRI.getVRegDef(reg);
1054 // Compute the stage that contains the last definition for the instruction.
1055 int DefStageNum = Schedule.getStage(Def);
1056 unsigned StageNum = CurStageNum;
1057 if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
1058 // Compute the difference in stages between the definition and the use.
1059 unsigned StageDiff = (InstrStageNum - DefStageNum);
1060 // Make an adjustment to get the last definition.
1061 StageNum -= StageDiff;
1062 }
1063 if (auto It = VRMap[StageNum].find(reg); It != VRMap[StageNum].end())
1064 MO.setReg(It->second);
1065 }
1066 }
1067}
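// Illustrative example for updateInstruction: when an instruction defining %a
// is cloned into stage 1 of the prolog, the clone defines a fresh register
// %a.1 and VRMap[1][%a] is set to %a.1; uses cloned for the same (or a later)
// stage are then rewritten through that map entry.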
1068
1069/// Return the instruction in the loop that defines the register.
1070/// If the definition is a Phi, then follow the Phi operand to
1071/// the instruction in the loop.
1072MachineInstr *ModuloScheduleExpander::findDefInLoop(Register Reg) {
1073 SmallPtrSet<MachineInstr *, 8> Visited;
1074 MachineInstr *Def = MRI.getVRegDef(Reg);
1075 while (Def->isPHI()) {
1076 if (!Visited.insert(Def).second)
1077 break;
1078 for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
1079 if (Def->getOperand(i + 1).getMBB() == BB) {
1080 Def = MRI.getVRegDef(Def->getOperand(i).getReg());
1081 break;
1082 }
1083 }
1084 return Def;
1085}
1086
1087/// Return the new name for the value from the previous stage.
1088Register ModuloScheduleExpander::getPrevMapVal(
1089 unsigned StageNum, unsigned PhiStage, Register LoopVal, unsigned LoopStage,
1090 ValueMapTy *VRMap, MachineBasicBlock *BB) {
1091 Register PrevVal;
1092 if (StageNum > PhiStage) {
1093 MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
1094 if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
1095 // The name is defined in the previous stage.
1096 PrevVal = VRMap[StageNum - 1][LoopVal];
1097 else if (VRMap[StageNum].count(LoopVal))
1098 // The previous name is defined in the current stage when the instruction
1099 // order is swapped.
1100 PrevVal = VRMap[StageNum][LoopVal];
1101 else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
1102 // The loop value hasn't yet been scheduled.
1103 PrevVal = LoopVal;
1104 else if (StageNum == PhiStage + 1)
1105 // The loop value is another phi, which has not been scheduled.
1106 PrevVal = getInitPhiReg(*LoopInst, BB);
1107 else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
1108 // The loop value is another phi, which has been scheduled.
1109 PrevVal =
1110 getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
1111 LoopStage, VRMap, BB);
1112 }
1113 return PrevVal;
1114}
1115
1116/// Rewrite the Phi values in the specified block to use the mappings
1117/// from the initial operand. Once the Phi is scheduled, we switch
1118/// to using the loop value instead of the Phi value, so those names
1119/// do not need to be rewritten.
1120void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
1121 unsigned StageNum,
1122 ValueMapTy *VRMap,
1123 InstrMapTy &InstrMap) {
1124 for (auto &PHI : BB->phis()) {
1125 Register InitVal;
1126 Register LoopVal;
1127 getPhiRegs(PHI, BB, InitVal, LoopVal);
1128 Register PhiDef = PHI.getOperand(0).getReg();
1129
1130 unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
1131 unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
1132 unsigned NumPhis = getStagesForPhi(PhiDef);
1133 if (NumPhis > StageNum)
1134 NumPhis = StageNum;
1135 for (unsigned np = 0; np <= NumPhis; ++np) {
1136 Register NewVal =
1137 getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
1138 if (!NewVal)
1139 NewVal = InitVal;
1140 rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
1141 NewVal);
1142 }
1143 }
1144}
1145
1146/// Rewrite a previously scheduled instruction to use the register value
1147/// from the new instruction. Make sure the instruction occurs in the
1148/// basic block, and we don't change the uses in the new instruction.
1149void ModuloScheduleExpander::rewriteScheduledInstr(
1150 MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
1151 unsigned PhiNum, MachineInstr *Phi, Register OldReg, Register NewReg,
1152 Register PrevReg) {
1153 bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
1154 int StagePhi = Schedule.getStage(Phi) + PhiNum;
1155 // Rewrite uses that have been scheduled already to use the new
1156 // Phi register.
1157 for (MachineOperand &UseOp :
1158 llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
1159 MachineInstr *UseMI = UseOp.getParent();
1160 if (UseMI->getParent() != BB)
1161 continue;
1162 if (UseMI->isPHI()) {
1163 if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
1164 continue;
1165 if (getLoopPhiReg(*UseMI, BB) != OldReg)
1166 continue;
1167 }
1168 InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
1169 assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
1170 MachineInstr *OrigMI = OrigInstr->second;
1171 int StageSched = Schedule.getStage(OrigMI);
1172 int CycleSched = Schedule.getCycle(OrigMI);
1173 Register ReplaceReg;
1174 // This is the stage for the scheduled instruction.
1175 if (StagePhi == StageSched && Phi->isPHI()) {
1176 int CyclePhi = Schedule.getCycle(Phi);
1177 if (PrevReg && InProlog)
1178 ReplaceReg = PrevReg;
1179 else if (PrevReg && !isLoopCarried(*Phi) &&
1180 (CyclePhi <= CycleSched || OrigMI->isPHI()))
1181 ReplaceReg = PrevReg;
1182 else
1183 ReplaceReg = NewReg;
1184 }
1185 // The scheduled instruction occurs before the scheduled Phi, and the
1186 // Phi is not loop carried.
1187 if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
1188 ReplaceReg = NewReg;
1189 if (StagePhi > StageSched && Phi->isPHI())
1190 ReplaceReg = NewReg;
1191 if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
1192 ReplaceReg = NewReg;
1193 if (ReplaceReg) {
1194 const TargetRegisterClass *NRC =
1195 MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1196 if (NRC)
1197 UseOp.setReg(ReplaceReg);
1198 else {
1199 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1200 MachineInstr *newCopy = BuildMI(*BB, UseMI, UseMI->getDebugLoc(),
1201 TII->get(TargetOpcode::COPY), SplitReg)
1202 .addReg(ReplaceReg);
1203 UseOp.setReg(SplitReg);
1204 LIS.InsertMachineInstrInMaps(*newCopy);
1205 }
1206 }
1207 }
1208}
1209
1210bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
1211 if (!Phi.isPHI())
1212 return false;
1213 int DefCycle = Schedule.getCycle(&Phi);
1214 int DefStage = Schedule.getStage(&Phi);
1215
1216 Register InitVal;
1217 Register LoopVal;
1218 getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
1219 MachineInstr *Use = MRI.getVRegDef(LoopVal);
1220 if (!Use || Use->isPHI())
1221 return true;
1222 int LoopCycle = Schedule.getCycle(Use);
1223 int LoopStage = Schedule.getStage(Use);
1224 return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
1225}
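// Illustrative example for isLoopCarried: for
//   %p = PHI %init, %preheader, %x, %loop
// the phi is loop carried when the definition of %x is scheduled at a later
// cycle than the phi, or in a stage no later than the phi's stage, i.e. the
// value actually crosses an iteration boundary in the schedule.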
1226
1227//===----------------------------------------------------------------------===//
1228// PeelingModuloScheduleExpander implementation
1229//===----------------------------------------------------------------------===//
1230// This is a reimplementation of ModuloScheduleExpander that works by creating
1231// a fully correct steady-state kernel and peeling off the prolog and epilogs.
1232//===----------------------------------------------------------------------===//
1233
1234namespace {
1235// Remove any dead phis in MBB. Dead phis either have only one block as input
1236// (in which case they are the identity) or have no uses.
1237void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1238 LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
1239 bool Changed = true;
1240 while (Changed) {
1241 Changed = false;
1242 for (MachineInstr &MI : llvm::make_early_inc_range(MBB->phis())) {
1243 assert(MI.isPHI());
1244 if (MRI.use_empty(MI.getOperand(0).getReg())) {
1245 if (LIS)
1246 LIS->RemoveMachineInstrFromMaps(MI);
1247 MI.eraseFromParent();
1248 Changed = true;
1249 } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1250 const TargetRegisterClass *ConstrainRegClass =
1251 MRI.constrainRegClass(MI.getOperand(1).getReg(),
1252 MRI.getRegClass(MI.getOperand(0).getReg()));
1253 assert(ConstrainRegClass &&
1254 "Expected a valid constrained register class!");
1255 (void)ConstrainRegClass;
1256 MRI.replaceRegWith(MI.getOperand(0).getReg(),
1257 MI.getOperand(1).getReg());
1258 if (LIS)
1259 LIS->RemoveMachineInstrFromMaps(MI);
1260 MI.eraseFromParent();
1261 Changed = true;
1262 }
1263 }
1264 }
1265}
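// Illustrative behavior of EliminateDeadPhis: a phi whose result has no
// remaining uses is erased, and (unless KeepSingleSrcPhi is set) a
// single-input phi such as
//   %a = PHI %b, %bb
// is folded by replacing all uses of %a with %b.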
1266
1267/// Rewrites the kernel block in-place to adhere to the given schedule.
1268/// KernelRewriter holds all of the state required to perform the rewriting.
1269class KernelRewriter {
1270 ModuloSchedule &S;
1271 MachineBasicBlock *BB;
1272 MachineBasicBlock *PreheaderBB, *ExitBB;
1273 MachineRegisterInfo &MRI;
1274 const TargetInstrInfo *TII;
1275 LiveIntervals *LIS;
1276
1277 // Map from register class to canonical undef register for that class.
1278 DenseMap<const TargetRegisterClass *, Register> Undefs;
1279 // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
1280 // this map is only used when InitReg is non-undef.
1281 DenseMap<std::pair<Register, Register>, Register> Phis;
1282 // Map from LoopReg to phi register where the InitReg is undef.
1283 DenseMap<Register, Register> UndefPhis;
1284
1285 // Reg is used by MI. Return the new register MI should use to adhere to the
1286 // schedule. Insert phis as necessary.
1287 Register remapUse(Register Reg, MachineInstr &MI);
1288 // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
1289 // If InitReg is not given it is chosen arbitrarily. It will either be undef
1290 // or will be chosen so as to share another phi.
1291 Register phi(Register LoopReg, std::optional<Register> InitReg = {},
1292 const TargetRegisterClass *RC = nullptr);
1293 // Create an undef register of the given register class.
1294 Register undef(const TargetRegisterClass *RC);
1295
1296public:
1297 KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
1298 LiveIntervals *LIS = nullptr);
1299 void rewrite();
1300};
1301} // namespace
1302
1303KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1304 MachineBasicBlock *LoopBB, LiveIntervals *LIS)
1305 : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
1306 ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
1307 TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
1308 PreheaderBB = *BB->pred_begin();
1309 if (PreheaderBB == BB)
1310 PreheaderBB = *std::next(BB->pred_begin());
1311}
1312
1313void KernelRewriter::rewrite() {
1314 // Rearrange the loop to be in schedule order. Note that the schedule may
1315 // contain instructions that are not owned by the loop block (InstrChanges and
1316 // friends), so we gracefully handle unowned instructions and delete any
1317 // instructions that weren't in the schedule.
1318 auto InsertPt = BB->getFirstTerminator();
1319 MachineInstr *FirstMI = nullptr;
1320 for (MachineInstr *MI : S.getInstructions()) {
1321 if (MI->isPHI())
1322 continue;
1323 if (MI->getParent())
1324 MI->removeFromParent();
1325 BB->insert(InsertPt, MI);
1326 if (!FirstMI)
1327 FirstMI = MI;
1328 }
1329 assert(FirstMI && "Failed to find first MI in schedule");
1330
1331 // At this point all of the scheduled instructions are between FirstMI
1332 // and the end of the block. Kill from the first non-phi to FirstMI.
1333 for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
1334 if (LIS)
1335 LIS->RemoveMachineInstrFromMaps(*I);
1336 (I++)->eraseFromParent();
1337 }
1338
1339 // Now remap every instruction in the loop.
1340 for (MachineInstr &MI : *BB) {
1341 if (MI.isPHI() || MI.isTerminator())
1342 continue;
1343 for (MachineOperand &MO : MI.uses()) {
1344 if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
1345 continue;
1346 Register Reg = remapUse(MO.getReg(), MI);
1347 MO.setReg(Reg);
1348 }
1349 }
1350 EliminateDeadPhis(BB, MRI, LIS);
1351
1352 // Ensure a phi exists for all instructions that are either referenced by
1353 // an illegal phi or by an instruction outside the loop. This allows us to
1354 // treat remaps of these values the same as "normal" values that come from
1355 // loop-carried phis.
1356 for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
1357 if (MI->isPHI()) {
1358 Register R = MI->getOperand(0).getReg();
1359 phi(R);
1360 continue;
1361 }
1362
1363 for (MachineOperand &Def : MI->defs()) {
1364 for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
1365 if (MI.getParent() != BB) {
1366 phi(Def.getReg());
1367 break;
1368 }
1369 }
1370 }
1371 }
1372}
1373
1374Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
1375 MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
1376 if (!Producer)
1377 return Reg;
1378
1379 int ConsumerStage = S.getStage(&MI);
1380 if (!Producer->isPHI()) {
1381 // Non-phi producers are simple to remap. Insert as many phis as the
1382 // difference between the consumer and producer stages.
1383 if (Producer->getParent() != BB)
1384 // Producer was not inside the loop. Use the register as-is.
1385 return Reg;
1386 int ProducerStage = S.getStage(Producer);
1387 assert(ConsumerStage != -1 &&
1388 "In-loop consumer should always be scheduled!");
1389 assert(ConsumerStage >= ProducerStage);
1390 unsigned StageDiff = ConsumerStage - ProducerStage;
1391
1392 for (unsigned I = 0; I < StageDiff; ++I)
1393 Reg = phi(Reg);
1394 return Reg;
1395 }
1396
1397 // First, dive through the phi chain to find the defaults for the generated
1398 // phis.
1399 SmallVector<std::optional<Register>, 4> Defaults;
1400 Register LoopReg = Reg;
1401 auto LoopProducer = Producer;
1402 while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
1403 LoopReg = getLoopPhiReg(*LoopProducer, BB);
1404 Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
1405 LoopProducer = MRI.getUniqueVRegDef(LoopReg);
1406 assert(LoopProducer);
1407 }
1408 int LoopProducerStage = S.getStage(LoopProducer);
1409
1410 std::optional<Register> IllegalPhiDefault;
1411
1412 if (LoopProducerStage == -1) {
1413 // Do nothing.
1414 } else if (LoopProducerStage > ConsumerStage) {
1415 // This schedule is only representable if ProducerStage == ConsumerStage+1.
1416 // In addition, Consumer's cycle must be scheduled after Producer in the
1417 // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
1418 // functions.
1419#ifndef NDEBUG // Silence unused variables in non-asserts mode.
1420 int LoopProducerCycle = S.getCycle(LoopProducer);
1421 int ConsumerCycle = S.getCycle(&MI);
1422#endif
1423 assert(LoopProducerCycle <= ConsumerCycle);
1424 assert(LoopProducerStage == ConsumerStage + 1);
1425 // Peel off the first phi from Defaults and insert a phi between producer
1426 // and consumer. This phi will not be at the front of the block so we
1427 // consider it illegal. It will only exist during the rewrite process; it
1428 // needs to exist while we peel off prologs because these could take the
1429 // default value. After that we can replace all uses with the loop producer
1430 // value.
1431 IllegalPhiDefault = Defaults.front();
1432 Defaults.erase(Defaults.begin());
1433 } else {
1434 assert(ConsumerStage >= LoopProducerStage);
1435 int StageDiff = ConsumerStage - LoopProducerStage;
1436 if (StageDiff > 0) {
1437 LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
1438 << " to " << (Defaults.size() + StageDiff) << "\n");
1439 // If we need more phis than we have defaults for, pad out with undefs for
1440 // the earliest phis, which are at the end of the defaults chain (the
1441 // chain is in reverse order).
1442 Defaults.resize(Defaults.size() + StageDiff,
1443 Defaults.empty() ? std::optional<Register>()
1444 : Defaults.back());
1445 }
1446 }
1447
1448 // Now we know the number of stages to jump back, insert the phi chain.
1449 auto DefaultI = Defaults.rbegin();
1450 while (DefaultI != Defaults.rend())
1451 LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
1452
1453 if (IllegalPhiDefault) {
1454 // The consumer optionally consumes LoopProducer in the same iteration
1455 // (because the producer is scheduled at an earlier cycle than the consumer)
1456 // or the initial value. To facilitate this we create an illegal block here
1457 // by embedding a phi in the middle of the block. We will fix this up
1458 // immediately prior to pruning.
1459 auto RC = MRI.getRegClass(Reg);
1460 Register R = MRI.createVirtualRegister(RC);
1461 MachineInstr *IllegalPhi =
1462 BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1463 .addReg(*IllegalPhiDefault)
1464 .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
1465 .addReg(LoopReg)
1466 .addMBB(BB); // Block choice is arbitrary and has no effect.
1467 // Illegal phi should belong to the producer stage so that it can be
1468 // filtered correctly during peeling.
1469 S.setStage(IllegalPhi, LoopProducerStage);
1470 return R;
1471 }
1472
1473 return LoopReg;
1474}
1475
1476Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg,
1477 const TargetRegisterClass *RC) {
1478 // If the init register is not undef, try and find an existing phi.
1479 if (InitReg) {
1480 auto I = Phis.find({LoopReg, *InitReg});
1481 if (I != Phis.end())
1482 return I->second;
1483 } else {
1484 for (auto &KV : Phis) {
1485 if (KV.first.first == LoopReg)
1486 return KV.second;
1487 }
1488 }
1489
1490 // InitReg is either undef or no existing phi takes InitReg as input. Try and
1491 // find a phi that takes undef as input.
1492 auto I = UndefPhis.find(LoopReg);
1493 if (I != UndefPhis.end()) {
1494 Register R = I->second;
1495 if (!InitReg)
1496 // Found a phi taking undef as input, and this input is undef so return
1497 // without any more changes.
1498 return R;
1499 // Found a phi taking undef as input, so rewrite it to take InitReg.
1500 MachineInstr *MI = MRI.getVRegDef(R);
1501 MI->getOperand(1).setReg(*InitReg);
1502 Phis.insert({{LoopReg, *InitReg}, R});
1503 const TargetRegisterClass *ConstrainRegClass =
1504 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1505 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1506 (void)ConstrainRegClass;
1507 UndefPhis.erase(I);
1508 return R;
1509 }
1510
1511 // Failed to find any existing phi to reuse, so create a new one.
1512 if (!RC)
1513 RC = MRI.getRegClass(LoopReg);
1514 Register R = MRI.createVirtualRegister(RC);
1515 if (InitReg) {
1516 const TargetRegisterClass *ConstrainRegClass =
1517 MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1518 assert(ConstrainRegClass && "Expected a valid constrained register class!");
1519 (void)ConstrainRegClass;
1520 }
1521 BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1522 .addReg(InitReg ? *InitReg : undef(RC))
1523 .addMBB(PreheaderBB)
1524 .addReg(LoopReg)
1525 .addMBB(BB);
1526 if (!InitReg)
1527 UndefPhis[LoopReg] = R;
1528 else
1529 Phis[{LoopReg, *InitReg}] = R;
1530 return R;
1531}
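// Sketch of the PHIs produced by phi() (virtual register names here are
// illustrative only): each call inserts, at the top of the kernel block,
//   %r = PHI %init, %PreheaderBB, %loop, %BB
// reusing an existing PHI when one already joins the same (%loop, %init)
// pair, or rewriting an undef-input PHI for %loop in place. remapUse()
// chains these calls, feeding each result back in as the next %loop operand,
// so the register it finally returns reaches a value from the required
// number of stages back.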
1532
1533Register KernelRewriter::undef(const TargetRegisterClass *RC) {
1534 Register &R = Undefs[RC];
1535 if (R == 0) {
1536 // Create an IMPLICIT_DEF that defines this register if we need it.
1537 // All uses of this should be removed by the time we have finished unrolling
1538 // prologs and epilogs.
1539 R = MRI.createVirtualRegister(RC);
1540 auto *InsertBB = &PreheaderBB->getParent()->front();
1541 BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
1542 TII->get(TargetOpcode::IMPLICIT_DEF), R);
1543 }
1544 return R;
1545}
1546
1547namespace {
1548/// Describes an operand in the kernel of a pipelined loop. Characteristics of
1549/// the operand are discovered, such as how many in-loop PHIs it has to jump
1550/// through and defaults for these phis.
1551class KernelOperandInfo {
1552 MachineBasicBlock *BB;
1553 MachineRegisterInfo &MRI;
1554 SmallVector<Register, 4> PhiDefaults;
1555 MachineOperand *Source;
1556 MachineOperand *Target;
1557
1558public:
1559 KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
1560 const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
1561 : MRI(MRI) {
1562 Source = MO;
1563 BB = MO->getParent()->getParent();
1564 while (isRegInLoop(MO)) {
1565 MachineInstr *MI = MRI.getVRegDef(MO->getReg());
1566 if (MI->isFullCopy()) {
1567 MO = &MI->getOperand(1);
1568 continue;
1569 }
1570 if (!MI->isPHI())
1571 break;
1572 // If this is an illegal phi, don't count it in distance.
1573 if (IllegalPhis.count(MI)) {
1574 MO = &MI->getOperand(3);
1575 continue;
1576 }
1577
1578 Register Default = getInitPhiReg(*MI, BB);
1579 MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
1580 : &MI->getOperand(3);
1581 PhiDefaults.push_back(Default);
1582 }
1583 Target = MO;
1584 }
1585
1586 bool operator==(const KernelOperandInfo &Other) const {
1587 return PhiDefaults.size() == Other.PhiDefaults.size();
1588 }
1589
1590 void print(raw_ostream &OS) const {
1591 OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
1592 << *Source->getParent();
1593 }
1594
1595private:
1596 bool isRegInLoop(MachineOperand *MO) {
1597 return MO->isReg() && MO->getReg().isVirtual() &&
1598 MRI.getVRegDef(MO->getReg())->getParent() == BB;
1599 }
1600};
1601} // namespace
1602
1603MachineBasicBlock *
1604 PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
1605 MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
1606 if (LPD == LPD_Front)
1607 PeeledFront.push_back(NewBB);
1608 else
1609 PeeledBack.push_front(NewBB);
1610 for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
1611 ++I, ++NI) {
1612 CanonicalMIs[&*I] = &*I;
1613 CanonicalMIs[&*NI] = &*I;
1614 BlockMIs[{NewBB, &*I}] = &*NI;
1615 BlockMIs[{BB, &*I}] = &*I;
1616 }
1617 return NewBB;
1618}
1619
1620 void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock *MB,
1621 int MinStage) {
1622 for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
1623 I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1624 MachineInstr *MI = &*I++;
1625 int Stage = getStage(MI);
1626 if (Stage == -1 || Stage >= MinStage)
1627 continue;
1628
1629 for (MachineOperand &DefMO : MI->defs()) {
1630 SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
1631 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1632 // Only PHIs can use values from this block by construction.
1633 // Match with the equivalent PHI in B.
1634 assert(UseMI.isPHI());
1635 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1636 MI->getParent());
1637 Subs.emplace_back(&UseMI, Reg);
1638 }
1639 for (auto &Sub : Subs)
1640 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1641 *MRI.getTargetRegisterInfo());
1642 }
1643 if (LIS)
1644 LIS->RemoveMachineInstrFromMaps(*MI);
1645 MI->eraseFromParent();
1646 }
1647}
1648
1649 void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
1650 MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
1651 auto InsertPt = DestBB->getFirstNonPHI();
1652 DenseMap<Register, Register> Remaps;
1653 for (MachineInstr &MI : llvm::make_early_inc_range(
1654 llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
1655 if (MI.isPHI()) {
1656 // This is an illegal PHI. If we move any instructions using an illegal
1657 // PHI, we need to create a legal Phi.
1658 if (getStage(&MI) != Stage) {
1659 // The legal Phi is not necessary if the illegal phi's stage
1660 // is being moved.
1661 Register PhiR = MI.getOperand(0).getReg();
1662 auto RC = MRI.getRegClass(PhiR);
1663 Register NR = MRI.createVirtualRegister(RC);
1664 MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
1665 DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1666 .addReg(PhiR)
1667 .addMBB(SourceBB);
1668 BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
1669 CanonicalMIs[NI] = CanonicalMIs[&MI];
1670 Remaps[PhiR] = NR;
1671 }
1672 }
1673 if (getStage(&MI) != Stage)
1674 continue;
1675 MI.removeFromParent();
1676 DestBB->insert(InsertPt, &MI);
1677 auto *KernelMI = CanonicalMIs[&MI];
1678 BlockMIs[{DestBB, KernelMI}] = &MI;
1679 BlockMIs.erase({SourceBB, KernelMI});
1680 }
1681 SmallVector<MachineInstr *, 4> PhiToDelete;
1682 for (MachineInstr &MI : DestBB->phis()) {
1683 assert(MI.getNumOperands() == 3);
1684 MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
1685 // If the instruction referenced by the phi is moved inside the block
1686 // we don't need the phi anymore.
1687 if (getStage(Def) == Stage) {
1688 Register PhiReg = MI.getOperand(0).getReg();
1689 assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),
1690 /*TRI=*/nullptr) != -1);
1691 MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1692 MI.getOperand(0).setReg(PhiReg);
1693 PhiToDelete.push_back(&MI);
1694 }
1695 }
1696 for (auto *P : PhiToDelete)
1697 P->eraseFromParent();
1698 InsertPt = DestBB->getFirstNonPHI();
1699 // Helper to clone Phi instructions into the destination block. We clone Phi
1700 // greedily to avoid combinatorial explosion of Phi instructions.
1701 auto clonePhi = [&](MachineInstr *Phi) {
1702 MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
1703 DestBB->insert(InsertPt, NewMI);
1704 Register OrigR = Phi->getOperand(0).getReg();
1705 Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
1706 NewMI->getOperand(0).setReg(R);
1707 NewMI->getOperand(1).setReg(OrigR);
1708 NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
1709 Remaps[OrigR] = R;
1710 CanonicalMIs[NewMI] = CanonicalMIs[Phi];
1711 BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
1712 PhiNodeLoopIteration[NewMI] = PhiNodeLoopIteration[Phi];
1713 return R;
1714 };
1715 for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
1716 for (MachineOperand &MO : I->uses()) {
1717 if (!MO.isReg())
1718 continue;
1719 if (auto It = Remaps.find(MO.getReg()); It != Remaps.end())
1720 MO.setReg(It->second);
1721 else {
1722 // If we are using a phi from the source block we need to add a new phi
1723 // pointing to the old one.
1724 MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg());
1725 if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1726 Register R = clonePhi(Use);
1727 MO.setReg(R);
1728 }
1729 }
1730 }
1731 }
1732}
1733
1734 Register
1735 PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi,
1736 MachineInstr *Phi) {
1737 unsigned distance = PhiNodeLoopIteration[Phi];
1738 MachineInstr *CanonicalUse = CanonicalPhi;
1739 Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
1740 for (unsigned I = 0; I < distance; ++I) {
1741 assert(CanonicalUse->isPHI());
1742 assert(CanonicalUse->getNumOperands() == 5);
1743 unsigned LoopRegIdx = 3, InitRegIdx = 1;
1744 if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
1745 std::swap(LoopRegIdx, InitRegIdx);
1746 CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1747 CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1748 }
1749 return CanonicalUseReg;
1750}
1751
1752 void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
1753 BitVector LS(Schedule.getNumStages(), true);
1754 BitVector AS(Schedule.getNumStages(), true);
1755 LiveStages[BB] = LS;
1756 AvailableStages[BB] = AS;
1757
1758 // Peel out the prologs.
1759 LS.reset();
1760 for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
1761 LS[I] = true;
1762 Prologs.push_back(peelKernel(LPD_Front));
1763 LiveStages[Prologs.back()] = LS;
1764 AvailableStages[Prologs.back()] = LS;
1765 }
1766
1767 // Create a block that will end up as the new loop exiting block (dominated by
1768 // all prologs and epilogs). It will only contain PHIs, in the same order as
1769 // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
1770 // that the exiting block is a (sub) clone of BB. This in turn gives us the
1771 // property that any value defined in BB but used outside of BB is used by a
1772 // PHI in the exiting block.
1773 ExitingBB = CreateLCSSAExitingBlock();
1774 EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1775 // Push out the epilogs, again in reverse order.
1776 // We can't assume anything about the minimum loop trip count at this point,
1777 // so emit a fairly complex epilog.
1778
1779 // We first peel number of stages minus one epilogue. Then we remove dead
1780 // stages and reorder instructions based on their stage. If we have 3 stages
1781 // we generate first:
1782 // E0[3, 2, 1]
1783 // E1[3', 2']
1784 // E2[3'']
1785 // And then we move instructions based on their stages to have:
1786 // E0[3]
1787 // E1[2, 3']
1788 // E2[1, 2', 3'']
1789 // The transformation is legal because we only move instructions past
1790 // instructions of a previous loop iteration.
1791 for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1792 Epilogs.push_back(peelKernel(LPD_Back));
1793 MachineBasicBlock *B = Epilogs.back();
1794 filterInstructions(B, Schedule.getNumStages() - I);
1795 // Keep track of which iteration each phi belongs to. We need it to know
1796 // what version of the variable to use during prologue/epilogue stitching.
1797 EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1798 for (MachineInstr &Phi : B->phis())
1799 PhiNodeLoopIteration[&Phi] = Schedule.getNumStages() - I;
1800 }
1801 for (size_t I = 0; I < Epilogs.size(); I++) {
1802 LS.reset();
1803 for (size_t J = I; J < Epilogs.size(); J++) {
1804 int Iteration = J;
1805 unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1806 // Move stage one block at a time so that Phi nodes are updated correctly.
1807 for (size_t K = Iteration; K > I; K--)
1808 moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
1809 LS[Stage] = true;
1810 }
1811 LiveStages[Epilogs[I]] = LS;
1812 AvailableStages[Epilogs[I]] = AS;
1813 }
1814
1815 // Now we've defined all the prolog and epilog blocks as a fallthrough
1816 // sequence, add the edges that will be followed if the loop trip count is
1817 // lower than the number of stages (connecting prologs directly with epilogs).
1818 auto PI = Prologs.begin();
1819 auto EI = Epilogs.begin();
1820 assert(Prologs.size() == Epilogs.size());
1821 for (; PI != Prologs.end(); ++PI, ++EI) {
1822 MachineBasicBlock *Pred = *(*EI)->pred_begin();
1823 (*PI)->addSuccessor(*EI);
1824 for (MachineInstr &MI : (*EI)->phis()) {
1825 Register Reg = MI.getOperand(1).getReg();
1826 MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
1827 if (Use && Use->getParent() == Pred) {
1828 MachineInstr *CanonicalUse = CanonicalMIs[Use];
1829 if (CanonicalUse->isPHI()) {
1830 // If the use comes from a phi we need to skip as many phi as the
1831 // distance between the epilogue and the kernel. Trace through the phi
1832 // chain to find the right value.
1833 Reg = getPhiCanonicalReg(CanonicalUse, Use);
1834 }
1835 Reg = getEquivalentRegisterIn(Reg, *PI);
1836 }
1837 MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
1838 MI.addOperand(MachineOperand::CreateMBB(*PI));
1839 }
1840 }
1841
1842 // Create a list of all blocks in order.
1843 SmallVector<MachineBasicBlock *, 8> Blocks;
1844 llvm::copy(PeeledFront, std::back_inserter(Blocks));
1845 Blocks.push_back(BB);
1846 llvm::copy(PeeledBack, std::back_inserter(Blocks));
1847
1848 // Iterate in reverse order over all instructions, remapping as we go.
1849 for (MachineBasicBlock *B : reverse(Blocks)) {
1850 for (auto I = B->instr_rbegin();
1851 I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1852 MachineBasicBlock::reverse_instr_iterator MI = I++;
1853 rewriteUsesOf(&*MI);
1854 }
1855 }
1856 for (auto *MI : IllegalPhisToDelete) {
1857 if (LIS)
1858 LIS->RemoveMachineInstrFromMaps(*MI);
1859 MI->eraseFromParent();
1860 }
1861 IllegalPhisToDelete.clear();
1862
1863 // Now all remapping has been done, we're free to optimize the generated code.
1864 for (MachineBasicBlock *B : reverse(Blocks))
1865 EliminateDeadPhis(B, MRI, LIS);
1866 EliminateDeadPhis(ExitingBB, MRI, LIS);
1867}
1868
1869 MachineBasicBlock *PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
1870 MachineFunction &MF = *BB->getParent();
1871 MachineBasicBlock *Exit = *BB->succ_begin();
1872 if (Exit == BB)
1873 Exit = *std::next(BB->succ_begin());
1874
1875 MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
1876 MF.insert(std::next(BB->getIterator()), NewBB);
1877
1878 // Clone all phis in BB into NewBB and rewrite.
1879 for (MachineInstr &MI : BB->phis()) {
1880 auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
1881 Register OldR = MI.getOperand(3).getReg();
1882 Register R = MRI.createVirtualRegister(RC);
1883 SmallVector<MachineInstr *, 4> Uses;
1884 for (MachineInstr &Use : MRI.use_instructions(OldR))
1885 if (Use.getParent() != BB)
1886 Uses.push_back(&Use);
1887 for (MachineInstr *Use : Uses)
1888 Use->substituteRegister(OldR, R, /*SubIdx=*/0,
1889 *MRI.getTargetRegisterInfo());
1890 MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1891 .addReg(OldR)
1892 .addMBB(BB);
1893 BlockMIs[{NewBB, &MI}] = NI;
1894 CanonicalMIs[NI] = &MI;
1895 }
1896 BB->replaceSuccessor(Exit, NewBB);
1897 Exit->replacePhiUsesWith(BB, NewBB);
1898 NewBB->addSuccessor(Exit);
1899
1900 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
1901 SmallVector<MachineOperand, 4> Cond;
1902 bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
1903 (void)CanAnalyzeBr;
1904 assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
1905 TII->removeBranch(*BB);
1906 TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
1907 Cond, DebugLoc());
1908 TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
1909 return NewBB;
1910}
1911
1912 Register
1913 PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
1914 MachineBasicBlock *BB) {
1915 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
1916 unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
1917 return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
1918}
1919
1920 void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
1921 if (MI->isPHI()) {
1922 // This is an illegal PHI. The loop-carried (desired) value is operand 3,
1923 // and it is produced by this block.
1924 Register PhiR = MI->getOperand(0).getReg();
1925 Register R = MI->getOperand(3).getReg();
1926 int RMIStage = getStage(MRI.getUniqueVRegDef(R));
1927 if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
1928 R = MI->getOperand(1).getReg();
1929 MRI.setRegClass(R, MRI.getRegClass(PhiR));
1930 MRI.replaceRegWith(PhiR, R);
1931 // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1932 // later to figure out how to remap registers.
1933 MI->getOperand(0).setReg(PhiR);
1934 IllegalPhisToDelete.push_back(MI);
1935 return;
1936 }
1937
1938 int Stage = getStage(MI);
1939 if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
1940 LiveStages[MI->getParent()].test(Stage))
1941 // Instruction is live, no rewriting to do.
1942 return;
1943
1944 for (MachineOperand &DefMO : MI->defs()) {
1945 SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
1946 for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1947 // Only PHIs can use values from this block by construction.
1948 // Match with the equivalent PHI in B.
1949 assert(UseMI.isPHI());
1950 Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1951 MI->getParent());
1952 Subs.emplace_back(&UseMI, Reg);
1953 }
1954 for (auto &Sub : Subs)
1955 Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1956 *MRI.getTargetRegisterInfo());
1957 }
1958 if (LIS)
1959 LIS->RemoveMachineInstrFromMaps(*MI);
1960 MI->eraseFromParent();
1961}
1962
1963 void PeelingModuloScheduleExpander::fixupBranches() {
1964 // Work outwards from the kernel.
1965 bool KernelDisposed = false;
1966 int TC = Schedule.getNumStages() - 1;
1967 for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
1968 ++PI, ++EI, --TC) {
1969 MachineBasicBlock *Prolog = *PI;
1970 MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
1971 MachineBasicBlock *Epilog = *EI;
1972 SmallVector<MachineOperand, 4> Cond;
1973 TII->removeBranch(*Prolog);
1974 std::optional<bool> StaticallyGreater =
1975 LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
1976 if (!StaticallyGreater) {
1977 LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
1978 // Dynamically branch based on Cond.
1979 TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
1980 } else if (*StaticallyGreater == false) {
1981 LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
1982 // Prolog never falls through; branch to epilog and orphan interior
1983 // blocks. Leave it to unreachable-block-elim to clean up.
1984 Prolog->removeSuccessor(Fallthrough);
1985 for (MachineInstr &P : Fallthrough->phis()) {
1986 P.removeOperand(2);
1987 P.removeOperand(1);
1988 }
1989 TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
1990 KernelDisposed = true;
1991 } else {
1992 LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
1993 // Prolog always falls through; remove incoming values in epilog.
1994 Prolog->removeSuccessor(Epilog);
1995 for (MachineInstr &P : Epilog->phis()) {
1996 P.removeOperand(4);
1997 P.removeOperand(3);
1998 }
1999 }
2000 }
2001
2002 if (!KernelDisposed) {
2003 LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
2004 LoopInfo->setPreheader(Prologs.back());
2005 } else {
2006 LoopInfo->disposed();
2007 }
2008}
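// Worked example of the guards above, assuming a 3-stage schedule (two
// prologs and two epilogs): the prolog nearest the kernel is visited first
// with TC = 2, so it only falls through into the kernel when the trip count
// is greater than 2 (the kernel needs at least NumStages iterations);
// otherwise it branches to its paired epilog. The outer prolog is then
// guarded by TC > 1. When the kernel is kept, the loop's trip count is
// adjusted by -(NumStages - 1) and the innermost prolog becomes the new
// preheader; when a guard is statically false the kernel is disposed of and
// the orphaned blocks are left to unreachable-block-elim.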
2009
2010 void PeelingModuloScheduleExpander::rewriteKernel() {
2011 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2012 KR.rewrite();
2013}
2014
2015 void PeelingModuloScheduleExpander::expand() {
2016 BB = Schedule.getLoop()->getTopBlock();
2017 Preheader = Schedule.getLoop()->getLoopPreheader();
2018 LLVM_DEBUG(Schedule.dump());
2019 LoopInfo = TII->analyzeLoopForPipelining(BB);
2020 assert(LoopInfo);
2021
2022 rewriteKernel();
2023 peelPrologAndEpilogs();
2024 fixupBranches();
2025}
2026
2027 void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
2028 BB = Schedule.getLoop()->getTopBlock();
2029 Preheader = Schedule.getLoop()->getLoopPreheader();
2030
2031 // Dump the schedule before we invalidate and remap all its instructions.
2032 // Stash it in a string so we can print it if we found an error.
2033 std::string ScheduleDump;
2034 raw_string_ostream OS(ScheduleDump);
2035 Schedule.print(OS);
2036 OS.flush();
2037
2038 // First, run the normal ModuloScheduleExpander. We don't support any
2039 // InstrChanges.
2040 assert(LIS && "Requires LiveIntervals!");
2041 ModuloScheduleExpander MSE(MF, Schedule, *LIS,
2042 /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2043 MSE.expand();
2044 MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
2045 if (!ExpandedKernel) {
2046 // The expander optimized away the kernel. We can't do any useful checking.
2047 MSE.cleanup();
2048 return;
2049 }
2050 // Before running the KernelRewriter, re-add BB into the CFG.
2051 Preheader->addSuccessor(BB);
2052
2053 // Now run the new expansion algorithm.
2054 KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
2055 KR.rewrite();
2057
2058 // Collect all illegal phis that the new algorithm created. We'll give these
2059 // to KernelOperandInfo.
2060 SmallPtrSet<MachineInstr *, 4> IllegalPhis;
2061 for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
2062 if (NI->isPHI())
2063 IllegalPhis.insert(&*NI);
2064 }
2065
2066 // Co-iterate across both kernels. We expect them to be identical apart from
2067 // phis and full COPYs (we look through both).
2068 SmallVector<std::pair<KernelOperandInfo, KernelOperandInfo>, 8> KOIs;
2069 auto OI = ExpandedKernel->begin();
2070 auto NI = BB->begin();
2071 for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
2072 while (OI->isPHI() || OI->isFullCopy())
2073 ++OI;
2074 while (NI->isPHI() || NI->isFullCopy())
2075 ++NI;
2076 assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
2077 // Analyze every operand separately.
2078 for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
2079 OOpI != OI->operands_end(); ++OOpI, ++NOpI)
2080 KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
2081 KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
2082 }
2083
2084 bool Failed = false;
2085 for (auto &OldAndNew : KOIs) {
2086 if (OldAndNew.first == OldAndNew.second)
2087 continue;
2088 Failed = true;
2089 errs() << "Modulo kernel validation error: [\n";
2090 errs() << " [golden] ";
2091 OldAndNew.first.print(errs());
2092 errs() << " ";
2093 OldAndNew.second.print(errs());
2094 errs() << "]\n";
2095 }
2096
2097 if (Failed) {
2098 errs() << "Golden reference kernel:\n";
2099 ExpandedKernel->print(errs());
2100 errs() << "New kernel:\n";
2101 BB->print(errs());
2102 errs() << ScheduleDump;
2103 report_fatal_error(
2104 "Modulo kernel validation (-pipeliner-experimental-cg) failed");
2105 }
2106
2107 // Cleanup by removing BB from the CFG again as the original
2108 // ModuloScheduleExpander intended.
2109 Preheader->removeSuccessor(BB);
2110 MSE.cleanup();
2111}
2112
2113MachineInstr *ModuloScheduleExpanderMVE::cloneInstr(MachineInstr *OldMI) {
2114 MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2115
2116 // TODO: Offset information needs to be corrected.
2117 NewMI->dropMemRefs(MF);
2118
2119 return NewMI;
2120}
2121
2122/// Create a dedicated exit for Loop. Exit is the original exit for Loop.
2123 /// If it is already a dedicated exit, return it. Otherwise, insert a new
2124/// block between them and return the new block.
2125 static MachineBasicBlock *createDedicatedExit(MachineBasicBlock *Loop,
2126 MachineBasicBlock *Exit,
2127 LiveIntervals &LIS) {
2128 if (Exit->pred_size() == 1)
2129 return Exit;
2130
2131 MachineFunction *MF = Loop->getParent();
2132 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
2133
2134 MachineBasicBlock *NewExit =
2135 MF->CreateMachineBasicBlock(Loop->getBasicBlock());
2136 MF->insert(Loop->getIterator(), NewExit);
2137 LIS.insertMBBInMaps(NewExit);
2138
2139 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2140 SmallVector<MachineOperand, 4> Cond;
2141 TII->analyzeBranch(*Loop, TBB, FBB, Cond);
2142 if (TBB == Loop)
2143 FBB = NewExit;
2144 else if (FBB == Loop)
2145 TBB = NewExit;
2146 else
2147 llvm_unreachable("unexpected loop structure");
2148 TII->removeBranch(*Loop);
2149 TII->insertBranch(*Loop, TBB, FBB, Cond, DebugLoc());
2150 Loop->replaceSuccessor(Exit, NewExit);
2151 TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());
2152 NewExit->addSuccessor(Exit);
2153
2154 Exit->replacePhiUsesWith(Loop, NewExit);
2155
2156 return NewExit;
2157}
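// Sketch of the CFG rewrite above (block names follow the parameters): when
// Exit has more than one predecessor,
//   before:  Loop -> Exit      (Exit also reached from elsewhere)
//   after:   Loop -> NewExit -> Exit
// The conditional branch in Loop is retargeted from Exit to NewExit, NewExit
// ends with an unconditional branch to Exit, and Exit's PHIs are rewritten to
// take their incoming values from NewExit instead of Loop.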
2158
2159/// Insert branch code into the end of MBB. It branches to GreaterThan if the
2160/// remaining trip count for instructions in LastStage0Insts is greater than
2161/// RequiredTC, and to Otherwise otherwise.
2162void ModuloScheduleExpanderMVE::insertCondBranch(MachineBasicBlock &MBB,
2163 int RequiredTC,
2164 InstrMapTy &LastStage0Insts,
2165 MachineBasicBlock &GreaterThan,
2166 MachineBasicBlock &Otherwise) {
2167 SmallVector<MachineOperand, 4> Cond;
2168 LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,
2169 LastStage0Insts);
2170
2171 if (SwapBranchTargetsMVE) {
2172 // Set SwapBranchTargetsMVE to true if a target prefers to replace TBB and
2173 // FBB for optimal performance.
2174 if (TII->reverseBranchCondition(Cond))
2175 llvm_unreachable("can not reverse branch condition");
2176 TII->insertBranch(MBB, &Otherwise, &GreaterThan, Cond, DebugLoc());
2177 } else {
2178 TII->insertBranch(MBB, &GreaterThan, &Otherwise, Cond, DebugLoc());
2179 }
2180}
2181
2182/// Generate a pipelined loop that is unrolled by using MVE algorithm and any
2183/// other necessary blocks. The control flow is modified to execute the
2184/// pipelined loop if the trip count satisfies the condition, otherwise the
2185/// original loop. The original loop is also used to execute the remainder
2186/// iterations which occur due to unrolling.
2187void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
2188 // The control flow for pipelining with MVE:
2189 //
2190 // OrigPreheader:
2191 // // The block that is originally the loop preheader
2192 // goto Check
2193 //
2194 // Check:
2195 // // Check whether the trip count satisfies the requirements to pipeline.
2196 // if (LoopCounter > NumStages + NumUnroll - 2)
2197 // // The minimum number of iterations to pipeline =
2198 // // iterations executed in prolog/epilog (NumStages-1) +
2199 // // iterations executed in one kernel run (NumUnroll)
2200 // goto Prolog
2201 // // fallback to the original loop
2202 // goto NewPreheader
2203 //
2204 // Prolog:
2205 // // All prolog stages. There are no direct branches to the epilogue.
2206 // goto NewKernel
2207 //
2208 // NewKernel:
2209 // // NumUnroll copies of the kernel
2210 // if (LoopCounter > MVE-1)
2211 // goto NewKernel
2212 // goto Epilog
2213 //
2214 // Epilog:
2215 // // All epilog stages.
2216 // if (LoopCounter > 0)
2217 // // The remainder is executed in the original loop
2218 // goto NewPreheader
2219 // goto NewExit
2220 //
2221 // NewPreheader:
2222 // // Newly created preheader for the original loop.
2223 // // The initial values of the phis in the loop are merged from two paths.
2224 // NewInitVal = Phi OrigInitVal, Check, PipelineLastVal, Epilog
2225 // goto OrigKernel
2226 //
2227 // OrigKernel:
2228 // // The original loop block.
2229 // if (LoopCounter != 0)
2230 // goto OrigKernel
2231 // goto NewExit
2232 //
2233 // NewExit:
2234 // // Newly created dedicated exit for the original loop.
2235 // // Merge values which are referenced after the loop
2236 // Merged = Phi OrigVal, OrigKernel, PipelineVal, Epilog
2237 // goto OrigExit
2238 //
2239 // OrigExit:
2240 // // The block that is originally the loop exit.
2241 // // If it is already a dedicated exit, NewExit is not created.
2242
2243 // An example of where each stage is executed:
2244 // Assume #Stages 3, #MVE 4, #Iterations 12
2245 // Iter 0 1 2 3 4 5 6 7 8 9 10-11
2246 // -------------------------------------------------
2247 // Stage 0 Prolog#0
2248 // Stage 1 0 Prolog#1
2249 // Stage 2 1 0 Kernel Unroll#0 Iter#0
2250 // Stage 2 1 0 Kernel Unroll#1 Iter#0
2251 // Stage 2 1 0 Kernel Unroll#2 Iter#0
2252 // Stage 2 1 0 Kernel Unroll#3 Iter#0
2253 // Stage 2 1 0 Kernel Unroll#0 Iter#1
2254 // Stage 2 1 0 Kernel Unroll#1 Iter#1
2255 // Stage 2 1 0 Kernel Unroll#2 Iter#1
2256 // Stage 2 1 0 Kernel Unroll#3 Iter#1
2257 // Stage 2 1 Epilog#0
2258 // Stage 2 Epilog#1
2259 // Stage 0-2 OrigKernel
2260
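// A rough trip-count check for the example above (3 stages, NumUnroll = 4):
// Check branches to Prolog only when LoopCounter > NumStages + NumUnroll - 2
// = 5, i.e. at least 6 iterations: NumStages - 1 = 2 of them are covered by
// the prolog/epilog pair and at least NumUnroll = 4 by one pass through the
// unrolled kernel. With 12 iterations, the unrolled kernel body runs twice
// and the last two iterations fall back to the original loop, as in the
// table above.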
2261 LoopInfo = TII->analyzeLoopForPipelining(OrigKernel);
2262 assert(LoopInfo && "Must be able to analyze loop!");
2263
2264 calcNumUnroll();
2265
2266 Check = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2267 Prolog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2268 NewKernel = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2269 Epilog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2270 NewPreheader = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2271
2272 MF.insert(OrigKernel->getIterator(), Check);
2273 LIS.insertMBBInMaps(Check);
2274 MF.insert(OrigKernel->getIterator(), Prolog);
2275 LIS.insertMBBInMaps(Prolog);
2276 MF.insert(OrigKernel->getIterator(), NewKernel);
2277 LIS.insertMBBInMaps(NewKernel);
2278 MF.insert(OrigKernel->getIterator(), Epilog);
2279 LIS.insertMBBInMaps(Epilog);
2280 MF.insert(OrigKernel->getIterator(), NewPreheader);
2281 LIS.insertMBBInMaps(NewPreheader);
2282
2283 NewExit = createDedicatedExit(OrigKernel, OrigExit, LIS);
2284
2285 NewPreheader->transferSuccessorsAndUpdatePHIs(OrigPreheader);
2286 TII->insertUnconditionalBranch(*NewPreheader, OrigKernel, DebugLoc());
2287
2288 OrigPreheader->addSuccessor(Check);
2289 TII->removeBranch(*OrigPreheader);
2290 TII->insertUnconditionalBranch(*OrigPreheader, Check, DebugLoc());
2291
2292 Check->addSuccessor(Prolog);
2293 Check->addSuccessor(NewPreheader);
2294
2295 Prolog->addSuccessor(NewKernel);
2296
2297 NewKernel->addSuccessor(NewKernel);
2298 NewKernel->addSuccessor(Epilog);
2299
2300 Epilog->addSuccessor(NewPreheader);
2301 Epilog->addSuccessor(NewExit);
2302
2303 InstrMapTy LastStage0Insts;
2304 insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,
2305 LastStage0Insts, *Prolog, *NewPreheader);
2306
2307 // VRMaps map (prolog/kernel/epilog phase#, original register#) to new
2308 // register#
2309 SmallVector<ValueMapTy> PrologVRMap, KernelVRMap, EpilogVRMap;
2310 generateProlog(PrologVRMap);
2311 generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
2312 generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
2313}
2314
2315/// Replace MI's use operands according to the maps.
2316void ModuloScheduleExpanderMVE::updateInstrUse(
2317 MachineInstr *MI, int StageNum, int PhaseNum,
2318 SmallVectorImpl<ValueMapTy> &CurVRMap,
2319 SmallVectorImpl<ValueMapTy> *PrevVRMap) {
2320 // If MI is in the prolog/kernel/epilog block, CurVRMap is
2321 // PrologVRMap/KernelVRMap/EpilogVRMap respectively.
2322 // PrevVRMap is nullptr/PhiVRMap/KernelVRMap respectively.
2323 // Refer to the appropriate map according to the stage difference between
2324 // MI and the definition of an operand.
2325
2326 for (MachineOperand &UseMO : MI->uses()) {
2327 if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
2328 continue;
2329 int DiffStage = 0;
2330 Register OrigReg = UseMO.getReg();
2331 MachineInstr *DefInst = MRI.getVRegDef(OrigReg);
2332 if (!DefInst || DefInst->getParent() != OrigKernel)
2333 continue;
2334 Register InitReg;
2335 Register DefReg = OrigReg;
2336 if (DefInst->isPHI()) {
2337 ++DiffStage;
2338 Register LoopReg;
2339 getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
2340 // LoopReg is guaranteed to be defined within the loop by canApply()
2341 DefReg = LoopReg;
2342 DefInst = MRI.getVRegDef(LoopReg);
2343 }
2344 unsigned DefStageNum = Schedule.getStage(DefInst);
2345 DiffStage += StageNum - DefStageNum;
2346 Register NewReg;
2347 if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))
2348 // NewReg is defined in a previous phase of the same block
2349 NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
2350 else if (!PrevVRMap)
2351 // Since this is the first iteration, refer to the initial register of the
2352 // loop
2353 NewReg = InitReg;
2354 else
2355 // Cases where DiffStage is larger than PhaseNum.
2356 // If MI is in the kernel block, the value is defined by the previous
2357 // iteration and PhiVRMap is referenced. If MI is in the epilog block, the
2358 // value is defined in the kernel block and KernelVRMap is referenced.
2359 NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];
2360
2361 const TargetRegisterClass *NRC =
2362 MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));
2363 if (NRC)
2364 UseMO.setReg(NewReg);
2365 else {
2366 Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2367 MachineInstr *NewCopy = BuildMI(*OrigKernel, MI, MI->getDebugLoc(),
2368 TII->get(TargetOpcode::COPY), SplitReg)
2369 .addReg(NewReg);
2370 LIS.InsertMachineInstrInMaps(*NewCopy);
2371 UseMO.setReg(SplitReg);
2372 }
2373 }
2374}
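// Worked example of the map selection above: suppose a cloned instruction in
// the first epilog copy (PhaseNum = 0) is at stage S and reads a value
// defined directly (not through a phi) at stage S - 1, so DiffStage = 1.
// Since PhaseNum < DiffStage the value cannot come from an earlier epilog
// phase; PrevVRMap is KernelVRMap here, and the use is rewritten to
// KernelVRMap[NumUnroll - (DiffStage - PhaseNum)][DefReg], i.e. the copy
// produced by the last kernel unroll.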
2375
2376 /// Return the phi that references Reg as its loop-carried value, if any.
2377 /// canApply() guarantees that at most one such phi exists.
2378 static MachineInstr *getLoopPhiUser(Register Reg, MachineBasicBlock *Loop) {
2379 for (MachineInstr &Phi : Loop->phis()) {
2380 Register InitVal, LoopVal;
2381 getPhiRegs(Phi, Loop, InitVal, LoopVal);
2382 if (LoopVal == Reg)
2383 return &Phi;
2384 }
2385 return nullptr;
2386}
2387
2388/// Generate phis for registers defined by OrigMI.
2389void ModuloScheduleExpanderMVE::generatePhi(
2390 MachineInstr *OrigMI, int UnrollNum,
2391 SmallVectorImpl<ValueMapTy> &PrologVRMap,
2392 SmallVectorImpl<ValueMapTy> &KernelVRMap,
2393 SmallVectorImpl<ValueMapTy> &PhiVRMap) {
2394 int StageNum = Schedule.getStage(OrigMI);
2395 bool UsePrologReg;
2396 if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
2397 UsePrologReg = true;
2398 else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)
2399 UsePrologReg = false;
2400 else
2401 return;
2402
2403 // Examples that show which stages are merged by phi.
2404 // Meaning of the symbol following the stage number:
2405 // a/b: Stages with the same letter are merged (UsePrologReg == true)
2406 // +: Merged with the initial value (UsePrologReg == false)
2407 // *: No phis required
2408 //
2409 // #Stages 3, #MVE 4
2410 // Iter 0 1 2 3 4 5 6 7 8
2411 // -----------------------------------------
2412 // Stage 0a Prolog#0
2413 // Stage 1a 0b Prolog#1
2414 // Stage 2* 1* 0* Kernel Unroll#0
2415 // Stage 2* 1* 0+ Kernel Unroll#1
2416 // Stage 2* 1+ 0a Kernel Unroll#2
2417 // Stage 2+ 1a 0b Kernel Unroll#3
2418 //
2419 // #Stages 3, #MVE 2
2420 // Iter 0 1 2 3 4 5 6 7 8
2421 // -----------------------------------------
2422 // Stage 0a Prolog#0
2423 // Stage 1a 0b Prolog#1
2424 // Stage 2* 1+ 0a Kernel Unroll#0
2425 // Stage 2+ 1a 0b Kernel Unroll#1
2426 //
2427 // #Stages 3, #MVE 1
2428 // Iter 0 1 2 3 4 5 6 7 8
2429 // -----------------------------------------
2430 // Stage 0* Prolog#0
2431 // Stage 1a 0b Prolog#1
2432 // Stage 2+ 1a 0b Kernel Unroll#0
2433
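// One data point from the first table above: with 3 stages and NumUnroll = 4,
// a stage-0 instruction in kernel copy Unroll#2 gives
// NumStages - NumUnroll + UnrollNum - 1 = 3 - 4 + 2 - 1 = 0 >= StageNum, so
// UsePrologReg is true with PrologNum = 0 and its value is merged with the
// one from Prolog#0 (the two entries marked "0a"). For Unroll#1 the first
// test yields -1 < 0 but 3 - 4 + 1 == 0 == StageNum, so the value is merged
// with the initial value instead (the entry marked "0+").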
2434 for (MachineOperand &DefMO : OrigMI->defs()) {
2435 if (!DefMO.isReg() || DefMO.isDead())
2436 continue;
2437 Register OrigReg = DefMO.getReg();
2438 auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
2439 if (NewReg == KernelVRMap[UnrollNum].end())
2440 continue;
2441 Register CorrespondReg;
2442 if (UsePrologReg) {
2443 int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;
2444 CorrespondReg = PrologVRMap[PrologNum][OrigReg];
2445 } else {
2446 MachineInstr *Phi = getLoopPhiUser(OrigReg, OrigKernel);
2447 if (!Phi)
2448 continue;
2449 CorrespondReg = getInitPhiReg(*Phi, OrigKernel);
2450 }
2451
2452 assert(CorrespondReg.isValid());
2453 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2454 MachineInstr *NewPhi =
2455 BuildMI(*NewKernel, NewKernel->getFirstNonPHI(), DebugLoc(),
2456 TII->get(TargetOpcode::PHI), PhiReg)
2457 .addReg(NewReg->second)
2458 .addMBB(NewKernel)
2459 .addReg(CorrespondReg)
2460 .addMBB(Prolog);
2461 LIS.InsertMachineInstrInMaps(*NewPhi);
2462 PhiVRMap[UnrollNum][OrigReg] = PhiReg;
2463 }
2464}
2465
2466static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg,
2467 MachineBasicBlock *NewMBB) {
2468 for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {
2469 if (Phi.getOperand(Idx).getReg() == OrigReg) {
2470 Phi.getOperand(Idx).setReg(NewReg);
2471 Phi.getOperand(Idx + 1).setMBB(NewMBB);
2472 return;
2473 }
2474 }
2475}
2476
2477/// Generate phis that merge values from multiple routes
2478void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,
2479 Register NewReg) {
2480 SmallVector<MachineOperand *> UsesAfterLoop;
2481 SmallVector<MachineInstr *> LoopPhis;
2482 for (MachineRegisterInfo::use_iterator I = MRI.use_begin(OrigReg),
2483 E = MRI.use_end();
2484 I != E; ++I) {
2485 MachineOperand &O = *I;
2486 if (O.getParent()->getParent() != OrigKernel &&
2487 O.getParent()->getParent() != Prolog &&
2488 O.getParent()->getParent() != NewKernel &&
2489 O.getParent()->getParent() != Epilog)
2490 UsesAfterLoop.push_back(&O);
2491 if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())
2492 LoopPhis.push_back(O.getParent());
2493 }
2494
2495 // Merge the route that only executes the pipelined loop (when there are no
2496 // remaining iterations) with the route that executes the original loop.
2497 if (!UsesAfterLoop.empty()) {
2498 Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2499 MachineInstr *NewPhi =
2500 BuildMI(*NewExit, NewExit->getFirstNonPHI(), DebugLoc(),
2501 TII->get(TargetOpcode::PHI), PhiReg)
2502 .addReg(OrigReg)
2503 .addMBB(OrigKernel)
2504 .addReg(NewReg)
2505 .addMBB(Epilog);
2506 LIS.InsertMachineInstrInMaps(*NewPhi);
2507
2508 for (MachineOperand *MO : UsesAfterLoop)
2509 MO->setReg(PhiReg);
2510
2511 // The interval of OrigReg is invalid and should be recalculated when
2512 // LiveInterval::getInterval() is called.
2513 if (LIS.hasInterval(OrigReg))
2514 LIS.removeInterval(OrigReg);
2515 }
2516
2517 // Merge routes from the pipelined loop and the bypassed route before the
2518 // original loop
2519 if (!LoopPhis.empty()) {
2520 for (MachineInstr *Phi : LoopPhis) {
2521 Register InitReg, LoopReg;
2522 getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
2523 Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));
2524 MachineInstr *NewPhi =
2525 BuildMI(*NewPreheader, NewPreheader->getFirstNonPHI(),
2526 Phi->getDebugLoc(), TII->get(TargetOpcode::PHI), NewInit)
2527 .addReg(InitReg)
2528 .addMBB(Check)
2529 .addReg(NewReg)
2530 .addMBB(Epilog);
2531 LIS.InsertMachineInstrInMaps(*NewPhi);
2532 replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);
2533 }
2534 }
2535}
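// Summary of the two merges above (PHI operand order follows the BuildMI
// calls; %merged stands for the new PhiReg): uses that live past both loops
// are rerouted through a phi in NewExit,
//   %merged = PHI %OrigReg, OrigKernel, %NewReg, Epilog
// and every loop-carried phi in the original kernel gets a fresh initial
// value built in NewPreheader,
//   %NewInit = PHI %InitReg, Check, %NewReg, Epilog
// so the original loop can start either from the untouched initial values or
// from the state left by the pipelined epilog.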
2536
2537void ModuloScheduleExpanderMVE::generateProlog(
2538 SmallVectorImpl<ValueMapTy> &PrologVRMap) {
2539 PrologVRMap.clear();
2540 PrologVRMap.resize(Schedule.getNumStages() - 1);
2541 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2542 for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;
2543 ++PrologNum) {
2544 for (MachineInstr *MI : Schedule.getInstructions()) {
2545 if (MI->isPHI())
2546 continue;
2547 int StageNum = Schedule.getStage(MI);
2548 if (StageNum > PrologNum)
2549 continue;
2550 MachineInstr *NewMI = cloneInstr(MI);
2551 updateInstrDef(NewMI, PrologVRMap[PrologNum], false);
2552 NewMIMap[NewMI] = {PrologNum, StageNum};
2553 Prolog->push_back(NewMI);
2554 LIS.InsertMachineInstrInMaps(*NewMI);
2555 }
2556 }
2557
2558 for (auto I : NewMIMap) {
2559 MachineInstr *MI = I.first;
2560 int PrologNum = I.second.first;
2561 int StageNum = I.second.second;
2562 updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);
2563 }
2564
2565 LLVM_DEBUG({
2566 dbgs() << "prolog:\n";
2567 Prolog->dump();
2568 });
2569}
2570
2571void ModuloScheduleExpanderMVE::generateKernel(
2572 SmallVectorImpl<ValueMapTy> &PrologVRMap,
2573 SmallVectorImpl<ValueMapTy> &KernelVRMap, InstrMapTy &LastStage0Insts) {
2574 KernelVRMap.clear();
2575 KernelVRMap.resize(NumUnroll);
2576 SmallVector<ValueMapTy> PhiVRMap;
2577 PhiVRMap.resize(NumUnroll);
2578 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2579 for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
2580 for (MachineInstr *MI : Schedule.getInstructions()) {
2581 if (MI->isPHI())
2582 continue;
2583 int StageNum = Schedule.getStage(MI);
2584 MachineInstr *NewMI = cloneInstr(MI);
2585 if (UnrollNum == NumUnroll - 1)
2586 LastStage0Insts[MI] = NewMI;
2587 updateInstrDef(NewMI, KernelVRMap[UnrollNum],
2588 (UnrollNum == NumUnroll - 1 && StageNum == 0));
2589 generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
2590 NewMIMap[NewMI] = {UnrollNum, StageNum};
2591 NewKernel->push_back(NewMI);
2592 LIS.InsertMachineInstrInMaps(*NewMI);
2593 }
2594 }
2595
2596 for (auto I : NewMIMap) {
2597 MachineInstr *MI = I.first;
2598 int UnrollNum = I.second.first;
2599 int StageNum = I.second.second;
2600 updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
2601 }
2602
2603 // If the remaining trip count is greater than NumUnroll-1, the loop continues
2604 insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
2605 *Epilog);
2606
2607 LLVM_DEBUG({
2608 dbgs() << "kernel:\n";
2609 NewKernel->dump();
2610 });
2611}
2612
2613void ModuloScheduleExpanderMVE::generateEpilog(
2614 SmallVectorImpl<ValueMapTy> &KernelVRMap,
2615 SmallVectorImpl<ValueMapTy> &EpilogVRMap, InstrMapTy &LastStage0Insts) {
2616 EpilogVRMap.clear();
2617 EpilogVRMap.resize(Schedule.getNumStages() - 1);
2618 DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2619 for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;
2620 ++EpilogNum) {
2621 for (MachineInstr *MI : Schedule.getInstructions()) {
2622 if (MI->isPHI())
2623 continue;
2624 int StageNum = Schedule.getStage(MI);
2625 if (StageNum <= EpilogNum)
2626 continue;
2627 MachineInstr *NewMI = cloneInstr(MI);
2628 updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
2629 NewMIMap[NewMI] = {EpilogNum, StageNum};
2630 Epilog->push_back(NewMI);
2631 LIS.InsertMachineInstrInMaps(*NewMI);
2632 }
2633 }
2634
2635 for (auto I : NewMIMap) {
2636 MachineInstr *MI = I.first;
2637 int EpilogNum = I.second.first;
2638 int StageNum = I.second.second;
2639 updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
2640 }
2641
2642 // If there are remaining iterations, they are executed in the original loop.
2643 // Instructions related to loop control, such as loop counter comparison,
2644 // are indicated by shouldIgnoreForPipelining() and are assumed to be placed
2645 // in stage 0. Thus, the map is for the last one in the kernel.
2646 insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
2647
2648 LLVM_DEBUG({
2649 dbgs() << "epilog:\n";
2650 Epilog->dump();
2651 });
2652}
2653
2654 /// Calculate the number of unrolled kernel copies required and set NumUnroll.
2655void ModuloScheduleExpanderMVE::calcNumUnroll() {
2656 DenseMap<MachineInstr *, unsigned> Inst2Idx;
2657 NumUnroll = 1;
2658 for (unsigned I = 0; I < Schedule.getInstructions().size(); ++I)
2659 Inst2Idx[Schedule.getInstructions()[I]] = I;
2660
2661 for (MachineInstr *MI : Schedule.getInstructions()) {
2662 if (MI->isPHI())
2663 continue;
2664 int StageNum = Schedule.getStage(MI);
2665 for (const MachineOperand &MO : MI->uses()) {
2666 if (!MO.isReg() || !MO.getReg().isVirtual())
2667 continue;
2668 MachineInstr *DefMI = MRI.getVRegDef(MO.getReg());
2669 if (DefMI->getParent() != OrigKernel)
2670 continue;
2671
2672 int NumUnrollLocal = 1;
2673 if (DefMI->isPHI()) {
2674 ++NumUnrollLocal;
2675 // canApply() guarantees that DefMI is not a phi and is an instruction in
2676 // the loop
2677 DefMI = MRI.getVRegDef(getLoopPhiReg(*DefMI, OrigKernel));
2678 }
2679 NumUnrollLocal += StageNum - Schedule.getStage(DefMI);
2680 if (Inst2Idx[MI] <= Inst2Idx[DefMI])
2681 --NumUnrollLocal;
2682 NumUnroll = std::max(NumUnroll, NumUnrollLocal);
2683 }
2684 }
2685 LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");
2686}
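// Worked example for the computation above (hypothetical schedule): if a
// stage-2 instruction uses a value defined directly by a stage-0 instruction
// that appears earlier in the scheduled order, NumUnrollLocal = 1 + (2 - 0)
// = 3 kernel copies are needed so the defining copy is still available. If
// the value instead arrives through a loop-carried phi, one extra copy is
// added, and one is subtracted when the definition appears at or after the
// use in the scheduled order (Inst2Idx[MI] <= Inst2Idx[DefMI]).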
2687
2688/// Create new virtual registers for definitions of NewMI and update NewMI.
2689/// If the definitions are referenced after the pipelined loop, phis are
2690/// created to merge with other routes.
2691void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,
2692 ValueMapTy &VRMap,
2693 bool LastDef) {
2694 for (MachineOperand &MO : NewMI->all_defs()) {
2695 if (!MO.getReg().isVirtual())
2696 continue;
2697 Register Reg = MO.getReg();
2698 const TargetRegisterClass *RC = MRI.getRegClass(Reg);
2699 Register NewReg = MRI.createVirtualRegister(RC);
2700 MO.setReg(NewReg);
2701 VRMap[Reg] = NewReg;
2702 if (LastDef)
2703 mergeRegUsesAfterPipeline(Reg, NewReg);
2704 }
2705}
2706
2707 void ModuloScheduleExpanderMVE::expand() {
2708 OrigKernel = Schedule.getLoop()->getTopBlock();
2709 OrigPreheader = Schedule.getLoop()->getLoopPreheader();
2710 OrigExit = Schedule.getLoop()->getExitBlock();
2711
2712 LLVM_DEBUG(Schedule.dump());
2713
2714 generatePipelinedLoop();
2715}
2716
2717/// Check if ModuloScheduleExpanderMVE can be applied to L
2718 bool ModuloScheduleExpanderMVE::canApply(MachineLoop &L) {
2719 if (!L.getExitBlock()) {
2720 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: No single exit block.\n");
2721 return false;
2722 }
2723
2724 MachineBasicBlock *BB = L.getTopBlock();
2725 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
2726
2727 // Put some constraints on the operands of the phis to simplify the
2728 // transformation
2729 DenseSet<Register> UsedByPhi;
2730 for (MachineInstr &MI : BB->phis()) {
2731 // Registers defined by phis must be used only inside the loop and never be
2732 // used by phis.
2733 for (MachineOperand &MO : MI.defs())
2734 if (MO.isReg())
2735 for (MachineInstr &Ref : MRI.use_instructions(MO.getReg()))
2736 if (Ref.getParent() != BB || Ref.isPHI()) {
2737 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A phi result is "
2738 "referenced outside of the loop or by phi.\n");
2739 return false;
2740 }
2741
2742 // A source register from the loop block must be defined inside the loop.
2743 // A register defined inside the loop must be referenced by only one phi at
2744 // most.
2745 Register InitVal, LoopVal;
2746 getPhiRegs(MI, MI.getParent(), InitVal, LoopVal);
2747 if (!Register(LoopVal).isVirtual() ||
2748 MRI.getVRegDef(LoopVal)->getParent() != BB) {
2749 LLVM_DEBUG(
2750 dbgs() << "Can not apply MVE expander: A phi source value coming "
2751 "from the loop is not defined in the loop.\n");
2752 return false;
2753 }
2754 if (UsedByPhi.count(LoopVal)) {
2755 LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "
2756 "loop is referenced by two or more phis.\n");
2757 return false;
2758 }
2759 UsedByPhi.insert(LoopVal);
2760 }
2761
2762 return true;
2763}
2764
2765//===----------------------------------------------------------------------===//
2766// ModuloScheduleTestPass implementation
2767//===----------------------------------------------------------------------===//
2768// This pass constructs a ModuloSchedule from its module and runs
2769// ModuloScheduleExpander.
2770//
2771// The module is expected to contain a single-block analyzable loop.
2772// The total order of instructions is taken from the loop as-is.
2773// Instructions are expected to be annotated with a PostInstrSymbol.
2774// This PostInstrSymbol must have the following format:
2775// "Stage=%d Cycle=%d".
2776//===----------------------------------------------------------------------===//
2777
2778namespace {
2779class ModuloScheduleTest : public MachineFunctionPass {
2780public:
2781 static char ID;
2782
2783 ModuloScheduleTest() : MachineFunctionPass(ID) {
2784 initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
2785 }
2786
2787 bool runOnMachineFunction(MachineFunction &MF) override;
2788 void runOnLoop(MachineFunction &MF, MachineLoop &L);
2789
2790 void getAnalysisUsage(AnalysisUsage &AU) const override {
2791 AU.addRequired<MachineLoopInfoWrapperPass>();
2792 AU.addRequired<LiveIntervalsWrapperPass>();
2793 MachineFunctionPass::getAnalysisUsage(AU);
2794 }
2795};
2796} // namespace
2797
2798char ModuloScheduleTest::ID = 0;
2799
2800INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
2801 "Modulo Schedule test pass", false, false)
2804INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
2805 "Modulo Schedule test pass", false, false)
2806
2807bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2808 MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
2809 for (auto *L : MLI) {
2810 if (L->getTopBlock() != L->getBottomBlock())
2811 continue;
2812 runOnLoop(MF, *L);
2813 return false;
2814 }
2815 return false;
2816}
2817
2818static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
2819 std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
2820 std::pair<StringRef, StringRef> StageTokenAndValue =
2821 getToken(StageAndCycle.first, "-");
2822 std::pair<StringRef, StringRef> CycleTokenAndValue =
2823 getToken(StageAndCycle.second, "-");
2824 if (StageTokenAndValue.first != "Stage" ||
2825 CycleTokenAndValue.first != "_Cycle") {
2826 llvm_unreachable(
2827 "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
2828 return;
2829 }
2830
2831 StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
2832 CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
2833
2834 dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
2835}
2836
2837void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2838 LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
2839 MachineBasicBlock *BB = L.getTopBlock();
2840 dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
2841
2842 DenseMap<MachineInstr *, int> Cycle, Stage;
2843 std::vector<MachineInstr *> Instrs;
2844 for (MachineInstr &MI : *BB) {
2845 if (MI.isTerminator())
2846 continue;
2847 Instrs.push_back(&MI);
2848 if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
2849 dbgs() << "Parsing post-instr symbol for " << MI;
2850 parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
2851 }
2852 }
2853
2854 ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
2855 std::move(Stage));
2856 ModuloScheduleExpander MSE(
2857 MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
2858 MSE.expand();
2859 MSE.cleanup();
2860}
2861
2862//===----------------------------------------------------------------------===//
2863// ModuloScheduleTestAnnotater implementation
2864//===----------------------------------------------------------------------===//
2865
2866 void ModuloScheduleTestAnnotater::annotate() {
2867 for (MachineInstr *MI : S.getInstructions()) {
2868 SmallVector<char, 16> SV;
2869 raw_svector_ostream OS(SV);
2870 OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
2871 MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
2872 MI->setPostInstrSymbol(MF, Sym);
2873 }
2874}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
MachineBasicBlock & MBB
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
@ Default
#define Check(C,...)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:58
static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop, Register &InitVal, Register &LoopVal)
Return the register values for the operands of a Phi instruction.
static Register getLoopPhiReg(const MachineInstr &Phi, const MachineBasicBlock *LoopBB)
Return the Phi register value that comes the loop block.
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
static bool hasUseAfterLoop(Register Reg, MachineBasicBlock *BB, MachineRegisterInfo &MRI)
Return true if the register has a use that occurs outside the specified loop.
static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg, MachineBasicBlock *MBB, MachineRegisterInfo &MRI)
Replace all uses of FromReg that appear outside the specified basic block with ToReg.
static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg, MachineBasicBlock *NewMBB)
static MachineInstr * getLoopPhiUser(Register Reg, MachineBasicBlock *Loop)
Return a phi if Reg is referenced by the phi.
static MachineBasicBlock * createDedicatedExit(MachineBasicBlock *Loop, MachineBasicBlock *Exit, LiveIntervals &LIS)
Create a dedicated exit for Loop.
static void parseSymbolString(StringRef S, int &Cycle, int &Stage)
static cl::opt< bool > SwapBranchTargetsMVE("pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false), cl::desc("Swap target blocks of a conditional branch for MVE expander"))
static Register getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB)
Return the Phi register value that comes from the incoming block.
MachineInstr unsigned OpIdx
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
A debug info location.
Definition DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:167
bool erase(const KeyT &Val)
Definition DenseMap.h:311
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition DenseMap.h:74
iterator end()
Definition DenseMap.h:81
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
Reverses the branch condition of the specified condition list, returning false on success and true if...
std::unique_ptr< PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
Analyze loop L, which must be a single-basic-block loop, and if the conditions can be understood enou...
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
bool hasInterval(Register Reg) const
SlotIndex InsertMachineInstrInMaps(MachineInstr &MI)
void insertMBBInMaps(MachineBasicBlock *MBB)
void RemoveMachineInstrFromMaps(MachineInstr &MI)
void removeInterval(Register Reg)
Interval removal.
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
LLVM_ABI void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
LLVM_ABI void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
LLVM_ABI void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< iterator > phis()
Returns a range that iterates over the phis in the basic block.
reverse_instr_iterator instr_rbegin()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
void push_back(MachineInstr *MI)
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void dump() const
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
SmallVectorImpl< MachineBasicBlock * >::iterator succ_iterator
LLVM_ABI void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
LLVM_ABI instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
MachineInstrBundleIterator< MachineInstr > iterator
Instructions::reverse_iterator reverse_instr_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
mop_range defs()
Returns all explicit operands that are register definitions.
const MachineBasicBlock * getParent() const
filtered_mop_range all_defs()
Returns an iterator range over all operands that are (explicit or implicit) register defs.
unsigned getNumOperands() const
Retuns the total number of operands.
bool memoperands_empty() const
Return true if we don't have any memory operands which described the memory access done by this instr...
LLVM_ABI void setMemRefs(MachineFunction &MF, ArrayRef< MachineMemOperand * > MemRefs)
Assign this MachineInstr's memory reference descriptor list.
LLVM_ABI void dropMemRefs(MachineFunction &MF)
Clear this MachineInstr's memory reference descriptor list.
mop_range operands()
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
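A short sketch (not from the source) showing typical MachineInstr queries; isSafeToMoveNaively is a hypothetical helper that only inspects operands and memoperands.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"

using namespace llvm;

// Sketch: decide whether MI only defines virtual registers and whether its
// memory access is fully described by memoperands.
static bool isSafeToMoveNaively(const MachineInstr &MI) {
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || !MO.isDef())
      continue;
    if (MO.getReg().isPhysical())
      return false; // e.g. an implicit def of a status register
  }
  // Loads/stores without memoperands must be treated conservatively.
  if (MI.mayLoadOrStore() && MI.memoperands_empty())
    return false;
  return true;
}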
MachineOperand class - Representation of each machine instruction operand.
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
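A minimal sketch of working with MachineOperand directly; NewTarget and VReg are assumed to come from surrounding rewriting code rather than from this file.

#include "llvm/CodeGen/MachineOperand.h"

using namespace llvm;

// Sketch: retarget a branch's block operand and build freestanding operands.
static void retargetOperand(MachineOperand &MO, MachineBasicBlock *NewTarget,
                            Register VReg) {
  if (MO.isMBB())
    MO.setMBB(NewTarget);

  // Operands can also be created on their own and attached to an
  // instruction later with MachineInstr::addOperand().
  MachineOperand UseOp = MachineOperand::CreateReg(VReg, /*isDef=*/false);
  MachineOperand MBBOp = MachineOperand::CreateMBB(NewTarget);
  (void)UseOp;
  (void)MBBOp;
}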
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
defusechain_instr_iterator< true, false, false, true > use_instr_iterator
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the specified register, stepping by MachineInstr.
defusechain_iterator< true, false, false, true, false > use_iterator
use_iterator/use_begin/use_end - Walk all uses of the specified register.
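A small sketch of walking a register's use chain through MachineRegisterInfo; allUsesInBlock is a hypothetical query assuming Reg is a virtual register in SSA form.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Sketch: check whether every use of Reg lives in a single block.
static bool allUsesInBlock(MachineRegisterInfo &MRI, Register Reg,
                           const MachineBasicBlock *MBB) {
  for (const MachineOperand &UseOp : MRI.use_operands(Reg))
    if (UseOp.getParent()->getParent() != MBB)
      return false;
  return true;
}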
static bool canApply(MachineLoop &L)
Check if ModuloScheduleExpanderMVE can be applied to L.
The ModuloScheduleExpander takes a ModuloSchedule and expands it in-place, rewriting the old loop and inserting prologs and epilogs as required.
MachineBasicBlock * getRewrittenKernel()
Returns the newly rewritten kernel block, or nullptr if this was optimized away.
void cleanup()
Performs final cleanup after expansion.
void expand()
Performs the actual expansion.
DenseMap< MachineInstr *, std::pair< Register, int64_t > > InstrChangesTy
void annotate()
Performs the annotation.
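A hedged sketch of how a pass might drive the expander once a ModuloSchedule has been computed; the constructor arguments shown match recent ModuloSchedule.h but should be checked against the headers, and MF, MS, and LIS are assumed to already exist.

#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ModuloSchedule.h"

using namespace llvm;

// Sketch: expand a computed schedule in-place and clean up afterwards.
static void expandSchedule(MachineFunction &MF, ModuloSchedule &MS,
                           LiveIntervals &LIS) {
  // No instruction-offset adjustments in this sketch, hence the empty map.
  ModuloScheduleExpander MSE(MF, MS, LIS,
                             ModuloScheduleExpander::InstrChangesTy());
  MSE.expand();
  MSE.cleanup();
}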
Represents a schedule for a single-block loop.
int getNumStages() const
Return the number of stages contained in this schedule, which is the largest stage index + 1.
ArrayRef< MachineInstr * > getInstructions()
Return the rescheduled instructions in order.
void print(raw_ostream &OS)
int getCycle(MachineInstr *MI)
Return the cycle that MI is scheduled at, or -1.
void setStage(MachineInstr *MI, int MIStage)
Set the stage of a newly created instruction.
int getStage(MachineInstr *MI)
Return the stage that MI is scheduled in, or -1.
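A short sketch of querying a ModuloSchedule; MS is assumed to have been produced by the pipeliner for a single-block loop, and the output format simply mirrors ModuloSchedule::print.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/ModuloSchedule.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Sketch: dump where the schedule placed each instruction.
static void dumpSchedule(ModuloSchedule &MS) {
  errs() << "num stages: " << MS.getNumStages() << "\n";
  for (MachineInstr *MI : MS.getInstructions())
    errs() << "  [stage " << MS.getStage(MI) << " @" << MS.getCycle(MI)
           << "c] " << *MI;
}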
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application-load time and destroyed by llvm_shutdown.
std::deque< MachineBasicBlock * > PeeledBack
SmallVector< MachineInstr *, 4 > IllegalPhisToDelete
Illegal phis that need to be deleted once we re-link stages.
DenseMap< MachineInstr *, MachineInstr * > CanonicalMIs
CanonicalMIs and BlockMIs form a bidirectional map between any of the loop kernel clones.
SmallVector< MachineBasicBlock *, 4 > Prologs
All prolog and epilog blocks.
MachineBasicBlock * peelKernel(LoopPeelDirection LPD)
Peels one iteration of the rewritten kernel (BB) in the specified direction.
std::deque< MachineBasicBlock * > PeeledFront
State passed from peelKernel to peelPrologAndEpilogs().
unsigned getStage(MachineInstr *MI)
Helper to get the stage of an instruction in the schedule.
void rewriteUsesOf(MachineInstr *MI)
Change all users of MI, if MI is predicated out (LiveStages[MI->getParent()] == false).
SmallVector< MachineBasicBlock *, 4 > Epilogs
DenseMap< MachineBasicBlock *, BitVector > AvailableStages
For every block, the stages that are available.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > LoopInfo
Target loop info before kernel peeling.
DenseMap< std::pair< MachineBasicBlock *, MachineInstr * >, MachineInstr * > BlockMIs
Register getEquivalentRegisterIn(Register Reg, MachineBasicBlock *BB)
All prolog and epilog blocks are clones of the kernel, so any produced register in one block has a corresponding equivalent register in all other blocks.
MachineBasicBlock * Preheader
The original loop preheader.
void rewriteKernel()
Converts BB from the original loop body to the rewritten, pipelined steady-state.
DenseMap< MachineInstr *, unsigned > PhiNodeLoopIteration
When peeling the epilogue, keep track of the distance between the phi nodes and the kernel.
DenseMap< MachineBasicBlock *, BitVector > LiveStages
For every block, the stages that are produced.
void filterInstructions(MachineBasicBlock *MB, int MinStage)
void peelPrologAndEpilogs()
Peel the kernel forwards and backwards to produce prologs and epilogs, and stitch them together.
MachineBasicBlock * BB
The original loop block that gets rewritten in-place.
void fixupBranches()
Insert branches between prologs, kernel and epilogs.
MachineBasicBlock * CreateLCSSAExitingBlock()
Create a poor-man's LCSSA by cloning only the PHIs from the kernel block to a block dominated by all prologs and epilogs.
void validateAgainstModuloScheduleExpander()
Runs ModuloScheduleExpander and treats it as a golden input to validate aspects of the code generated by PeelingModuloScheduleExpander.
Register getPhiCanonicalReg(MachineInstr *CanonicalPhi, MachineInstr *Phi)
Helper function to find the right canonical register for a phi instruction coming from a peeled-out prologue.
void moveStageBetweenBlocks(MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage)
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isValid() const
Definition Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78
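A tiny sketch of the Register queries above; classify is a hypothetical helper, and the only assumption is that Register wraps a plain register number with disjoint virtual and physical ranges.

#include "llvm/CodeGen/Register.h"

using namespace llvm;

// Sketch: classify a register value by namespace.
static const char *classify(Register R) {
  if (!R.isValid())
    return "no register";
  return R.isVirtual() ? "virtual" : "physical";
}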
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
reference emplace_back(ArgTypes &&... Args)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
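A short sketch combining SmallPtrSet and SmallVector as described above; dedupe is a hypothetical helper, and the inline capacity of 8 is an arbitrary choice.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

// Sketch: de-duplicate a list of instructions while preserving first-seen
// order. Both containers stay on the stack until they outgrow their inline
// capacity.
static SmallVector<MachineInstr *, 8> dedupe(ArrayRef<MachineInstr *> Input) {
  SmallVector<MachineInstr *, 8> Order;
  SmallPtrSet<MachineInstr *, 8> Seen;
  for (MachineInstr *MI : Input)
    if (Seen.insert(MI).second) // .second is true only for new elements
      Order.push_back(MI);
  return Order;
}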
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
TargetInstrInfo - Interface to description of machine instruction set.
virtual const TargetInstrInfo * getInstrInfo() const
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
initializer< Ty > init(const Ty &Val)
constexpr double phi
Definition MathExtras.h:61
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
iterator end() const
Definition BasicBlock.h:89
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1731
MachineBasicBlock * PeelSingleBlockLoop(LoopPeelDirection Direction, MachineBasicBlock *Loop, MachineRegisterInfo &MRI, const TargetInstrInfo *TII)
Peels a single block loop.
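A brief sketch using the PeelSingleBlockLoop entry point declared above; peelPrologOnce is a hypothetical wrapper assuming LoopBB is a single-block loop and MRI/TII come from its enclosing function.

#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ModuloSchedule.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

// Sketch: peel one copy of the steady-state loop body to act as a prolog.
static MachineBasicBlock *peelPrologOnce(MachineBasicBlock *LoopBB,
                                         MachineRegisterInfo &MRI,
                                         const TargetInstrInfo *TII) {
  // LPD_Front peels the first iteration; LPD_Back would peel the last.
  return PeelSingleBlockLoop(LPD_Front, LoopBB, MRI, TII);
}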
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1657
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
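A hedged sketch of the BuildMI builder interface together with the addMBB operand helper listed earlier; BranchOpcode is a placeholder for a target-specific opcode, and a real pass would normally go through TargetInstrInfo::insertBranch instead.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"

using namespace llvm;

// Sketch: append an unconditional branch to MBB with the generic builder.
static void emitBranch(MachineBasicBlock &MBB, MachineBasicBlock *Target,
                       unsigned BranchOpcode, const DebugLoc &DL) {
  const TargetInstrInfo *TII =
      MBB.getParent()->getSubtarget().getInstrInfo();
  BuildMI(&MBB, DL, TII->get(BranchOpcode)).addMBB(Target);
}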
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear to be whitespace.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2116
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition STLExtras.h:634
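A small sketch of the early-increment idiom on a MachineBasicBlock; eraseIdentityCopies is a hypothetical cleanup that deletes trivial identity COPYs.

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

// Sketch: erase instructions while walking a block. The early-inc adaptor
// advances the iterator before the loop body runs, so erasing the current
// instruction does not break the traversal.
static void eraseIdentityCopies(MachineBasicBlock &MBB) {
  for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
    if (!MI.isCopy())
      continue;
    const MachineOperand &Dst = MI.getOperand(0);
    const MachineOperand &Src = MI.getOperand(1);
    if (Dst.getReg() == Src.getReg() && Dst.getSubReg() == Src.getSubReg())
      MI.eraseFromParent();
  }
}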
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
CycleInfo::CycleT Cycle
Definition CycleInfo.h:24
LLVM_ABI void initializeModuloScheduleTestPass(PassRegistry &)
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ Other
Any other memory.
Definition ModRef.h:68
@ Sub
Subtraction of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range [Start, End).
Definition STLExtras.h:1934
DWARFExpression::Operation Op
@ LPD_Back
Peel the last iteration of the loop.
@ LPD_Front
Peel the first iteration of the loop.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872