Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit c904c18

Browse files
committed
[π˜€π—½π—Ώ] initial version
Created using spr 1.3.5
2 parents 5eb5f0d + a9235b5 commit c904c18

File tree

4 files changed

+388
-30
lines changed

4 files changed

+388
-30
lines changed

llvm/lib/Transforms/IPO/AlwaysInliner.cpp

Lines changed: 305 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,79 @@
1212
//===----------------------------------------------------------------------===//
1313

1414
#include "llvm/Transforms/IPO/AlwaysInliner.h"
15+
#include "llvm/ADT/DenseMap.h"
16+
#include "llvm/ADT/MapVector.h"
1517
#include "llvm/ADT/SetVector.h"
18+
#include "llvm/ADT/SmallPtrSet.h"
19+
#include "llvm/ADT/Statistic.h"
1620
#include "llvm/Analysis/AliasAnalysis.h"
1721
#include "llvm/Analysis/AssumptionCache.h"
22+
#include "llvm/Analysis/DominanceFrontier.h"
1823
#include "llvm/Analysis/InlineAdvisor.h"
1924
#include "llvm/Analysis/InlineCost.h"
2025
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
2126
#include "llvm/Analysis/ProfileSummaryInfo.h"
27+
#include "llvm/Analysis/ValueTracking.h"
28+
#include "llvm/IR/BasicBlock.h"
29+
#include "llvm/IR/Dominators.h"
2230
#include "llvm/IR/Module.h"
31+
#include "llvm/IR/ValueHandle.h"
2332
#include "llvm/InitializePasses.h"
33+
#include "llvm/Support/CommandLine.h"
2434
#include "llvm/Transforms/Utils/Cloning.h"
2535
#include "llvm/Transforms/Utils/ModuleUtils.h"
36+
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
37+
2638

2739
using namespace llvm;
2840

2941
#define DEBUG_TYPE "inline"
42+
static cl::opt<bool> EnableMem2RegInterleaving(
43+
"enable-always-inliner-mem2reg", cl::init(true), cl::Hidden,
44+
cl::desc("Enable interleaving always-inlining with alloca promotion"));
45+
46+
STATISTIC(NumAllocasPromoted,
47+
"Number of allocas promoted to registers after inlining");
3048

3149
namespace {
3250

51+
/// Returns true if this call site is eligible for always-inlining: the call
/// carries the alwaysinline attribute and has not been explicitly marked
/// noinline (noinline on the call site wins over alwaysinline).
bool canInlineCallBase(CallBase *CB) {
  if (!CB->hasFnAttr(Attribute::AlwaysInline))
    return false;
  return !CB->getAttributes().hasFnAttr(Attribute::NoInline);
}
55+
56+
/// Try to inline the single call site \p CB, whose callee is \p F.
///
/// Emits an optimization remark describing the outcome either way: a missed
/// remark carrying the failure reason, or the standard "inlined into" remark
/// on success.
///
/// \returns true if the call was inlined, false otherwise.
bool attemptInlineFunction(
    Function &F, CallBase *CB, bool InsertLifetime,
    function_ref<AAResults &(Function &)> &GetAAR,
    function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
    ProfileSummaryInfo &PSI) {
  Function *Caller = CB->getCaller();
  OptimizationRemarkEmitter ORE(Caller);
  // Capture location info up front; CB is destroyed if inlining succeeds.
  DebugLoc CallLoc = CB->getDebugLoc();
  BasicBlock *CallBB = CB->getParent();

  InlineFunctionInfo IFI(GetAssumptionCache, &PSI, nullptr, nullptr);
  InlineResult IR = InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
                                   &GetAAR(F), InsertLifetime);
  if (!IR.isSuccess()) {
    ORE.emit([&]() {
      return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", CallLoc, CallBB)
             << "'" << ore::NV("Callee", &F) << "' is not inlined into '"
             << ore::NV("Caller", Caller)
             << "': " << ore::NV("Reason", IR.getFailureReason());
    });
    return false;
  }

  emitInlinedIntoBasedOnCost(ORE, CallLoc, CallBB, F, *Caller,
                             InlineCost::getAlways("always inline attribute"),
                             /*ForProfileContext=*/false, DEBUG_TYPE);
  return true;
}
85+
/// This function inlines all functions that are marked with the always_inline
86+
/// attribute. It also removes the inlined functions if they are dead after the
87+
/// inlining process.
3388
bool AlwaysInlineImpl(
3489
Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
3590
FunctionAnalysisManager *FAM,
@@ -50,36 +105,13 @@ bool AlwaysInlineImpl(
50105

51106
for (User *U : F.users())
52107
if (auto *CB = dyn_cast<CallBase>(U))
53-
if (CB->getCalledFunction() == &F &&
54-
CB->hasFnAttr(Attribute::AlwaysInline) &&
55-
!CB->getAttributes().hasFnAttr(Attribute::NoInline))
108+
if (CB->getCalledFunction() == &F && canInlineCallBase(CB))
56109
Calls.insert(CB);
57110

58111
for (CallBase *CB : Calls) {
59112
Function *Caller = CB->getCaller();
60-
OptimizationRemarkEmitter ORE(Caller);
61-
DebugLoc DLoc = CB->getDebugLoc();
62-
BasicBlock *Block = CB->getParent();
63-
64-
InlineFunctionInfo IFI(GetAssumptionCache, &PSI, nullptr, nullptr);
65-
InlineResult Res = InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
66-
&GetAAR(F), InsertLifetime);
67-
if (!Res.isSuccess()) {
68-
ORE.emit([&]() {
69-
return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
70-
<< "'" << ore::NV("Callee", &F) << "' is not inlined into '"
71-
<< ore::NV("Caller", Caller)
72-
<< "': " << ore::NV("Reason", Res.getFailureReason());
73-
});
74-
continue;
75-
}
76-
77-
emitInlinedIntoBasedOnCost(
78-
ORE, DLoc, Block, F, *Caller,
79-
InlineCost::getAlways("always inline attribute"),
80-
/*ForProfileContext=*/false, DEBUG_TYPE);
81-
82-
Changed = true;
113+
Changed |= attemptInlineFunction(F, CB, InsertLifetime, GetAAR,
114+
GetAssumptionCache, PSI);
83115
if (FAM)
84116
FAM->invalidate(*Caller, PreservedAnalyses::none());
85117
}
@@ -115,6 +147,245 @@ bool AlwaysInlineImpl(
115147
return Changed;
116148
}
117149

150+
/// Promote allocas to registers if possible.
151+
static void promoteAllocas(
152+
Function *Caller, SmallPtrSetImpl<AllocaInst *> &AllocasToPromote,
153+
function_ref<AssumptionCache &(Function &)> &GetAssumptionCache) {
154+
if (AllocasToPromote.empty())
155+
return;
156+
157+
SmallVector<AllocaInst *, 4> PromotableAllocas;
158+
llvm::copy_if(AllocasToPromote, std::back_inserter(PromotableAllocas),
159+
isAllocaPromotable);
160+
if (PromotableAllocas.empty())
161+
return;
162+
163+
DominatorTree DT(*Caller);
164+
AssumptionCache &AC = GetAssumptionCache(*Caller);
165+
PromoteMemToReg(PromotableAllocas, DT, &AC);
166+
NumAllocasPromoted += PromotableAllocas.size();
167+
// Emit a remark for the promotion.
168+
OptimizationRemarkEmitter ORE(Caller);
169+
DebugLoc DLoc = Caller->getEntryBlock().getTerminator()->getDebugLoc();
170+
ORE.emit([&]() {
171+
return OptimizationRemark(DEBUG_TYPE, "PromoteAllocas", DLoc,
172+
&Caller->getEntryBlock())
173+
<< "Promoting " << ore::NV("NumAlloca", PromotableAllocas.size())
174+
<< " allocas to SSA registers in function '"
175+
<< ore::NV("Function", Caller) << "'";
176+
});
177+
LLVM_DEBUG(dbgs() << "Promoted " << PromotableAllocas.size()
178+
<< " allocas to registers in function " << Caller->getName()
179+
<< "\n");
180+
}
181+
182+
/// We use a different visitation order of functions here to solve a phase
183+
/// ordering problem. After inlining, a caller function may have allocas that
184+
/// were previously used for passing reference arguments to the callee that
185+
/// are now promotable to registers, using SROA/mem2reg. However if we just let
186+
/// the AlwaysInliner continue inlining everything at once, the later SROA pass
187+
/// in the pipeline will end up placing phis for these allocas into blocks along
188+
/// the dominance frontier which may extend further than desired (e.g. loop
189+
/// headers). This can happen when the caller is then inlined into another
190+
/// caller, and the allocas end up hoisted further before SROA is run.
191+
///
192+
/// Instead what we want is to try to do, as best as we can, is to inline leaf
193+
/// functions into callers, and then run PromoteMemToReg() on the allocas that
194+
/// were passed into the callee before it was inlined.
195+
///
196+
/// We want to do this *before* the caller is inlined into another caller
197+
/// because we want the alloca promotion to happen before its scope extends too
198+
/// far because of further inlining.
199+
///
200+
/// Here's a simple pseudo-example:
201+
/// outermost_caller() {
202+
/// for (...) {
203+
/// middle_caller();
204+
/// }
205+
/// }
206+
///
207+
/// middle_caller() {
208+
/// int stack_var;
209+
/// inner_callee(&stack_var);
210+
/// }
211+
///
212+
/// inner_callee(int *x) {
213+
/// // Do something with x.
214+
/// }
215+
///
216+
/// In this case, we want to inline inner_callee() into middle_caller() and
217+
/// then promote stack_var to a register before we inline middle_caller() into
218+
/// outermost_caller(). The regular always_inliner would inline everything at
219+
/// once, and then SROA/mem2reg would promote stack_var to a register but in
220+
/// the context of outermost_caller() which is not what we want.
221+
bool AlwaysInlineInterleavedMem2RegImpl(
222+
Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
223+
FunctionAnalysisManager &FAM,
224+
function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
225+
function_ref<AAResults &(Function &)> GetAAR) {
226+
227+
bool Changed = false;
228+
229+
// Use SetVector as we may rely on the deterministic iteration order for
230+
// finding candidates later.
231+
SetVector<Function *> AlwaysInlineFunctions;
232+
233+
MapVector<Function *, SmallVector<WeakVH>> CalleeToCallSites;
234+
// Incoming always-inline calls for a function.
235+
DenseMap<Function *, unsigned> IncomingAICount;
236+
// Outgoing always-inline calls for a function.
237+
DenseMap<Function *, unsigned> OutgoingAICount;
238+
// First collect all always_inline functions.
239+
for (Function &F : M) {
240+
if (F.isDeclaration() || !F.hasFnAttribute(Attribute::AlwaysInline) ||
241+
!isInlineViable(F).isSuccess())
242+
continue;
243+
if (F.isPresplitCoroutine())
244+
continue;
245+
AlwaysInlineFunctions.insert(&F);
246+
}
247+
248+
DenseSet<Function *> ProcessedFunctions;
249+
SmallVector<Function *> InlinedComdatFns;
250+
// Build the call graph of always_inline functions.
251+
for (Function *F : AlwaysInlineFunctions) {
252+
for (User *U : F->users()) {
253+
if (auto *CB = dyn_cast<CallBase>(U)) {
254+
if (CB->getCalledFunction() != F || !canInlineCallBase(CB))
255+
continue;
256+
CalleeToCallSites[F].push_back(WeakVH(CB));
257+
// Keep track of the number of incoming calls to this function.
258+
// This is used to determine the order in which we inline functions.
259+
IncomingAICount[F]++;
260+
if (AlwaysInlineFunctions.count(CB->getCaller()))
261+
OutgoingAICount[CB->getCaller()]++;
262+
}
263+
}
264+
}
265+
266+
SmallVector<Function *, 16> Worklist;
267+
for (Function *F : AlwaysInlineFunctions) {
268+
// If this is a always_inline leaf function, we select it for inlining.
269+
if (OutgoingAICount.lookup(F) == 0)
270+
Worklist.push_back(F);
271+
}
272+
273+
while (!Worklist.empty()) {
274+
Function *Callee = Worklist.pop_back_val();
275+
auto &Calls = CalleeToCallSites[Callee];
276+
277+
// Group the calls by their caller. This allows us to collect all allocas
278+
// which need to be promoted together.
279+
MapVector<Function *, SmallVector<WeakVH>> CallerToCalls;
280+
281+
for (WeakVH &WH : Calls)
282+
if (auto *CB = dyn_cast_or_null<CallBase>(WH))
283+
CallerToCalls[CB->getCaller()].push_back(WH);
284+
285+
// Now collect the allocas.
286+
for (auto &CallerAndCalls : CallerToCalls) {
287+
Function *Caller = CallerAndCalls.first;
288+
SmallVector<WeakVH> &CallerCalls = CallerAndCalls.second;
289+
SmallPtrSet<AllocaInst *, 4> AllocasToPromote;
290+
291+
for (WeakVH &WH : CallerCalls) {
292+
if (auto *CB = dyn_cast_or_null<CallBase>(WH)) {
293+
for (Value *Arg : CB->args())
294+
if (auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Arg)))
295+
AllocasToPromote.insert(AI);
296+
}
297+
}
298+
299+
// Do the actual inlining.
300+
bool InlinedAny = false;
301+
SmallVector<WeakVH> SuccessfullyInlinedCalls;
302+
303+
for (WeakVH &WH : CallerCalls) {
304+
if (auto *CB = dyn_cast_or_null<CallBase>(WH)) {
305+
if (attemptInlineFunction(*Callee, CB, InsertLifetime, GetAAR,
306+
GetAssumptionCache, PSI)) {
307+
Changed = true;
308+
InlinedAny = true;
309+
SuccessfullyInlinedCalls.push_back(WH);
310+
}
311+
}
312+
}
313+
314+
if (!InlinedAny)
315+
continue;
316+
317+
// Promote any allocas that were used by the just-inlined call site.
318+
promoteAllocas(Caller, AllocasToPromote, GetAssumptionCache);
319+
320+
unsigned InlinedCountForCaller = SuccessfullyInlinedCalls.size();
321+
if (!AlwaysInlineFunctions.contains(Caller))
322+
continue; // Caller wasn't part of our always-inline call graph.
323+
unsigned OldOutgoing = OutgoingAICount[Caller];
324+
assert(OldOutgoing >= InlinedCountForCaller &&
325+
"Inlined more calls than we had outgoing calls!");
326+
OutgoingAICount[Caller] = OldOutgoing - InlinedCountForCaller;
327+
// If these were the last outgoing calls in the caller, we can now
328+
// consider it a leaf function and add it to the worklist.
329+
if (OutgoingAICount[Caller] == 0 && !ProcessedFunctions.count(Caller))
330+
Worklist.push_back(Caller);
331+
}
332+
333+
ProcessedFunctions.insert(Callee);
334+
AlwaysInlineFunctions.remove(Callee);
335+
CalleeToCallSites.erase(Callee);
336+
337+
Callee->removeDeadConstantUsers();
338+
if (Callee->hasFnAttribute(Attribute::AlwaysInline) &&
339+
Callee->isDefTriviallyDead()) {
340+
if (Callee->hasComdat()) {
341+
InlinedComdatFns.push_back(Callee);
342+
} else {
343+
M.getFunctionList().erase(Callee);
344+
Changed = true;
345+
}
346+
}
347+
348+
if (AlwaysInlineFunctions.empty())
349+
break;
350+
351+
// If we have no more leaf functions to inline, we use a greedy heuristic
352+
// that selects the function with the most incoming calls. The intuition is
353+
// inlining this function will eliminate the most call sites and give the
354+
// highest chance of creating new leaf functions.
355+
if (Worklist.empty()) {
356+
Function *BestFunc = nullptr;
357+
unsigned MaxIncoming = 0;
358+
for (Function *F : AlwaysInlineFunctions) {
359+
if (ProcessedFunctions.count(F))
360+
continue;
361+
362+
unsigned CurrentIncoming = IncomingAICount.lookup(F);
363+
if (!BestFunc || CurrentIncoming > MaxIncoming) {
364+
BestFunc = F;
365+
MaxIncoming = CurrentIncoming;
366+
}
367+
}
368+
Worklist.push_back(BestFunc);
369+
}
370+
}
371+
372+
if (!InlinedComdatFns.empty()) {
373+
filterDeadComdatFunctions(InlinedComdatFns);
374+
for (Function *F : InlinedComdatFns) {
375+
M.getFunctionList().erase(F);
376+
Changed = true;
377+
}
378+
}
379+
380+
// We may have missed some call sites that were marked as always_inline but
381+
// for which the callee function itself wasn't always_inline. Call the
382+
// standard handler here to deal with those.
383+
Changed |= AlwaysInlineImpl(M, InsertLifetime, PSI, &FAM, GetAssumptionCache,
384+
GetAAR);
385+
return Changed;
386+
}
387+
388+
118389
struct AlwaysInlinerLegacyPass : public ModulePass {
119390
bool InsertLifetime;
120391

@@ -177,8 +448,14 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
177448
};
178449
auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
179450

180-
bool Changed = AlwaysInlineImpl(M, InsertLifetime, PSI, &FAM,
181-
GetAssumptionCache, GetAAR);
451+
bool Changed = false;
452+
if (EnableMem2RegInterleaving) {
453+
Changed = AlwaysInlineInterleavedMem2RegImpl(M, InsertLifetime, PSI, FAM,
454+
GetAssumptionCache, GetAAR);
455+
} else {
456+
Changed = AlwaysInlineImpl(M, InsertLifetime, PSI, &FAM, GetAssumptionCache,
457+
GetAAR);
458+
}
182459
if (!Changed)
183460
return PreservedAnalyses::all();
184461

0 commit comments

Comments
(0)