//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO/AlwaysInliner.h"
+ #include "llvm/ADT/DenseMap.h"
+ #include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
+ #include "llvm/ADT/SmallPtrSet.h"
+ #include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+ #include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+ #include "llvm/Analysis/ValueTracking.h"
+ #include "llvm/IR/BasicBlock.h"
+ #include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
+ #include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
+ #include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+ #include "llvm/Transforms/Utils/PromoteMemToReg.h"
+

using namespace llvm;

#define DEBUG_TYPE "inline"
+ static cl::opt<bool> EnableMem2RegInterleaving(
+     "enable-always-inliner-mem2reg", cl::init(true), cl::Hidden,
+     cl::desc("Enable interleaving always-inlining with alloca promotion"));
+
+ STATISTIC(NumAllocasPromoted,
+           "Number of allocas promoted to registers after inlining");

namespace {

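+ /// Return true if this call site should be always-inlined: it carries the
+ /// always_inline attribute and the call site itself is not marked noinline.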
+ bool canInlineCallBase(CallBase *CB) {
+   return CB->hasFnAttr(Attribute::AlwaysInline) &&
+          !CB->getAttributes().hasFnAttr(Attribute::NoInline);
+ }
+
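+ /// Attempt to inline the callee F at the call site CB, emitting an
+ /// optimization remark on success and a missed-optimization remark on
+ /// failure. Returns true if the call was inlined.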
+ bool attemptInlineFunction(
+     Function &F, CallBase *CB, bool InsertLifetime,
+     function_ref<AAResults &(Function &)> &GetAAR,
+     function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
+     ProfileSummaryInfo &PSI) {
+   Function *Caller = CB->getCaller();
+   OptimizationRemarkEmitter ORE(Caller);
+   DebugLoc DLoc = CB->getDebugLoc();
+   BasicBlock *Block = CB->getParent();
+
+   InlineFunctionInfo IFI(GetAssumptionCache, &PSI, nullptr, nullptr);
+   InlineResult Res = InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
+                                     &GetAAR(F), InsertLifetime);
+   if (!Res.isSuccess()) {
+     ORE.emit([&]() {
+       return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
+              << "'" << ore::NV("Callee", &F) << "' is not inlined into '"
+              << ore::NV("Caller", Caller)
+              << "': " << ore::NV("Reason", Res.getFailureReason());
+     });
+     return false;
+   }
+
+   emitInlinedIntoBasedOnCost(ORE, DLoc, Block, F, *Caller,
+                              InlineCost::getAlways("always inline attribute"),
+                              /*ForProfileContext=*/false, DEBUG_TYPE);
+
+   return true;
+ }
+ /// This function inlines all functions that are marked with the always_inline
+ /// attribute. It also removes the inlined functions if they are dead after the
+ /// inlining process.
bool AlwaysInlineImpl(
    Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
    FunctionAnalysisManager *FAM,
@@ -50,36 +105,13 @@ bool AlwaysInlineImpl(
    for (User *U : F.users())
      if (auto *CB = dyn_cast<CallBase>(U))
-       if (CB->getCalledFunction() == &F &&
-           CB->hasFnAttr(Attribute::AlwaysInline) &&
-           !CB->getAttributes().hasFnAttr(Attribute::NoInline))
+       if (CB->getCalledFunction() == &F && canInlineCallBase(CB))
          Calls.insert(CB);

    for (CallBase *CB : Calls) {
      Function *Caller = CB->getCaller();
-     OptimizationRemarkEmitter ORE(Caller);
-     DebugLoc DLoc = CB->getDebugLoc();
-     BasicBlock *Block = CB->getParent();
-
-     InlineFunctionInfo IFI(GetAssumptionCache, &PSI, nullptr, nullptr);
-     InlineResult Res = InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
-                                       &GetAAR(F), InsertLifetime);
-     if (!Res.isSuccess()) {
-       ORE.emit([&]() {
-         return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
-                << "'" << ore::NV("Callee", &F) << "' is not inlined into '"
-                << ore::NV("Caller", Caller)
-                << "': " << ore::NV("Reason", Res.getFailureReason());
-       });
-       continue;
-     }
-
-     emitInlinedIntoBasedOnCost(
-         ORE, DLoc, Block, F, *Caller,
-         InlineCost::getAlways("always inline attribute"),
-         /*ForProfileContext=*/false, DEBUG_TYPE);
-
-     Changed = true;
+     Changed |= attemptInlineFunction(F, CB, InsertLifetime, GetAAR,
+                                      GetAssumptionCache, PSI);

      if (FAM)
        FAM->invalidate(*Caller, PreservedAnalyses::none());
    }
@@ -115,6 +147,245 @@ bool AlwaysInlineImpl(
  return Changed;
}

+ /// Promote allocas to registers if possible.
+ static void promoteAllocas(
+     Function *Caller, SmallPtrSetImpl<AllocaInst *> &AllocasToPromote,
+     function_ref<AssumptionCache &(Function &)> &GetAssumptionCache) {
+   if (AllocasToPromote.empty())
+     return;
+
+   SmallVector<AllocaInst *, 4> PromotableAllocas;
+   llvm::copy_if(AllocasToPromote, std::back_inserter(PromotableAllocas),
+                 isAllocaPromotable);
+   if (PromotableAllocas.empty())
+     return;
+
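+   // Compute the dominator tree for the caller from scratch: inlining has
+   // just modified the CFG, so any previously computed tree would be stale.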
+   DominatorTree DT(*Caller);
+   AssumptionCache &AC = GetAssumptionCache(*Caller);
+   PromoteMemToReg(PromotableAllocas, DT, &AC);
+   NumAllocasPromoted += PromotableAllocas.size();
+   // Emit a remark for the promotion.
+   OptimizationRemarkEmitter ORE(Caller);
+   DebugLoc DLoc = Caller->getEntryBlock().getTerminator()->getDebugLoc();
+   ORE.emit([&]() {
+     return OptimizationRemark(DEBUG_TYPE, "PromoteAllocas", DLoc,
+                               &Caller->getEntryBlock())
+            << "Promoting " << ore::NV("NumAlloca", PromotableAllocas.size())
+            << " allocas to SSA registers in function '"
+            << ore::NV("Function", Caller) << "'";
+   });
+   LLVM_DEBUG(dbgs() << "Promoted " << PromotableAllocas.size()
+                     << " allocas to registers in function "
+                     << Caller->getName() << "\n");
+ }
+
+ /// We use a different visitation order of functions here to solve a phase
+ /// ordering problem. After inlining, a caller function may have allocas that
+ /// were previously used for passing reference arguments to the callee that
+ /// are now promotable to registers, using SROA/mem2reg. However, if we just
+ /// let the AlwaysInliner continue inlining everything at once, the later SROA
+ /// pass in the pipeline will end up placing phis for these allocas into blocks
+ /// along the dominance frontier which may extend further than desired (e.g.
+ /// loop headers). This can happen when the caller is then inlined into another
+ /// caller, and the allocas end up hoisted further before SROA is run.
+ ///
+ /// Instead, what we want to do, as best as we can, is to inline leaf
+ /// functions into callers, and then run PromoteMemToReg() on the allocas that
+ /// were passed into the callee before it was inlined.
+ ///
+ /// We want to do this *before* the caller is inlined into another caller
+ /// because we want the alloca promotion to happen before its scope extends
+ /// too far because of further inlining.
+ ///
+ /// Here's a simple pseudo-example:
+ /// outermost_caller() {
+ ///   for (...) {
+ ///     middle_caller();
+ ///   }
+ /// }
+ ///
+ /// middle_caller() {
+ ///   int stack_var;
+ ///   inner_callee(&stack_var);
+ /// }
+ ///
+ /// inner_callee(int *x) {
+ ///   // Do something with x.
+ /// }
+ ///
+ /// In this case, we want to inline inner_callee() into middle_caller() and
+ /// then promote stack_var to a register before we inline middle_caller() into
+ /// outermost_caller(). The regular always_inliner would inline everything at
+ /// once, and then SROA/mem2reg would promote stack_var to a register, but in
+ /// the context of outermost_caller(), which is not what we want.
+ bool AlwaysInlineInterleavedMem2RegImpl(
+     Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
+     FunctionAnalysisManager &FAM,
+     function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
+     function_ref<AAResults &(Function &)> GetAAR) {
+
+   bool Changed = false;
+
+   // Use SetVector as we may rely on the deterministic iteration order for
+   // finding candidates later.
+   SetVector<Function *> AlwaysInlineFunctions;
+
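+   // Map each always_inline callee to its call sites. WeakVH entries become
+   // null if the tracked call instruction is erased during inlining, so stale
+   // call sites can be detected and skipped.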
+   MapVector<Function *, SmallVector<WeakVH>> CalleeToCallSites;
+   // Incoming always-inline calls for a function.
+   DenseMap<Function *, unsigned> IncomingAICount;
+   // Outgoing always-inline calls for a function.
+   DenseMap<Function *, unsigned> OutgoingAICount;
+   // First collect all always_inline functions.
+   for (Function &F : M) {
+     if (F.isDeclaration() || !F.hasFnAttribute(Attribute::AlwaysInline) ||
+         !isInlineViable(F).isSuccess())
+       continue;
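+     // Skip pre-split coroutines; they cannot be inlined until after
+     // coroutine splitting.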
+     if (F.isPresplitCoroutine())
+       continue;
+     AlwaysInlineFunctions.insert(&F);
+   }
+
+   DenseSet<Function *> ProcessedFunctions;
+   SmallVector<Function *> InlinedComdatFns;
+   // Build the call graph of always_inline functions.
+   for (Function *F : AlwaysInlineFunctions) {
+     for (User *U : F->users()) {
+       if (auto *CB = dyn_cast<CallBase>(U)) {
+         if (CB->getCalledFunction() != F || !canInlineCallBase(CB))
+           continue;
+         CalleeToCallSites[F].push_back(WeakVH(CB));
+         // Keep track of the number of incoming calls to this function.
+         // This is used to determine the order in which we inline functions.
+         IncomingAICount[F]++;
+         if (AlwaysInlineFunctions.count(CB->getCaller()))
+           OutgoingAICount[CB->getCaller()]++;
+       }
+     }
+   }
+
+   SmallVector<Function *, 16> Worklist;
+   for (Function *F : AlwaysInlineFunctions) {
+     // If this is an always_inline leaf function, we select it for inlining.
+     if (OutgoingAICount.lookup(F) == 0)
+       Worklist.push_back(F);
+   }
+
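+   // Process callees bottom-up: inline each worklist function into all of its
+   // callers, promote the allocas those calls pinned to the stack, and then
+   // add any caller that has just become a leaf back onto the worklist.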
+   while (!Worklist.empty()) {
+     Function *Callee = Worklist.pop_back_val();
+     auto &Calls = CalleeToCallSites[Callee];
+
+     // Group the calls by their caller. This allows us to collect all allocas
+     // which need to be promoted together.
+     MapVector<Function *, SmallVector<WeakVH>> CallerToCalls;
+
+     for (WeakVH &WH : Calls)
+       if (auto *CB = dyn_cast_or_null<CallBase>(WH))
+         CallerToCalls[CB->getCaller()].push_back(WH);
+
+     // Now collect the allocas.
+     for (auto &CallerAndCalls : CallerToCalls) {
+       Function *Caller = CallerAndCalls.first;
+       SmallVector<WeakVH> &CallerCalls = CallerAndCalls.second;
+       SmallPtrSet<AllocaInst *, 4> AllocasToPromote;
+
+       for (WeakVH &WH : CallerCalls) {
+         if (auto *CB = dyn_cast_or_null<CallBase>(WH)) {
+           for (Value *Arg : CB->args())
+             if (auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(Arg)))
+               AllocasToPromote.insert(AI);
+         }
+       }
+
+       // Do the actual inlining.
+       bool InlinedAny = false;
+       SmallVector<WeakVH> SuccessfullyInlinedCalls;
+
+       for (WeakVH &WH : CallerCalls) {
+         if (auto *CB = dyn_cast_or_null<CallBase>(WH)) {
+           if (attemptInlineFunction(*Callee, CB, InsertLifetime, GetAAR,
+                                     GetAssumptionCache, PSI)) {
+             Changed = true;
+             InlinedAny = true;
+             SuccessfullyInlinedCalls.push_back(WH);
+           }
+         }
+       }
+
+       if (!InlinedAny)
+         continue;
+
+       // Promote any allocas that were used by the just-inlined call sites.
+       promoteAllocas(Caller, AllocasToPromote, GetAssumptionCache);
+
+       unsigned InlinedCountForCaller = SuccessfullyInlinedCalls.size();
+       if (!AlwaysInlineFunctions.contains(Caller))
+         continue; // Caller wasn't part of our always-inline call graph.
+       unsigned OldOutgoing = OutgoingAICount[Caller];
+       assert(OldOutgoing >= InlinedCountForCaller &&
+              "Inlined more calls than we had outgoing calls!");
+       OutgoingAICount[Caller] = OldOutgoing - InlinedCountForCaller;
+       // If these were the last outgoing calls in the caller, we can now
+       // consider it a leaf function and add it to the worklist.
+       if (OutgoingAICount[Caller] == 0 && !ProcessedFunctions.count(Caller))
+         Worklist.push_back(Caller);
+     }
+
+     ProcessedFunctions.insert(Callee);
+     AlwaysInlineFunctions.remove(Callee);
+     CalleeToCallSites.erase(Callee);
+
+     Callee->removeDeadConstantUsers();
+     if (Callee->hasFnAttribute(Attribute::AlwaysInline) &&
+         Callee->isDefTriviallyDead()) {
+       if (Callee->hasComdat()) {
+         InlinedComdatFns.push_back(Callee);
+       } else {
+         M.getFunctionList().erase(Callee);
+         Changed = true;
+       }
+     }
+
+     if (AlwaysInlineFunctions.empty())
+       break;
+
+     // If we have no more leaf functions to inline, we use a greedy heuristic
+     // that selects the function with the most incoming calls. The intuition
+     // is that inlining this function will eliminate the most call sites and
+     // give the highest chance of creating new leaf functions.
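+     // Ties are broken by the deterministic iteration order of the SetVector,
+     // which is why AlwaysInlineFunctions is a SetVector rather than a set.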
+     if (Worklist.empty()) {
+       Function *BestFunc = nullptr;
+       unsigned MaxIncoming = 0;
+       for (Function *F : AlwaysInlineFunctions) {
+         if (ProcessedFunctions.count(F))
+           continue;
+
+         unsigned CurrentIncoming = IncomingAICount.lookup(F);
+         if (!BestFunc || CurrentIncoming > MaxIncoming) {
+           BestFunc = F;
+           MaxIncoming = CurrentIncoming;
+         }
+       }
+       Worklist.push_back(BestFunc);
+     }
+   }
+
+   if (!InlinedComdatFns.empty()) {
+     filterDeadComdatFunctions(InlinedComdatFns);
+     for (Function *F : InlinedComdatFns) {
+       M.getFunctionList().erase(F);
+       Changed = true;
+     }
+   }
+
+   // We may have missed some call sites that were marked as always_inline but
+   // for which the callee function itself wasn't always_inline. Call the
+   // standard handler here to deal with those.
+   Changed |= AlwaysInlineImpl(M, InsertLifetime, PSI, &FAM, GetAssumptionCache,
+                               GetAAR);
+   return Changed;
+ }
+
struct AlwaysInlinerLegacyPass : public ModulePass {
  bool InsertLifetime;

@@ -177,8 +448,14 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
  };
  auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);

- bool Changed = AlwaysInlineImpl(M, InsertLifetime, PSI, &FAM,
-                                 GetAssumptionCache, GetAAR);
+ bool Changed = false;
+ if (EnableMem2RegInterleaving) {
+   Changed = AlwaysInlineInterleavedMem2RegImpl(M, InsertLifetime, PSI, FAM,
+                                                GetAssumptionCache, GetAAR);
+ } else {
+   Changed = AlwaysInlineImpl(M, InsertLifetime, PSI, &FAM, GetAssumptionCache,
+                              GetAAR);
+ }

  if (!Changed)
    return PreservedAnalyses::all();