Thanks for visiting codestin.com
Credit goes to llvm.org

LLVM 22.0.0git
SampleProfile.cpp
Go to the documentation of this file.
1//===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SampleProfileLoader transformation. This pass
10// reads a profile file generated by a sampling profiler (e.g. Linux Perf -
11// http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
12// profile information in the given profile.
13//
14// This pass generates branch weight annotations on the IR:
15//
16// - prof: Represents branch weights. This annotation is added to branches
17// to indicate the weights of each edge coming out of the branch.
18// The weight of each edge is the weight of the target block for
19// that edge. The weight of a block B is computed as the maximum
20// number of samples found in B.
21//
22//===----------------------------------------------------------------------===//
23
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/DenseSet.h"
28#include "llvm/ADT/MapVector.h"
32#include "llvm/ADT/Statistic.h"
33#include "llvm/ADT/StringRef.h"
34#include "llvm/ADT/Twine.h"
45#include "llvm/IR/BasicBlock.h"
46#include "llvm/IR/DebugLoc.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/GlobalValue.h"
50#include "llvm/IR/InstrTypes.h"
51#include "llvm/IR/Instruction.h"
54#include "llvm/IR/LLVMContext.h"
55#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Module.h"
57#include "llvm/IR/PassManager.h"
59#include "llvm/IR/PseudoProbe.h"
66#include "llvm/Support/Debug.h"
70#include "llvm/Transforms/IPO.h"
81#include <algorithm>
82#include <cassert>
83#include <cstdint>
84#include <functional>
85#include <limits>
86#include <map>
87#include <memory>
88#include <queue>
89#include <string>
90#include <system_error>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace sampleprof;
96using namespace llvm::sampleprofutil;
98#define DEBUG_TYPE "sample-profile"
99#define CSINLINE_DEBUG DEBUG_TYPE "-inline"
100
101STATISTIC(NumCSInlined,
102 "Number of functions inlined with context sensitive profile");
103STATISTIC(NumCSNotInlined,
104 "Number of functions not inlined with context sensitive profile");
105STATISTIC(NumMismatchedProfile,
106 "Number of functions with CFG mismatched profile");
107STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
108STATISTIC(NumDuplicatedInlinesite,
109 "Number of inlined callsites with a partial distribution factor");
110
111STATISTIC(NumCSInlinedHitMinLimit,
112 "Number of functions with FDO inline stopped due to min size limit");
113STATISTIC(NumCSInlinedHitMaxLimit,
114 "Number of functions with FDO inline stopped due to max size limit");
116 NumCSInlinedHitGrowthLimit,
117 "Number of functions with FDO inline stopped due to growth size limit");
118
119namespace llvm {
120
121// Command line option to specify the file to read samples from. This is
122// mainly used for debugging.
124 "sample-profile-file", cl::init(""), cl::value_desc("filename"),
125 cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
126
127// The named file contains a set of transformations that may have been applied
128// to the symbol names between the program from which the sample data was
129// collected and the current program's symbols.
131 "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
132 cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
133
135 "salvage-stale-profile", cl::Hidden, cl::init(false),
136 cl::desc("Salvage stale profile by fuzzy matching and use the remapped "
137 "location for sample profile query."));
139 SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false),
140 cl::desc("Salvage unused profile by matching with new "
141 "functions on call graph."));
142
144 "report-profile-staleness", cl::Hidden, cl::init(false),
145 cl::desc("Compute and report stale profile statistical metrics."));
146
148 "persist-profile-staleness", cl::Hidden, cl::init(false),
149 cl::desc("Compute stale profile statistical metrics and write it into the "
150 "native object file(.llvm_stats section)."));
151
153 "profile-sample-accurate", cl::Hidden, cl::init(false),
154 cl::desc("If the sample profile is accurate, we will mark all un-sampled "
155 "callsite and function as having 0 samples. Otherwise, treat "
156 "un-sampled callsites and functions conservatively as unknown. "));
157
159 "profile-sample-block-accurate", cl::Hidden, cl::init(false),
160 cl::desc("If the sample profile is accurate, we will mark all un-sampled "
161 "branches and calls as having 0 samples. Otherwise, treat "
162 "them conservatively as unknown. "));
163
165 "profile-accurate-for-symsinlist", cl::Hidden, cl::init(true),
166 cl::desc("For symbols in profile symbol list, regard their profiles to "
167 "be accurate. It may be overridden by profile-sample-accurate. "));
168
170 "sample-profile-merge-inlinee", cl::Hidden, cl::init(true),
171 cl::desc("Merge past inlinee's profile to outline version if sample "
172 "profile loader decided not to inline a call site. It will "
173 "only be enabled when top-down order of profile loading is "
174 "enabled. "));
175
177 "sample-profile-top-down-load", cl::Hidden, cl::init(true),
178 cl::desc("Do profile annotation and inlining for functions in top-down "
179 "order of call graph during sample profile loading. It only "
180 "works for new pass manager. "));
181
182static cl::opt<bool>
183 UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden,
184 cl::desc("Process functions in a top-down order "
185 "defined by the profiled call graph when "
186 "-sample-profile-top-down-load is on."));
187
189 "sample-profile-inline-size", cl::Hidden, cl::init(false),
190 cl::desc("Inline cold call sites in profile loader if it's beneficial "
191 "for code size."));
192
193// Since profiles are consumed by many passes, turning on this option has
194// side effects. For instance, pre-link SCC inliner would see merged profiles
195// and inline the hot functions (that are skipped in this pass).
197 "disable-sample-loader-inlining", cl::Hidden, cl::init(false),
198 cl::desc(
199 "If true, artificially skip inline transformation in sample-loader "
200 "pass, and merge (or scale) profiles (as configured by "
201 "--sample-profile-merge-inlinee)."));
202
204 SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
205 cl::desc("Sort profiled recursion by edge weights."));
206
208 "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
209 cl::desc("The size growth ratio limit for proirity-based sample profile "
210 "loader inlining."));
211
213 "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
214 cl::desc("The lower bound of size growth limit for "
215 "proirity-based sample profile loader inlining."));
216
218 "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
219 cl::desc("The upper bound of size growth limit for "
220 "proirity-based sample profile loader inlining."));
221
223 "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
224 cl::desc("Hot callsite threshold for proirity-based sample profile loader "
225 "inlining."));
226
228 "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
229 cl::desc("Threshold for inlining cold callsites"));
230} // namespace llvm
231
233 "sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25),
234 cl::desc(
235 "Relative hotness percentage threshold for indirect "
236 "call promotion in proirity-based sample profile loader inlining."));
237
239 "sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1),
240 cl::desc(
241 "Skip relative hotness check for ICP up to given number of targets."));
242
244 "hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000),
245 cl::desc("A function is considered hot for staleness error check if its "
246 "total sample count is above the specified percentile"));
247
249 "min-functions-for-staleness-error", cl::Hidden, cl::init(50),
250 cl::desc("Skip the check if the number of hot functions is smaller than "
251 "the specified number."));
252
254 "precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80),
255 cl::desc("Reject the profile if the mismatch percent is higher than the "
256 "given number."));
257
259 "sample-profile-prioritized-inline", cl::Hidden,
260 cl::desc("Use call site prioritized inlining for sample profile loader. "
261 "Currently only CSSPGO is supported."));
262
264 "sample-profile-use-preinliner", cl::Hidden,
265 cl::desc("Use the preinliner decisions stored in profile context."));
266
268 "sample-profile-recursive-inline", cl::Hidden,
269 cl::desc("Allow sample loader inliner to inline recursive calls."));
270
272 "sample-profile-remove-probe", cl::Hidden, cl::init(false),
273 cl::desc("Remove pseudo-probe after sample profile annotation."));
274
276 "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
277 cl::desc(
278 "Optimization remarks file containing inline remarks to be replayed "
279 "by inlining from sample profile loader."),
280 cl::Hidden);
281
283 "sample-profile-inline-replay-scope",
286 "Replay on functions that have remarks associated "
287 "with them (default)"),
289 "Replay on the entire module")),
290 cl::desc("Whether inline replay should be applied to the entire "
291 "Module or just the Functions (default) that are present as "
292 "callers in remarks during sample profile inlining."),
293 cl::Hidden);
294
296 "sample-profile-inline-replay-fallback",
301 "All decisions not in replay send to original advisor (default)"),
303 "AlwaysInline", "All decisions not in replay are inlined"),
305 "All decisions not in replay are not inlined")),
306 cl::desc("How sample profile inline replay treats sites that don't come "
307 "from the replay. Original: defers to original advisor, "
308 "AlwaysInline: inline all sites not in replay, NeverInline: "
309 "inline no sites not in replay"),
310 cl::Hidden);
311
313 "sample-profile-inline-replay-format",
316 clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
318 "<Line Number>:<Column Number>"),
320 "LineDiscriminator", "<Line Number>.<Discriminator>"),
322 "LineColumnDiscriminator",
323 "<Line Number>:<Column Number>.<Discriminator> (default)")),
324 cl::desc("How sample profile inline replay file is formatted"), cl::Hidden);
325
327 MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
328 cl::desc("Max number of promotions for a single indirect "
329 "call callsite in sample profile loader"));
330
332 "overwrite-existing-weights", cl::Hidden, cl::init(false),
333 cl::desc("Ignore existing branch weights on IR and always overwrite."));
334
336 "annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false),
337 cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for "
338 "sample-profile inline pass name."));
339
340namespace llvm {
342}
343
344namespace {
345
346using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
347using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
348using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
349using EdgeWeightMap = DenseMap<Edge, uint64_t>;
350using BlockEdgeMap =
352
353class GUIDToFuncNameMapper {
354public:
355 GUIDToFuncNameMapper(Module &M, SampleProfileReader &Reader,
356 DenseMap<uint64_t, StringRef> &GUIDToFuncNameMap)
357 : CurrentReader(Reader), CurrentModule(M),
358 CurrentGUIDToFuncNameMap(GUIDToFuncNameMap) {
359 if (!CurrentReader.useMD5())
360 return;
361
362 for (const auto &F : CurrentModule) {
363 StringRef OrigName = F.getName();
364 CurrentGUIDToFuncNameMap.insert(
365 {Function::getGUIDAssumingExternalLinkage(OrigName), OrigName});
366
367 // Local to global var promotion used by optimization like thinlto
368 // will rename the var and add suffix like ".llvm.xxx" to the
369 // original local name. In sample profile, the suffixes of function
370 // names are all stripped. Since it is possible that the mapper is
371 // built in post-thin-link phase and var promotion has been done,
372 // we need to add the substring of function name without the suffix
373 // into the GUIDToFuncNameMap.
375 if (CanonName != OrigName)
376 CurrentGUIDToFuncNameMap.insert(
377 {Function::getGUIDAssumingExternalLinkage(CanonName), CanonName});
378 }
379
380 // Update GUIDToFuncNameMap for each function including inlinees.
381 SetGUIDToFuncNameMapForAll(&CurrentGUIDToFuncNameMap);
382 }
383
384 ~GUIDToFuncNameMapper() {
385 if (!CurrentReader.useMD5())
386 return;
387
388 CurrentGUIDToFuncNameMap.clear();
389
390 // Reset GUIDToFuncNameMap for of each function as they're no
391 // longer valid at this point.
392 SetGUIDToFuncNameMapForAll(nullptr);
393 }
394
395private:
396 void SetGUIDToFuncNameMapForAll(DenseMap<uint64_t, StringRef> *Map) {
397 std::queue<FunctionSamples *> FSToUpdate;
398 for (auto &IFS : CurrentReader.getProfiles()) {
399 FSToUpdate.push(&IFS.second);
400 }
401
402 while (!FSToUpdate.empty()) {
403 FunctionSamples *FS = FSToUpdate.front();
404 FSToUpdate.pop();
405 FS->GUIDToFuncNameMap = Map;
406 for (const auto &ICS : FS->getCallsiteSamples()) {
407 const FunctionSamplesMap &FSMap = ICS.second;
408 for (const auto &IFS : FSMap) {
409 FunctionSamples &FS = const_cast<FunctionSamples &>(IFS.second);
410 FSToUpdate.push(&FS);
411 }
412 }
413 }
414 }
415
417 Module &CurrentModule;
418 DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
419};
420
421// Inline candidate used by iterative callsite prioritized inliner
422struct InlineCandidate {
423 CallBase *CallInstr;
424 const FunctionSamples *CalleeSamples;
425 // Prorated callsite count, which will be used to guide inlining. For example,
426 // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
427 // copies will get their own distribution factors and their prorated counts
428 // will be used to decide if they should be inlined independently.
429 uint64_t CallsiteCount;
430 // Call site distribution factor to prorate the profile samples for a
431 // duplicated callsite. Default value is 1.0.
432 float CallsiteDistribution;
433};
434
435// Inline candidate comparer using call site weight
436struct CandidateComparer {
437 bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
438 if (LHS.CallsiteCount != RHS.CallsiteCount)
439 return LHS.CallsiteCount < RHS.CallsiteCount;
440
441 const FunctionSamples *LCS = LHS.CalleeSamples;
442 const FunctionSamples *RCS = RHS.CalleeSamples;
443 // In inline replay mode, CalleeSamples may be null and the order doesn't
444 // matter.
445 if (!LCS || !RCS)
446 return LCS;
447
448 // Tie breaker using number of samples try to favor smaller functions first
449 if (LCS->getBodySamples().size() != RCS->getBodySamples().size())
450 return LCS->getBodySamples().size() > RCS->getBodySamples().size();
451
452 // Tie breaker using GUID so we have stable/deterministic inlining order
453 return LCS->getGUID() < RCS->getGUID();
454 }
455};
456
457using CandidateQueue =
459 CandidateComparer>;
460
461/// Sample profile pass.
462///
463/// This pass reads profile data from the file specified by
464/// -sample-profile-file and annotates every affected function with the
465/// profile information found in that file.
466class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
467public:
468 SampleProfileLoader(
469 StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
470 IntrusiveRefCntPtr<vfs::FileSystem> FS,
471 std::function<AssumptionCache &(Function &)> GetAssumptionCache,
472 std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
473 std::function<const TargetLibraryInfo &(Function &)> GetTLI,
474 LazyCallGraph &CG, bool DisableSampleProfileInlining,
475 bool UseFlattenedProfile)
476 : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName),
477 std::move(FS)),
478 GetAC(std::move(GetAssumptionCache)),
479 GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
480 CG(CG), LTOPhase(LTOPhase),
481 AnnotatedPassName(AnnotateSampleProfileInlinePhase
482 ? llvm::AnnotateInlinePassName(InlineContext{
485 DisableSampleProfileInlining(DisableSampleProfileInlining),
486 UseFlattenedProfile(UseFlattenedProfile) {}
487
488 bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
489 bool runOnModule(Module &M, ModuleAnalysisManager &AM,
490 ProfileSummaryInfo *_PSI);
491
492protected:
493 bool runOnFunction(Function &F, ModuleAnalysisManager &AM);
494 bool emitAnnotations(Function &F);
495 ErrorOr<uint64_t> getInstWeight(const Instruction &I) override;
496 const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
497 const FunctionSamples *
498 findFunctionSamples(const Instruction &I) const override;
499 std::vector<const FunctionSamples *>
500 findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
501 void findExternalInlineCandidate(CallBase *CB, const FunctionSamples *Samples,
502 DenseSet<GlobalValue::GUID> &InlinedGUIDs,
503 uint64_t Threshold);
504 // Attempt to promote indirect call and also inline the promoted call
505 bool tryPromoteAndInlineCandidate(
506 Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
507 uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
508
509 bool inlineHotFunctions(Function &F,
510 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
511 std::optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
512 bool getExternalInlineAdvisorShouldInline(CallBase &CB);
513 InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
514 bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
515 bool
516 tryInlineCandidate(InlineCandidate &Candidate,
517 SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
518 bool
519 inlineHotFunctionsWithPriority(Function &F,
520 DenseSet<GlobalValue::GUID> &InlinedGUIDs);
521 // Inline cold/small functions in addition to hot ones
522 bool shouldInlineColdCallee(CallBase &CallInst);
523 void emitOptimizationRemarksForInlineCandidates(
524 const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
525 bool Hot);
526 void promoteMergeNotInlinedContextSamples(
527 MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
528 const Function &F);
529 std::vector<Function *> buildFunctionOrder(Module &M, LazyCallGraph &CG);
530 std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(Module &M);
531 void generateMDProfMetadata(Function &F);
532 bool rejectHighStalenessProfile(Module &M, ProfileSummaryInfo *PSI,
533 const SampleProfileMap &Profiles);
534 void removePseudoProbeInstsDiscriminator(Module &M);
535
536 /// Map from function name to Function *. Used to find the function from
537 /// the function name. If the function name contains suffix, additional
538 /// entry is added to map from the stripped name to the function if there
539 /// is one-to-one mapping.
540 HashKeyMap<std::unordered_map, FunctionId, Function *> SymbolMap;
541
542 /// Map from function name to profile name generated by call-graph based
543 /// profile fuzzy matching(--salvage-unused-profile).
544 HashKeyMap<std::unordered_map, FunctionId, FunctionId> FuncNameToProfNameMap;
545
546 std::function<AssumptionCache &(Function &)> GetAC;
547 std::function<TargetTransformInfo &(Function &)> GetTTI;
548 std::function<const TargetLibraryInfo &(Function &)> GetTLI;
549 LazyCallGraph &CG;
550
551 /// Profile tracker for different context.
552 std::unique_ptr<SampleContextTracker> ContextTracker;
553
554 /// Flag indicating which LTO/ThinLTO phase the pass is invoked in.
555 ///
556 /// We need to know the LTO phase because for example in ThinLTOPrelink
557 /// phase, in annotation, we should not promote indirect calls. Instead,
558 /// we will mark GUIDs that needs to be annotated to the function.
559 const ThinOrFullLTOPhase LTOPhase;
560 const std::string AnnotatedPassName;
561
562 /// Profle Symbol list tells whether a function name appears in the binary
563 /// used to generate the current profile.
564 std::shared_ptr<ProfileSymbolList> PSL;
565
566 // Information recorded when we declined to inline a call site
567 // because we have determined it is too cold is accumulated for
568 // each callee function. Initially this is just the entry count.
569 struct NotInlinedProfileInfo {
570 uint64_t entryCount;
571 };
572 DenseMap<Function *, NotInlinedProfileInfo> notInlinedCallInfo;
573
574 // GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
575 // all the function symbols defined or declared in current module.
576 DenseMap<uint64_t, StringRef> GUIDToFuncNameMap;
577
578 // All the Names used in FunctionSamples including outline function
579 // names, inline instance names and call target names.
580 StringSet<> NamesInProfile;
581 // MD5 version of NamesInProfile. Either NamesInProfile or GUIDsInProfile is
582 // populated, depends on whether the profile uses MD5. Because the name table
583 // generally contains several magnitude more entries than the number of
584 // functions, we do not want to convert all names from one form to another.
585 llvm::DenseSet<uint64_t> GUIDsInProfile;
586
587 // For symbol in profile symbol list, whether to regard their profiles
588 // to be accurate. It is mainly decided by existance of profile symbol
589 // list and -profile-accurate-for-symsinlist flag, but it can be
590 // overriden by -profile-sample-accurate or profile-sample-accurate
591 // attribute.
592 bool ProfAccForSymsInList;
593
594 bool DisableSampleProfileInlining;
595
596 bool UseFlattenedProfile;
597
598 // External inline advisor used to replay inline decision from remarks.
599 std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
600
601 // A helper to implement the sample profile matching algorithm.
602 std::unique_ptr<SampleProfileMatcher> MatchingManager;
603
604private:
605 const char *getAnnotatedRemarkPassName() const {
606 return AnnotatedPassName.c_str();
607 }
608};
609} // end anonymous namespace
610
611namespace llvm {
612template <>
613inline bool SampleProfileInference<Function>::isExit(const BasicBlock *BB) {
614 return succ_empty(BB);
615}
616
617template <>
618inline void SampleProfileInference<Function>::findUnlikelyJumps(
619 const std::vector<const BasicBlockT *> &BasicBlocks,
620 BlockEdgeMap &Successors, FlowFunction &Func) {
621 for (auto &Jump : Func.Jumps) {
622 const auto *BB = BasicBlocks[Jump.Source];
623 const auto *Succ = BasicBlocks[Jump.Target];
624 const Instruction *TI = BB->getTerminator();
625 // Check if a block ends with InvokeInst and mark non-taken branch unlikely.
626 // In that case block Succ should be a landing pad
627 const auto &Succs = Successors[BB];
628 if (Succs.size() == 2 && Succs.back() == Succ) {
629 if (isa<InvokeInst>(TI)) {
630 Jump.IsUnlikely = true;
631 }
632 }
633 const Instruction *SuccTI = Succ->getTerminator();
634 // Check if the target block contains UnreachableInst and mark it unlikely
635 if (SuccTI->getNumSuccessors() == 0) {
636 if (isa<UnreachableInst>(SuccTI)) {
637 Jump.IsUnlikely = true;
638 }
639 }
640 }
641}
642
643template <>
645 Function &F) {
646 DT.reset(new DominatorTree);
647 DT->recalculate(F);
648
649 PDT.reset(new PostDominatorTree(F));
650
651 LI.reset(new LoopInfo);
652 LI->analyze(*DT);
653}
654} // namespace llvm
655
656ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
658 return getProbeWeight(Inst);
659
660 const DebugLoc &DLoc = Inst.getDebugLoc();
661 if (!DLoc)
662 return std::error_code();
663
664 // Ignore all intrinsics, phinodes and branch instructions.
665 // Branch and phinodes instruction usually contains debug info from sources
666 // outside of the residing basic block, thus we ignore them during annotation.
667 if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
668 return std::error_code();
669
670 // For non-CS profile, if a direct call/invoke instruction is inlined in
671 // profile (findCalleeFunctionSamples returns non-empty result), but not
672 // inlined here, it means that the inlined callsite has no sample, thus the
673 // call instruction should have 0 count.
674 // For CS profile, the callsite count of previously inlined callees is
675 // populated with the entry count of the callees.
677 if (const auto *CB = dyn_cast<CallBase>(&Inst))
678 if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
679 return 0;
680
681 return getInstWeightImpl(Inst);
682}
683
684/// Get the FunctionSamples for a call instruction.
685///
686/// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
687/// instance in which that call instruction is calling to. It contains
688/// all samples that resides in the inlined instance. We first find the
689/// inlined instance in which the call instruction is from, then we
690/// traverse its children to find the callsite with the matching
691/// location.
692///
693/// \param Inst Call/Invoke instruction to query.
694///
695/// \returns The FunctionSamples pointer to the inlined instance.
696const FunctionSamples *
697SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const {
698 const DILocation *DIL = Inst.getDebugLoc();
699 if (!DIL) {
700 return nullptr;
701 }
702
703 StringRef CalleeName;
704 if (Function *Callee = Inst.getCalledFunction())
705 CalleeName = Callee->getName();
706
708 return ContextTracker->getCalleeContextSamplesFor(Inst, CalleeName);
709
710 const FunctionSamples *FS = findFunctionSamples(Inst);
711 if (FS == nullptr)
712 return nullptr;
713
714 return FS->findFunctionSamplesAt(FunctionSamples::getCallSiteIdentifier(DIL),
715 CalleeName, Reader->getRemapper(),
716 &FuncNameToProfNameMap);
717}
718
719/// Returns a vector of FunctionSamples that are the indirect call targets
720/// of \p Inst. The vector is sorted by the total number of samples. Stores
721/// the total call count of the indirect call in \p Sum.
722std::vector<const FunctionSamples *>
723SampleProfileLoader::findIndirectCallFunctionSamples(
724 const Instruction &Inst, uint64_t &Sum) const {
725 const DILocation *DIL = Inst.getDebugLoc();
726 std::vector<const FunctionSamples *> R;
727
728 if (!DIL) {
729 return R;
730 }
731
732 auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
733 assert(L && R && "Expect non-null FunctionSamples");
734 if (L->getHeadSamplesEstimate() != R->getHeadSamplesEstimate())
735 return L->getHeadSamplesEstimate() > R->getHeadSamplesEstimate();
736 return L->getGUID() < R->getGUID();
737 };
738
740 auto CalleeSamples =
741 ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
742 if (CalleeSamples.empty())
743 return R;
744
745 // For CSSPGO, we only use target context profile's entry count
746 // as that already includes both inlined callee and non-inlined ones..
747 Sum = 0;
748 for (const auto *const FS : CalleeSamples) {
749 Sum += FS->getHeadSamplesEstimate();
750 R.push_back(FS);
751 }
752 llvm::sort(R, FSCompare);
753 return R;
754 }
755
756 const FunctionSamples *FS = findFunctionSamples(Inst);
757 if (FS == nullptr)
758 return R;
759
761 Sum = 0;
762 if (auto T = FS->findCallTargetMapAt(CallSite))
763 for (const auto &T_C : *T)
764 Sum += T_C.second;
765 if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(CallSite)) {
766 if (M->empty())
767 return R;
768 for (const auto &NameFS : *M) {
769 Sum += NameFS.second.getHeadSamplesEstimate();
770 R.push_back(&NameFS.second);
771 }
772 llvm::sort(R, FSCompare);
773 }
774 return R;
775}
776
777const FunctionSamples *
778SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
780 std::optional<PseudoProbe> Probe = extractProbe(Inst);
781 if (!Probe)
782 return nullptr;
783 }
784
785 const DILocation *DIL = Inst.getDebugLoc();
786 if (!DIL)
787 return Samples;
788
789 auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
790 if (it.second) {
792 it.first->second = ContextTracker->getContextSamplesFor(DIL);
793 else
794 it.first->second = Samples->findFunctionSamples(
795 DIL, Reader->getRemapper(), &FuncNameToProfNameMap);
796 }
797 return it.first->second;
798}
799
800/// Check whether the indirect call promotion history of \p Inst allows
801/// the promotion for \p Candidate.
802/// If the profile count for the promotion candidate \p Candidate is
803/// NOMORE_ICP_MAGICNUM, it means \p Candidate has already been promoted
804/// for \p Inst. If we already have at least MaxNumPromotions
805/// NOMORE_ICP_MAGICNUM count values in the value profile of \p Inst, we
806/// cannot promote for \p Inst anymore.
807static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) {
808 uint64_t TotalCount = 0;
809 auto ValueData = getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget,
810 MaxNumPromotions, TotalCount, true);
811 // No valid value profile so no promoted targets have been recorded
812 // before. Ok to do ICP.
813 if (ValueData.empty())
814 return true;
815
816 unsigned NumPromoted = 0;
817 for (const auto &V : ValueData) {
818 if (V.Count != NOMORE_ICP_MAGICNUM)
819 continue;
820
821 // If the promotion candidate has NOMORE_ICP_MAGICNUM count in the
822 // metadata, it means the candidate has been promoted for this
823 // indirect call.
824 if (V.Value == Function::getGUIDAssumingExternalLinkage(Candidate))
825 return false;
826 NumPromoted++;
827 // If already have MaxNumPromotions promotion, don't do it anymore.
828 if (NumPromoted == MaxNumPromotions)
829 return false;
830 }
831 return true;
832}
833
834/// Update indirect call target profile metadata for \p Inst.
835/// Usually \p Sum is the sum of counts of all the targets for \p Inst.
836/// If it is 0, it means updateIDTMetaData is used to mark a
837/// certain target to be promoted already. If it is not zero,
838/// we expect to use it to update the total count in the value profile.
///
/// \param CallTargets value/count pairs to record. When \p Sum is 0 this is
/// asserted to hold exactly one entry whose count is NOMORE_ICP_MAGICNUM.
/// \param Sum total count to annotate, or 0 to mark a single promoted target.
839static void
 // NOTE(review): listing line 840 is absent from this copy; the function name
 // and the parameter referred to below as `Inst` are presumably declared there.
841 const SmallVectorImpl<InstrProfValueData> &CallTargets,
842 uint64_t Sum) {
843 // Bail out early if MaxNumPromotions is zero.
844 // This prevents allocating an array of zero length below.
845 //
846 // Note `updateIDTMetaData` is called in two places so check
847 // `MaxNumPromotions` inside it.
848 if (MaxNumPromotions == 0)
849 return;
850 // OldSum is the existing total count in the value profile data.
851 uint64_t OldSum = 0;
852 auto ValueData = getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget,
853 MaxNumPromotions, OldSum, true);
854
 // Maps a target value to the count that will be written back.
855 DenseMap<uint64_t, uint64_t> ValueCountMap;
856 if (Sum == 0) {
857 assert((CallTargets.size() == 1 &&
858 CallTargets[0].Count == NOMORE_ICP_MAGICNUM) &&
859 "If sum is 0, assume only one element in CallTargets "
860 "with count being NOMORE_ICP_MAGICNUM");
861 // Initialize ValueCountMap with existing value profile data.
862 for (const auto &V : ValueData)
863 ValueCountMap[V.Value] = V.Count;
864 auto Pair =
865 ValueCountMap.try_emplace(CallTargets[0].Value, CallTargets[0].Count);
866 // If the target already exists in value profile, decrease the total
867 // count OldSum and reset the target's count to NOMORE_ICP_MAGICNUM.
868 if (!Pair.second) {
869 OldSum -= Pair.first->second;
870 Pair.first->second = NOMORE_ICP_MAGICNUM;
871 }
 // The (possibly reduced) existing total becomes the total to annotate.
872 Sum = OldSum;
873 } else {
874 // Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM
875 // counts in the value profile.
876 for (const auto &V : ValueData) {
877 if (V.Count == NOMORE_ICP_MAGICNUM)
878 ValueCountMap[V.Value] = V.Count;
879 }
880
881 for (const auto &Data : CallTargets) {
882 auto Pair = ValueCountMap.try_emplace(Data.Value, Data.Count);
 // A successful insertion means the target was not marked as promoted.
883 if (Pair.second)
884 continue;
885 // The target represented by Data.Value has already been promoted.
886 // Keep the count as NOMORE_ICP_MAGICNUM in the profile and decrease
887 // Sum by Data.Count.
888 assert(Sum >= Data.Count && "Sum should never be less than Data.Count");
889 Sum -= Data.Count;
890 }
891 }
892
 // NOTE(review): listing line 893 is absent from this copy; `NewCallTargets`
 // is presumably declared there.
894 for (const auto &ValueCount : ValueCountMap) {
895 NewCallTargets.emplace_back(
896 InstrProfValueData{ValueCount.first, ValueCount.second});
897 }
898
 // Order the targets by descending (Count, Value).
899 llvm::sort(NewCallTargets,
900 [](const InstrProfValueData &L, const InstrProfValueData &R) {
901 return std::tie(L.Count, L.Value) > std::tie(R.Count, R.Value);
902 });
903
 // Never annotate more than MaxNumPromotions targets.
904 uint32_t MaxMDCount =
905 std::min(NewCallTargets.size(), static_cast<size_t>(MaxNumPromotions));
906 annotateValueSite(*Inst.getParent()->getParent()->getParent(), Inst,
907 NewCallTargets, Sum, IPVK_IndirectCallTarget, MaxMDCount);
908}
909
910/// Attempt to promote indirect call and also inline the promoted call.
911///
912/// \param F Caller function.
913/// \param Candidate ICP and inline candidate.
914/// \param SumOrigin Original sum of target counts for indirect call before
915/// promoting given candidate.
916/// \param Sum Prorated sum of remaining target counts for indirect call
917/// after promoting given candidate.
918/// \param InlinedCallSite Output vector for new call sites exposed after
919/// inlining.
/// \returns true only when the call was promoted and the promoted direct call
/// was then inlined; false in every other case.
920bool SampleProfileLoader::tryPromoteAndInlineCandidate(
921 Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
922 SmallVector<CallBase *, 8> *InlinedCallSite) {
923 // Bail out early if sample-loader inliner is disabled.
924 if (DisableSampleProfileInlining)
925 return false;
926
927 // Bail out early if MaxNumPromotions is zero.
928 // This prevents allocating an array of zero length in callees below.
929 if (MaxNumPromotions == 0)
930 return false;
931 auto CalleeFunctionName = Candidate.CalleeSamples->getFunction();
932 auto R = SymbolMap.find(CalleeFunctionName);
 // The callee must map to a non-null function in this module's symbol map.
933 if (R == SymbolMap.end() || !R->second)
934 return false;
935
936 auto &CI = *Candidate.CallInstr;
937 if (!doesHistoryAllowICP(CI, R->second->getName()))
938 return false;
939
940 const char *Reason = "Callee function not available";
941 // R->second != &F is to prevent promoting a recursive call.
942 // If it is a recursive call, we do not inline it as it could bloat
943 // the code exponentially. There are better ways to handle this, e.g.
944 // clone the caller first, and inline the cloned caller if it is
945 // recursive. As llvm does not inline recursive calls, we will
946 // simply ignore it instead of handling it explicitly.
947 if (!R->second->isDeclaration() && R->second->getSubprogram() &&
948 R->second->hasFnAttribute("use-sample-profile") &&
949 R->second != &F && isLegalToPromote(CI, R->second, &Reason)) {
950 // For promoted target, set its value with NOMORE_ICP_MAGICNUM count
951 // in the value profile metadata so the target won't be promoted again.
952 SmallVector<InstrProfValueData, 1> SortedCallTargets = {InstrProfValueData{
953 Function::getGUIDAssumingExternalLinkage(R->second->getName()),
 // NOTE(review): listing line 954 is absent from this copy; it presumably
 // supplies the count and closes the initializer.
955 updateIDTMetaData(CI, SortedCallTargets, 0);
956
957 auto *DI = &pgo::promoteIndirectCall(
958 CI, R->second, Candidate.CallsiteCount, Sum, false, ORE);
 // NOTE(review): DI is the address of the returned reference, so this check
 // looks like it can never fail — confirm.
959 if (DI) {
960 Sum -= Candidate.CallsiteCount;
961 // Do not prorate the indirect callsite distribution since the original
962 // distribution will be used to scale down non-promoted profile target
963 // counts later. By doing this we lose track of the real callsite count
964 // for the leftover indirect callsite as a trade off for accurate call
965 // target counts.
966 // TODO: Ideally we would have two separate factors, one for call site
967 // counts and one is used to prorate call target counts.
968 // Do not update the promoted direct callsite distribution at this
969 // point since the original distribution combined with the callee profile
970 // will be used to prorate callsites from the callee if inlined. Once not
971 // inlined, the direct callsite distribution should be prorated so that
972 // it will reflect the real callsite counts.
973 Candidate.CallInstr = DI;
974 if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
975 bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
976 if (!Inlined) {
977 // Prorate the direct callsite distribution so that it reflects real
978 // callsite counts.
 // NOTE(review): listing line 979 is absent from this copy; the call
 // that receives the two arguments below presumably starts there.
980 *DI, static_cast<float>(Candidate.CallsiteCount) / SumOrigin);
981 }
982 return Inlined;
983 }
984 }
985 } else {
986 LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
 // NOTE(review): listing line 987 is absent from this copy.
988 Candidate.CallInstr->getName())<< " because "
989 << Reason << "\n");
990 }
991 return false;
992}
993
/// Decide whether the cold call site \p CallInst should still be inlined.
///
/// \returns false when the callee is not statically known or the inline cost
/// is "never"; true when the cost is "always"; otherwise whether the computed
/// cost does not exceed SampleColdCallSiteThreshold.
994bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) {
 // NOTE(review): listing line 995 is absent from this copy; the condition
 // guarding the early return below is presumably there.
996 return false;
997
998 Function *Callee = CallInst.getCalledFunction();
 // Indirect calls have no callee to analyze.
999 if (Callee == nullptr)
1000 return false;
1001
1002 InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee),
1003 GetAC, GetTLI);
1004
1005 if (Cost.isNever())
1006 return false;
1007
1008 if (Cost.isAlways())
1009 return true;
1010
1011 return Cost.getCost() <= SampleColdCallSiteThreshold;
1012}
1013
1014void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
1015 const SmallVectorImpl<CallBase *> &Candidates, const Function &F,
1016 bool Hot) {
1017 for (auto *I : Candidates) {
1018 Function *CalledFunction = I->getCalledFunction();
1019 if (CalledFunction) {
1020 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1021 "InlineAttempt", I->getDebugLoc(),
1022 I->getParent())
1023 << "previous inlining reattempted for "
1024 << (Hot ? "hotness: '" : "size: '")
1025 << ore::NV("Callee", CalledFunction) << "' into '"
1026 << ore::NV("Caller", &F) << "'");
1027 }
1028 }
1029}
1030
/// Collect into \p InlinedGUIDs the GUIDs of functions that should be
/// imported for the call site \p CB.
///
/// \param CB call site being considered; may be null.
/// \param Samples profile matched to the call site; may be null.
/// \param InlinedGUIDs output set of GUIDs to import.
/// \param Threshold minimum sample count for a callee or call target to be
/// considered; reset to 0 in the cases noted below.
1031void SampleProfileLoader::findExternalInlineCandidate(
1032 CallBase *CB, const FunctionSamples *Samples,
1033 DenseSet<GlobalValue::GUID> &InlinedGUIDs, uint64_t Threshold) {
1034
1035 // If ExternalInlineAdvisor(ReplayInlineAdvisor) wants to inline an external
1036 // function make sure it's imported
1037 if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
1038 // Samples may not exist for replayed function, if so
1039 // just add the direct GUID and move on
1040 if (!Samples) {
 // NOTE(review): getCalledFunction() is dereferenced without a null check;
 // presumably the advisor only accepts direct calls — confirm.
1041 InlinedGUIDs.insert(Function::getGUIDAssumingExternalLinkage(
1042 CB->getCalledFunction()->getName()));
1043 return;
1044 }
1045 // Otherwise, drop the threshold to import everything that we can
1046 Threshold = 0;
1047 }
1048
1049 // In some rare cases, call instruction could be changed after being pushed
1050 // into inline candidate queue, this is because earlier inlining may expose
1051 // constant propagation which can change indirect call to direct call. When
1052 // this happens, we may fail to find matching function samples for the
1053 // candidate later, even if a match was found when the candidate was enqueued.
1054 if (!Samples)
1055 return;
1056
1057 // For AutoFDO profile, retrieve candidate profiles by walking over
1058 // the nested inlinee profiles.
 // NOTE(review): listing lines 1059 and 1061 are absent from this copy; the
 // conditions guarding this early-return branch are presumably there.
1060 // Set threshold to zero to honor pre-inliner decision.
1062 Threshold = 0;
1063 Samples->findInlinedFunctions(InlinedGUIDs, SymbolMap, Threshold);
1064 return;
1065 }
1066
 // Breadth-first walk over the context trie rooted at the node for Samples.
1067 ContextTrieNode *Caller = ContextTracker->getContextNodeForProfile(Samples);
1068 std::queue<ContextTrieNode *> CalleeList;
1069 CalleeList.push(Caller);
1070 while (!CalleeList.empty()) {
1071 ContextTrieNode *Node = CalleeList.front();
1072 CalleeList.pop();
1073 FunctionSamples *CalleeSample = Node->getFunctionSamples();
1074 // For CSSPGO profile, retrieve candidate profile by walking over the
1075 // trie built for context profile. Note that also take call targets
1076 // even if callee doesn't have a corresponding context profile.
1077 if (!CalleeSample)
1078 continue;
1079
1080 // If pre-inliner decision is used, honor that for importing as well.
1081 bool PreInline =
 // NOTE(review): listing lines 1082-1083 are absent from this copy; the
 // initializer of PreInline is presumably there.
1084 if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
1085 continue;
1086
1087 Function *Func = SymbolMap.lookup(CalleeSample->getFunction());
1088 // Add to the import list only when it's defined out of module.
1089 if (!Func || Func->isDeclaration())
1090 InlinedGUIDs.insert(CalleeSample->getGUID());
1091
1092 // Import hot CallTargets, which may not be available in IR because full
1093 // profile annotation cannot be done until backend compilation in ThinLTO.
1094 for (const auto &BS : CalleeSample->getBodySamples())
1095 for (const auto &TS : BS.second.getCallTargets())
1096 if (TS.second > Threshold) {
1097 const Function *Callee = SymbolMap.lookup(TS.first);
1098 if (!Callee || Callee->isDeclaration())
1099 InlinedGUIDs.insert(TS.first.getHashCode());
1100 }
1101
1102 // Import hot child context profile associated with callees. Note that this
1103 // may have some overlap with the call target loop above, but doing this
1104 // based on child context profile again effectively allows us to use the max
1105 // of entry count and call target count to determine importing.
1106 for (auto &Child : Node->getAllChildContext()) {
1107 ContextTrieNode *CalleeNode = &Child.second;
1108 CalleeList.push(CalleeNode);
1109 }
1110 }
1111}
1112
1113/// Iteratively inline hot callsites of a function.
1114///
1115/// Iteratively traverse all callsites of the function \p F, so as to
1116/// find out callsites with corresponding inline instances.
1117///
1118/// For such callsites,
1119/// - If it is hot enough, inline the callsites and adds callsites of the callee
1120/// into the caller. If the call is an indirect call, first promote
1121/// it to direct call. Each indirect call is limited with a single target.
1122///
1123/// - If a callsite is not inlined, merge its profile to the outline
1124/// version (if --sample-profile-merge-inlinee is true), or scale the
1125/// counters of standalone function based on the profile of inlined
1126/// instances (if --sample-profile-merge-inlinee is false).
1127///
1128/// Later passes may consume the updated profiles.
1129///
1130/// \param F function to perform iterative inlining.
1131/// \param InlinedGUIDs a set to be updated to include all GUIDs that are
1132/// inlined in the profiled binary.
1133///
1134/// \returns True if any inlining happened.
1135bool SampleProfileLoader::inlineHotFunctions(
1136 Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1137 // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1138 // Profile symbol list is ignored when profile-sample-accurate is on.
1139 assert((!ProfAccForSymsInList ||
 // NOTE(review): listing line 1140 is absent from this copy; the first half
 // of the parenthesized condition is presumably there.
1141 !F.hasFnAttribute("profile-sample-accurate"))) &&
1142 "ProfAccForSymsInList should be false when profile-sample-accurate "
1143 "is enabled");
1144
 // Call sites with a matching profile that have not been inlined so far.
1145 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1146 bool Changed = false;
1147 bool LocalChanged = true;
 // Repeat until a full pass over F inlines nothing.
1148 while (LocalChanged) {
1149 LocalChanged = false;
 // NOTE(review): listing line 1150 is absent from this copy; `CIS`, the list
 // of call sites to try in this pass, is presumably declared there.
1151 for (auto &BB : F) {
1152 bool Hot = false;
1153 SmallVector<CallBase *, 10> AllCandidates;
1154 SmallVector<CallBase *, 10> ColdCandidates;
1155 for (auto &I : BB) {
1156 const FunctionSamples *FS = nullptr;
1157 if (auto *CB = dyn_cast<CallBase>(&I)) {
1158 if (!isa<IntrinsicInst>(I)) {
1159 if ((FS = findCalleeFunctionSamples(*CB))) {
1160 assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
1161 "GUIDToFuncNameMap has to be populated");
1162 AllCandidates.push_back(CB);
1163 if (FS->getHeadSamplesEstimate() > 0 ||
 // NOTE(review): listing line 1164 is absent from this copy; the
 // rest of this condition is presumably there.
1165 LocalNotInlinedCallSites.insert({CB, FS});
1166 if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1167 Hot = true;
1168 else if (shouldInlineColdCallee(*CB))
1169 ColdCandidates.push_back(CB);
1170 } else if (getExternalInlineAdvisorShouldInline(*CB)) {
1171 AllCandidates.push_back(CB);
1172 }
1173 }
1174 }
1175 }
 // A block with any hot call site contributes all of its candidates;
 // otherwise only the cold candidates judged worth inlining.
1176 if (Hot || ExternalInlineAdvisor) {
1177 CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end());
1178 emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true);
1179 } else {
1180 CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end());
1181 emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false);
1182 }
1183 }
1184 for (CallBase *I : CIS) {
1185 Function *CalledFunction = I->getCalledFunction();
1186 InlineCandidate Candidate = {I, LocalNotInlinedCallSites.lookup(I),
1187 0 /* dummy count */,
1188 1.0 /* dummy distribution factor */};
1189 // Do not inline recursive calls.
1190 if (CalledFunction == &F)
1191 continue;
1192 if (I->isIndirectCall()) {
1193 uint64_t Sum;
1194 for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
1195 uint64_t SumOrigin = Sum;
 // In ThinLTO pre-link only record import candidates; do not promote.
1196 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1197 findExternalInlineCandidate(I, FS, InlinedGUIDs,
1198 PSI->getOrCompHotCountThreshold());
1199 continue;
1200 }
1201 if (!callsiteIsHot(FS, PSI, ProfAccForSymsInList))
1202 continue;
1203
1204 Candidate = {I, FS, FS->getHeadSamplesEstimate(), 1.0};
1205 if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum)) {
1206 LocalNotInlinedCallSites.erase(I);
1207 LocalChanged = true;
1208 }
1209 }
1210 } else if (CalledFunction && CalledFunction->getSubprogram() &&
1211 !CalledFunction->isDeclaration()) {
1212 if (tryInlineCandidate(Candidate)) {
1213 LocalNotInlinedCallSites.erase(I);
1214 LocalChanged = true;
1215 }
1216 } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1217 findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1218 InlinedGUIDs,
1219 PSI->getOrCompHotCountThreshold());
1220 }
1221 }
1222 Changed |= LocalChanged;
1223 }
1224
1225 // For CS profile, profile for not inlined context will be merged when
1226 // base profile is being retrieved.
 // NOTE(review): listing line 1227 is absent from this copy; a condition
 // guarding the call below is presumably there.
1228 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1229 return Changed;
1230}
1231
/// Try to inline the direct call described by \p Candidate.
///
/// \param Candidate call site, callee profile and distribution factor.
/// \param InlinedCallSites if non-null, replaced on success with the call
/// sites exposed by inlining.
/// \returns true if the call was inlined.
1232bool SampleProfileLoader::tryInlineCandidate(
1233 InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
1234 // Do not attempt to inline a candidate if
1235 // --disable-sample-loader-inlining is true.
1236 if (DisableSampleProfileInlining)
1237 return false;
1238
1239 CallBase &CB = *Candidate.CallInstr;
1240 Function *CalledFunction = CB.getCalledFunction();
1241 assert(CalledFunction && "Expect a callee with definition");
 // Capture location and parent block now; they are used after InlineFunction.
1242 DebugLoc DLoc = CB.getDebugLoc();
1243 BasicBlock *BB = CB.getParent();
1244
1245 InlineCost Cost = shouldInlineCandidate(Candidate);
1246 if (Cost.isNever()) {
1247 ORE->emit(OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(),
1248 "InlineFail", DLoc, BB)
1249 << "incompatible inlining");
1250 return false;
1251 }
1252
 // The cost analysis says inlining is not worthwhile.
1253 if (!Cost)
1254 return false;
1255
1256 InlineFunctionInfo IFI(GetAC);
1257 IFI.UpdateProfile = false;
1258 InlineResult IR = InlineFunction(CB, IFI,
1259 /*MergeAttributes=*/true);
1260 if (!IR.isSuccess())
1261 return false;
1262
1263 // The call to InlineFunction erases the call instruction, so we can't pass
 // it here.
1264 emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(),
1265 Cost, true, getAnnotatedRemarkPassName());
1266
1267 // Now populate the list of newly exposed call sites.
1268 if (InlinedCallSites) {
1269 InlinedCallSites->clear();
1270 llvm::append_range(*InlinedCallSites, IFI.InlinedCallSites);
1271 }
1272
 // NOTE(review): listing line 1273 is absent from this copy; a condition
 // guarding the statement below is presumably there.
1274 ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
1275 ++NumCSInlined;
1276
1277 // Prorate inlined probes for a duplicated inlining callsite which probably
1278 // has a distribution less than 100%. Samples for an inlinee should be
1279 // distributed among the copies of the original callsite based on each
1280 // callsite's distribution factor for counts accuracy. Note that an inlined
1281 // probe may come with its own distribution factor if it has been duplicated
1282 // in the inlinee body. The two factors are multiplied to reflect the
1283 // aggregation of duplication.
1284 if (Candidate.CallsiteDistribution < 1) {
1285 for (auto &I : IFI.InlinedCallSites) {
1286 if (std::optional<PseudoProbe> Probe = extractProbe(*I))
1287 setProbeDistributionFactor(*I, Probe->Factor *
1288 Candidate.CallsiteDistribution);
1289 }
1290 NumDuplicatedInlinesite++;
1291 }
1292
1293 return true;
1294}
1295
1296bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
1297 CallBase *CB) {
1298 assert(CB && "Expect non-null call instruction");
1299
1300 if (isa<IntrinsicInst>(CB))
1301 return false;
1302
1303 // Find the callee's profile. For indirect call, find hottest target profile.
1304 const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
1305 // If ExternalInlineAdvisor wants to inline this site, do so even
1306 // if Samples are not present.
1307 if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
1308 return false;
1309
1310 float Factor = 1.0;
1311 if (std::optional<PseudoProbe> Probe = extractProbe(*CB))
1312 Factor = Probe->Factor;
1313
1314 uint64_t CallsiteCount =
1315 CalleeSamples ? CalleeSamples->getHeadSamplesEstimate() * Factor : 0;
1316 *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
1317 return true;
1318}
1319
1320std::optional<InlineCost>
1321SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
1322 std::unique_ptr<InlineAdvice> Advice = nullptr;
1323 if (ExternalInlineAdvisor) {
1324 Advice = ExternalInlineAdvisor->getAdvice(CB);
1325 if (Advice) {
1326 if (!Advice->isInliningRecommended()) {
1327 Advice->recordUnattemptedInlining();
1328 return InlineCost::getNever("not previously inlined");
1329 }
1330 Advice->recordInlining();
1331 return InlineCost::getAlways("previously inlined");
1332 }
1333 }
1334
1335 return {};
1336}
1337
1338bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
1339 std::optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB);
1340 return Cost ? !!*Cost : false;
1341}
1342
/// Compute the inline cost used to decide whether \p Candidate is inlined.
///
/// An external advisor's verdict, when available, is returned as is. After
/// that, "never"/"always" results from the call analyzer are honored, then a
/// preinliner decision stored in the profile, and finally the analyzer's cost
/// is paired with a sample-profile threshold.
1343InlineCost
1344SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
1345 if (std::optional<InlineCost> ReplayCost =
1346 getExternalInlineAdvisorCost(*Candidate.CallInstr))
1347 return *ReplayCost;
1348 // Adjust threshold based on call site hotness, only do this for callsite
1349 // prioritized inliner because otherwise cost-benefit check is done earlier.
1350 int SampleThreshold = SampleColdCallSiteThreshold;
 // NOTE(review): listing line 1351 is absent from this copy; the condition
 // opening the block closed at line 1356 is presumably there.
1352 if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
1353 SampleThreshold = SampleHotCallSiteThreshold;
1354 else if (!ProfileSizeInline)
1355 return InlineCost::getNever("cold callsite");
1356 }
1357
1358 Function *Callee = Candidate.CallInstr->getCalledFunction();
1359 assert(Callee && "Expect a definition for inline candidate of direct call");
1360
1361 InlineParams Params = getInlineParams();
1362 // We will ignore the threshold from inline cost, so always get full cost.
1363 Params.ComputeFullInlineCost = true;
 // NOTE(review): listing line 1364 is absent from this copy.
1365 // Checks if there is anything in the reachable portion of the callee at
1366 // this callsite that makes this inlining potentially illegal. Need to
1367 // set ComputeFullInlineCost, otherwise getInlineCost may return early
1368 // when cost exceeds threshold without checking all IRs in the callee.
1369 // The actual cost does not matter because we only check isNever() to
1370 // see if it is legal to inline the callsite.
1371 InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
1372 GetTTI(*Callee), GetAC, GetTLI);
1373
1374 // Honor always inline and never inline from call analyzer
1375 if (Cost.isNever() || Cost.isAlways())
1376 return Cost;
1377
1378 // With CSSPGO, the preinliner in llvm-profgen can estimate global inline
1379 // decisions based on hotness as well as accurate function byte sizes for
1380 // given context using function/inlinee sizes from previous build. It
1381 // stores the decision in profile, and also adjust/merge context profile
1382 // aiming at better context-sensitive post-inline profile quality, assuming
1383 // all inline decision estimates are going to be honored by compiler. Here
1384 // we replay that inline decision under `sample-profile-use-preinliner`.
1385 // Note that we don't need to handle negative decision from preinliner as
1386 // context profile for not inlined calls are merged by preinliner already.
1387 if (UsePreInlinerDecision && Candidate.CalleeSamples) {
1388 // Once two nodes are merged due to promotion, we're losing some context
1389 // so the original context-sensitive preinliner decision should be ignored
1390 // for SyntheticContext.
1391 SampleContext &Context = Candidate.CalleeSamples->getContext();
1392 if (!Context.hasState(SyntheticContext) &&
1393 Context.hasAttribute(ContextShouldBeInlined))
1394 return InlineCost::getAlways("preinliner");
1395 }
1396
1397 // For old FDO inliner, we inline the call site if it is below hot threshold,
1398 // even if the function is hot based on sample profile data. This is to
1399 // prevent huge functions from being inlined.
 // NOTE(review): listing lines 1400-1401 are absent from this copy; the
 // branch closed by the brace below is presumably there.
1402 }
1403
1404 // Otherwise only use the cost from call analyzer, but overwrite threshold with
1405 // Sample PGO threshold.
1406 return InlineCost::get(Cost.getCost(), SampleThreshold);
1407}
1408
/// Inline call sites of \p F in the order given by a candidate queue, while
/// the instruction count of \p F stays below a computed size limit.
///
/// \param F function to perform inlining in.
/// \param InlinedGUIDs set updated with GUIDs found by
/// findExternalInlineCandidate during ThinLTO pre-link.
/// \returns true if any call site was inlined.
1409bool SampleProfileLoader::inlineHotFunctionsWithPriority(
1410 Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
1411 // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
1412 // Profile symbol list is ignored when profile-sample-accurate is on.
1413 assert((!ProfAccForSymsInList ||
 // NOTE(review): listing line 1414 is absent from this copy; the first half
 // of the parenthesized condition is presumably there.
1415 !F.hasFnAttribute("profile-sample-accurate"))) &&
1416 "ProfAccForSymsInList should be false when profile-sample-accurate "
1417 "is enabled");
1418
1419 // Populating worklist with initial call sites from root inliner, along
1420 // with call site weights.
1421 CandidateQueue CQueue;
1422 InlineCandidate NewCandidate;
1423 for (auto &BB : F) {
1424 for (auto &I : BB) {
1425 auto *CB = dyn_cast<CallBase>(&I);
1426 if (!CB)
1427 continue;
1428 if (getInlineCandidate(&NewCandidate, CB))
1429 CQueue.push(NewCandidate);
1430 }
1431 }
1432
1433 // Cap the size growth from profile guided inlining. This is needed even
1434 // though cost of each inline candidate already accounts for callee size,
1435 // because with top-down inlining, we can grow inliner size significantly
1436 // with large number of smaller inlinees each pass the cost check.
 // NOTE(review): listing line 1437 is absent from this copy; the assertion
 // whose message follows presumably starts there.
1438 "Max inline size limit should not be smaller than min inline size "
1439 "limit.");
 // Growth-scaled size, capped at ProfileInlineLimitMax and then raised to at
 // least ProfileInlineLimitMin; effectively unlimited with an external advisor.
1440 unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
1441 SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax);
1442 SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin);
1443 if (ExternalInlineAdvisor)
1444 SizeLimit = std::numeric_limits<unsigned>::max();
1445
1446 MapVector<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
1447
1448 // Perform iterative BFS call site prioritized inlining
1449 bool Changed = false;
1450 while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
1451 InlineCandidate Candidate = CQueue.top();
1452 CQueue.pop();
1453 CallBase *I = Candidate.CallInstr;
1454 Function *CalledFunction = I->getCalledFunction();
1455
 // Skip directly recursive calls.
1456 if (CalledFunction == &F)
1457 continue;
1458 if (I->isIndirectCall()) {
1459 uint64_t Sum = 0;
1460 auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
1461 uint64_t SumOrigin = Sum;
1462 Sum *= Candidate.CallsiteDistribution;
1463 unsigned ICPCount = 0;
1464 for (const auto *FS : CalleeSamples) {
1465 // TODO: Consider disable pre-lTO ICP for MonoLTO as well
1466 if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1467 findExternalInlineCandidate(I, FS, InlinedGUIDs,
1468 PSI->getOrCompHotCountThreshold());
1469 continue;
1470 }
1471 uint64_t EntryCountDistributed =
1472 FS->getHeadSamplesEstimate() * Candidate.CallsiteDistribution;
1473 // In addition to regular inline cost check, we also need to make sure
1474 // ICP isn't introducing excessive speculative checks even if individual
1475 // target looks beneficial to promote and inline. That means we should
1476 // only do ICP when there's a small number dominant targets.
1477 if (ICPCount >= ProfileICPRelativeHotnessSkip &&
1478 EntryCountDistributed * 100 < SumOrigin * ProfileICPRelativeHotness)
1479 break;
1480 // TODO: Fix CallAnalyzer to handle all indirect calls.
1481 // For indirect call, we don't run CallAnalyzer to get InlineCost
1482 // before actual inlining. This is because we could see two different
1483 // types from the same definition, which makes CallAnalyzer choke as
1484 // it's expecting matching parameter type on both caller and callee
1485 // side. See example from PR18962 for the triggering cases (the bug was
1486 // fixed, but we generate different types).
1487 if (!PSI->isHotCount(EntryCountDistributed))
1488 break;
1489 SmallVector<CallBase *, 8> InlinedCallSites;
1490 // Attach function profile for promoted indirect callee, and update
1491 // call site count for the promoted inline candidate too.
1492 Candidate = {I, FS, EntryCountDistributed,
1493 Candidate.CallsiteDistribution};
1494 if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
1495 &InlinedCallSites)) {
 // Newly exposed call sites become candidates themselves.
1496 for (auto *CB : InlinedCallSites) {
1497 if (getInlineCandidate(&NewCandidate, CB))
1498 CQueue.emplace(NewCandidate);
1499 }
1500 ICPCount++;
1501 Changed = true;
1502 } else if (!ContextTracker) {
1503 LocalNotInlinedCallSites.insert({I, FS});
1504 }
1505 }
1506 } else if (CalledFunction && CalledFunction->getSubprogram() &&
1507 !CalledFunction->isDeclaration()) {
1508 SmallVector<CallBase *, 8> InlinedCallSites;
1509 if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
1510 for (auto *CB : InlinedCallSites) {
1511 if (getInlineCandidate(&NewCandidate, CB))
1512 CQueue.emplace(NewCandidate);
1513 }
1514 Changed = true;
1515 } else if (!ContextTracker) {
 // NOTE(review): Candidate.CalleeSamples may be null here, since
 // getInlineCandidate accepts advisor-requested sites without samples.
1516 LocalNotInlinedCallSites.insert({I, Candidate.CalleeSamples});
1517 }
1518 } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
1519 findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
1520 InlinedGUIDs,
1521 PSI->getOrCompHotCountThreshold());
1522 }
1523 }
1524
 // Candidates left in the queue mean the loop stopped on the size limit;
 // record which limit was in effect.
1525 if (!CQueue.empty()) {
1526 if (SizeLimit == (unsigned)ProfileInlineLimitMax)
1527 ++NumCSInlinedHitMaxLimit;
1528 else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
1529 ++NumCSInlinedHitMinLimit;
1530 else
1531 ++NumCSInlinedHitGrowthLimit;
1532 }
1533
1534 // For CS profile, profile for not inlined context will be merged when
1535 // base profile is being retrieved.
 // NOTE(review): listing line 1536 is absent from this copy; a condition
 // guarding the call below is presumably there.
1537 promoteMergeNotInlinedContextSamples(LocalNotInlinedCallSites, F);
1538 return Changed;
1539}
1540
1541void SampleProfileLoader::promoteMergeNotInlinedContextSamples(
1542 MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
1543 const Function &F) {
1544 // Accumulate not inlined callsite information into notInlinedSamples
1545 for (const auto &Pair : NonInlinedCallSites) {
1546 CallBase *I = Pair.first;
1547 Function *Callee = I->getCalledFunction();
1548 if (!Callee || Callee->isDeclaration())
1549 continue;
1550
1551 ORE->emit(
1552 OptimizationRemarkAnalysis(getAnnotatedRemarkPassName(), "NotInline",
1553 I->getDebugLoc(), I->getParent())
1554 << "previous inlining not repeated: '" << ore::NV("Callee", Callee)
1555 << "' into '" << ore::NV("Caller", &F) << "'");
1556
1557 ++NumCSNotInlined;
1558 const FunctionSamples *FS = Pair.second;
1559 if (FS->getTotalSamples() == 0 && FS->getHeadSamplesEstimate() == 0) {
1560 continue;
1561 }
1562
1563 // Do not merge a context that is already duplicated into the base profile.
1564 if (FS->getContext().hasAttribute(sampleprof::ContextDuplicatedIntoBase))
1565 continue;
1566
1567 if (ProfileMergeInlinee) {
1568 // A function call can be replicated by optimizations like callsite
1569 // splitting or jump threading and the replicates end up sharing the
1570 // sample nested callee profile instead of slicing the original
1571 // inlinee's profile. We want to do merge exactly once by filtering out
1572 // callee profiles with a non-zero head sample count.
1573 if (FS->getHeadSamples() == 0) {
1574 // Use entry samples as head samples during the merge, as inlinees
1575 // don't have head samples.
1576 const_cast<FunctionSamples *>(FS)->addHeadSamples(
1577 FS->getHeadSamplesEstimate());
1578
1579 // Note that we have to do the merge right after processing function.
1580 // This allows OutlineFS's profile to be used for annotation during
1581 // top-down processing of functions' annotation.
1582 FunctionSamples *OutlineFS = Reader->getSamplesFor(*Callee);
1583 // If outlined function does not exist in the profile, add it to a
1584 // separate map so that it does not rehash the original profile.
1585 if (!OutlineFS)
1586 OutlineFS = &OutlineFunctionSamples[
1587 FunctionId(FunctionSamples::getCanonicalFnName(Callee->getName()))];
1588 OutlineFS->merge(*FS, 1);
1589 // Set outlined profile to be synthetic to not bias the inliner.
1590 OutlineFS->setContextSynthetic();
1591 }
1592 } else {
1593 auto pair =
1594 notInlinedCallInfo.try_emplace(Callee, NotInlinedProfileInfo{0});
1595 pair.first->second.entryCount += FS->getHeadSamplesEstimate();
1596 }
1597 }
1598}
1599
1600/// Returns the sorted CallTargetMap \p M by count in descending order.
/// Each sorted entry is converted to an InstrProfValueData holding the
/// target's hash code and its count.
// NOTE(review): listing lines 1601-1603 are absent from this copy; the
// function signature and the declaration of `R` are presumably there.
1604 for (const auto &I : SampleRecord::sortCallTargets(M)) {
1605 R.emplace_back(
1606 InstrProfValueData{I.first.getHashCode(), I.second});
1607 }
1608 return R;
1609}
1610
1611// Generate MD_prof metadata for every branch instruction using the
1612// edge weights computed during propagation.
1613void SampleProfileLoader::generateMDProfMetadata(Function &F) {
1614 // Generate MD_prof metadata for every branch instruction using the
1615 // edge weights computed during propagation.
1616 LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
1617 LLVMContext &Ctx = F.getContext();
1618 MDBuilder MDB(Ctx);
1619 for (auto &BI : F) {
1620 BasicBlock *BB = &BI;
1621
1622 if (BlockWeights[BB]) {
1623 for (auto &I : *BB) {
1624 if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
1625 continue;
1627 const DebugLoc &DLoc = I.getDebugLoc();
1628 if (!DLoc)
1629 continue;
1630 const DILocation *DIL = DLoc;
1631 const FunctionSamples *FS = findFunctionSamples(I);
1632 if (!FS)
1633 continue;
1635 ErrorOr<SampleRecord::CallTargetMap> T =
1636 FS->findCallTargetMapAt(CallSite);
1637 if (!T || T.get().empty())
1638 continue;
1640 // Prorate the callsite counts based on the pre-ICP distribution
1641 // factor to reflect what is already done to the callsite before
1642 // ICP, such as calliste cloning.
1643 if (std::optional<PseudoProbe> Probe = extractProbe(I)) {
1644 if (Probe->Factor < 1)
1645 T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
1646 }
1647 }
1648 SmallVector<InstrProfValueData, 2> SortedCallTargets =
1650 uint64_t Sum = 0;
1651 for (const auto &C : T.get())
1652 Sum += C.second;
1653 // With CSSPGO all indirect call targets are counted torwards the
1654 // original indirect call site in the profile, including both
1655 // inlined and non-inlined targets.
1657 if (const FunctionSamplesMap *M =
1658 FS->findFunctionSamplesMapAt(CallSite)) {
1659 for (const auto &NameFS : *M)
1660 Sum += NameFS.second.getHeadSamplesEstimate();
1661 }
1662 }
1663 if (Sum)
1664 updateIDTMetaData(I, SortedCallTargets, Sum);
1665 else if (OverwriteExistingWeights)
1666 I.setMetadata(LLVMContext::MD_prof, nullptr);
1667 } else if (!isa<IntrinsicInst>(&I)) {
1669 I, ArrayRef<uint32_t>{static_cast<uint32_t>(BlockWeights[BB])},
1670 /*IsExpected=*/false);
1671 }
1672 }
1674 // Set profile metadata (possibly annotated by LTO prelink) to zero or
1675 // clear it for cold code.
1676 for (auto &I : *BB) {
1677 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
1679 I.setMetadata(LLVMContext::MD_prof, nullptr);
1680 } else {
1681 setBranchWeights(I, ArrayRef<uint32_t>{uint32_t(0)},
1682 /*IsExpected=*/false);
1683 }
1684 }
1685 }
1686 }
1687
1688 Instruction *TI = BB->getTerminator();
1689 if (TI->getNumSuccessors() == 1)
1690 continue;
1691 if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI) &&
1693 continue;
1694
1695 DebugLoc BranchLoc = TI->getDebugLoc();
1696 LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
1697 << ((BranchLoc) ? Twine(BranchLoc.getLine())
1698 : Twine("<UNKNOWN LOCATION>"))
1699 << ".\n");
1700 SmallVector<uint32_t, 4> Weights;
1701 uint32_t MaxWeight = 0;
1702 Instruction *MaxDestInst;
1703 // Since profi treats multiple edges (multiway branches) as a single edge,
1704 // we need to distribute the computed weight among the branches. We do
1705 // this by evenly splitting the edge weight among destinations.
1706 DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity;
1707 std::vector<uint64_t> EdgeIndex;
1709 EdgeIndex.resize(TI->getNumSuccessors());
1710 for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1711 const BasicBlock *Succ = TI->getSuccessor(I);
1712 EdgeIndex[I] = EdgeMultiplicity[Succ];
1713 EdgeMultiplicity[Succ]++;
1714 }
1715 }
1716 for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
1717 BasicBlock *Succ = TI->getSuccessor(I);
1718 Edge E = std::make_pair(BB, Succ);
1719 uint64_t Weight = EdgeWeights[E];
1720 LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
1721 // Use uint32_t saturated arithmetic to adjust the incoming weights,
1722 // if needed. Sample counts in profiles are 64-bit unsigned values,
1723 // but internally branch weights are expressed as 32-bit values.
1724 if (Weight > std::numeric_limits<uint32_t>::max()) {
1725 LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)\n");
1726 Weight = std::numeric_limits<uint32_t>::max();
1727 }
1728 if (!SampleProfileUseProfi) {
1729 // Weight is added by one to avoid propagation errors introduced by
1730 // 0 weights.
1731 Weights.push_back(static_cast<uint32_t>(
1732 Weight == std::numeric_limits<uint32_t>::max() ? Weight
1733 : Weight + 1));
1734 } else {
1735 // Profi creates proper weights that do not require "+1" adjustments but
1736 // we evenly split the weight among branches with the same destination.
1737 uint64_t W = Weight / EdgeMultiplicity[Succ];
1738 // Rounding up, if needed, so that first branches are hotter.
1739 if (EdgeIndex[I] < Weight % EdgeMultiplicity[Succ])
1740 W++;
1741 Weights.push_back(static_cast<uint32_t>(W));
1742 }
1743 if (Weight != 0) {
1744 if (Weight > MaxWeight) {
1745 MaxWeight = Weight;
1746 MaxDestInst = &*Succ->getFirstNonPHIOrDbgOrLifetime();
1747 }
1748 }
1749 }
1750
1751 misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
1752
1753 uint64_t TempWeight;
1754 // Only set weights if there is at least one non-zero weight.
1755 // In any other case, let the analyzer set weights.
1756 // Do not set weights if the weights are present unless under
1757 // OverwriteExistingWeights. In ThinLTO, the profile annotation is done
1758 // twice. If the first annotation already set the weights, the second pass
1759 // does not need to set it. With OverwriteExistingWeights, Blocks with zero
1760 // weight should have their existing metadata (possibly annotated by LTO
1761 // prelink) cleared.
1762 if (MaxWeight > 0 &&
1763 (!TI->extractProfTotalWeight(TempWeight) || OverwriteExistingWeights)) {
1764 LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
1765 setBranchWeights(*TI, Weights, /*IsExpected=*/false);
1766 ORE->emit([&]() {
1767 return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
1768 << "most popular destination for conditional branches at "
1769 << ore::NV("CondBranchesLoc", BranchLoc);
1770 });
1771 } else {
1773 TI->setMetadata(LLVMContext::MD_prof, nullptr);
1774 LLVM_DEBUG(dbgs() << "CLEARED. All branch weights are zero.\n");
1775 } else {
1776 LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
1777 }
1778 }
1779 }
1780}
1781
1782/// Once all the branch weights are computed, we emit the MD_prof
1783/// metadata on BB using the computed values for each of its branches.
1784///
1785/// \param F The function to query.
1786///
1787/// \returns true if \p F was modified. Returns false, otherwise.
1788bool SampleProfileLoader::emitAnnotations(Function &F) {
// Overview of the visible steps: check that the profile can be applied to F,
// run sample-driven inlining, compute and propagate weights, and generate
// MD_prof metadata only when one of those steps reported a change. Coverage
// remarks are emitted on every path that reaches the end of the function.
1789 bool Changed = false;
1790
// NOTE(review): the line that opens this branch (listing line 1791) is missing
// here. The branch body consults ProbeManager, so it presumably guards on a
// probe-based profile -- confirm against the upstream file.
1792 LLVM_DEBUG({
1793 if (!ProbeManager->getDesc(F))
1794 dbgs() << "Probe descriptor missing for Function " << F.getName()
1795 << "\n";
1796 });
1797
// Keep statistics on how many functions had a profile that passed or failed
// the ProbeManager validity check.
1798 if (ProbeManager->profileIsValid(F, *Samples)) {
1799 ++NumMatchedProfile;
1800 } else {
1801 ++NumMismatchedProfile;
1802 LLVM_DEBUG(
1803 dbgs() << "Profile is invalid due to CFG mismatch for Function "
1804 << F.getName() << "\n");
// NOTE(review): listing line 1805 is missing, so this early return may be
// conditional in the real file -- do not assume a mismatch always bails out.
1806 return false;
1807 }
1808 } else {
// Other branch: a function whose location is reported as 0 by getFunctionLoc
// is not annotated.
1809 if (getFunctionLoc(F) == 0)
1810 return false;
1811
1812 LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
1813 << F.getName() << ": " << getFunctionLoc(F) << "\n");
1814 }
1815
// InlinedGUIDs is filled by the inlining step and then handed to weight
// propagation.
1816 DenseSet<GlobalValue::GUID> InlinedGUIDs;
// NOTE(review): the condition selecting between the two inliners (listing line
// 1817) is missing from this listing.
1818 Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
1819 else
1820 Changed |= inlineHotFunctions(F, InlinedGUIDs);
1821
1822 Changed |= computeAndPropagateWeights(F, InlinedGUIDs);
1823
// Metadata is only (re)generated when inlining or propagation changed F.
1824 if (Changed)
1825 generateMDProfMetadata(F);
1826
1827 emitCoverageRemarks(F);
1828 return Changed;
1829}
1830
// Builds the call graph derived from profile data for module M.
//
// The graph is constructed either from the context tracker or from the
// reader's flat profile map, and then functions of M are added to it so that
// functions absent from the profile are still represented.
//
// \returns the newly created graph; ownership passes to the caller.
1831std::unique_ptr<ProfiledCallGraph>
1832SampleProfileLoader::buildProfiledCallGraph(Module &M) {
1833 std::unique_ptr<ProfiledCallGraph> ProfiledCG;
// NOTE(review): the condition choosing between the two constructors (listing
// line 1834) is missing here; the first form dereferences ContextTracker, so
// it presumably applies to context-sensitive profiles -- confirm.
1835 ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
1836 else
1837 ProfiledCG = std::make_unique<ProfiledCallGraph>(Reader->getProfiles());
1838
1839 // Add all functions into the profiled call graph even if they are not in
1840 // the profile. This makes sure functions missing from the profile still
1841 // gets a chance to be processed.
1842 for (Function &F : M) {
// NOTE(review): the filter that triggers this `continue` (listing line 1843)
// and the argument of addProfiledFunction (listing line 1846) are missing from
// this listing.
1844 continue;
1845 ProfiledCG->addProfiledFunction(
1847 }
1848
1849 return ProfiledCG;
1850}
1851
// Computes the order in which the functions of M are processed.
//
// Three strategies are visible below:
//  - when ProfileTopDownLoad is off, functions are taken in module order;
//  - otherwise, either the SCCs of the profiled call graph are walked and the
//    collected list is reversed, or
//  - buildTopDownFuncOrder(CG, ...) is used with the static call graph.
//
// \returns the ordered list of functions.
1852std::vector<Function *>
1853SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {
1854 std::vector<Function *> FunctionOrderList;
1855 FunctionOrderList.reserve(M.size());
1856
// NOTE(review): the condition guarding this warning (listing line 1857) is
// missing from this listing.
1858 errs() << "WARNING: -use-profiled-call-graph ignored, should be used "
1859 "together with -sample-profile-top-down-load.\n";
1860
1861 if (!ProfileTopDownLoad) {
1862 if (ProfileMergeInlinee) {
1863 // Disable ProfileMergeInlinee if profile is not loaded in top down order,
1864 // because the profile for a function may be used for the profile
1865 // annotation of its outline copy before the profile merging of its
1866 // non-inlined inline instances, and that is not the way how
1867 // ProfileMergeInlinee is supposed to work.
1868 ProfileMergeInlinee = false;
1869 }
1870
// Module order. NOTE(review): the per-function filter (listing line 1872) is
// missing from this listing.
1871 for (Function &F : M)
1873 FunctionOrderList.push_back(&F);
1874 return FunctionOrderList;
1875 }
1876
// NOTE(review): the `if` that opens the profiled-call-graph path (listing
// lines 1877-1878) is missing here; its `else` is the buildTopDownFuncOrder
// call near the end of this function.
1879 // Use profiled call edges to augment the top-down order. There are cases
1880 // that the top-down order computed based on the static call graph doesn't
1881 // reflect real execution order. For example
1882 //
1883 // 1. Incomplete static call graph due to unknown indirect call targets.
1884 // Adjusting the order by considering indirect call edges from the
1885 // profile can enable the inlining of indirect call targets by allowing
1886 // the caller processed before them.
1887 // 2. Mutual call edges in an SCC. The static processing order computed for
1888 // an SCC may not reflect the call contexts in the context-sensitive
1889 // profile, thus may cause potential inlining to be overlooked. The
1890 // function order in one SCC is being adjusted to a top-down order based
1891 // on the profile to favor more inlining. This is only a problem with CS
1892 // profile.
1893 // 3. Transitive indirect call edges due to inlining. When a callee function
1894 // (say B) is inlined into a caller function (say A) in LTO prelink,
1895 // every call edge originated from the callee B will be transferred to
1896 // the caller A. If any transferred edge (say A->C) is indirect, the
1897 // original profiled indirect edge B->C, even if considered, would not
1898 // enforce a top-down order from the caller A to the potential indirect
1899 // call target C in LTO postlink since the inlined callee B is gone from
1900 // the static call graph.
1901 // 4. #3 can happen even for direct call targets, due to functions defined
1902 // in header files. A header function (say A), when included into source
1903 // files, is defined multiple times but only one definition survives due
1904 // to ODR. Therefore, the LTO prelink inlining done on those dropped
1905 // definitions can be useless based on a local file scope. More
1906 // importantly, the inlinee (say B), once fully inlined to a
1907 // to-be-dropped A, will have no profile to consume when its outlined
1908 // version is compiled. This can lead to a profile-less prelink
1909 // compilation for the outlined version of B which may be called from
1910 // external modules. While this isn't easy to fix, we rely on the
1911 // postlink AutoFDO pipeline to optimize B. Since the survived copy of
1912 // the A can be inlined in its local scope in prelink, it may not exist
1913 // in the merged IR in postlink, and we'll need the profiled call edges
1914 // to enforce a top-down order for the rest of the functions.
1915 //
1916 // Considering those cases, a profiled call graph completely independent of
1917 // the static call graph is constructed based on profile data, where
1918 // function objects are not even needed to handle case #3 and case #4.
1919 //
1920 // Note that static callgraph edges are completely ignored since they
1921 // can be conflicting with profiled edges for cyclic SCCs and may result in
1922 // an SCC order incompatible with profile-defined one. Using strictly
1923 // profile order ensures a maximum inlining experience. On the other hand,
1924 // static call edges are not so important when they don't correspond to a
1925 // context in the profile.
1926
1927 std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);
1928 scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());
// Visit each SCC in turn; within an SCC the member order is optionally
// replaced by the order produced by scc_member_iterator.
1929 while (!CGI.isAtEnd()) {
1930 auto Range = *CGI;
1931 if (SortProfiledSCC) {
1932 // Sort nodes in one SCC based on callsite hotness.
1933 scc_member_iterator<ProfiledCallGraph *> SI(*CGI);
1934 Range = *SI;
1935 }
1936 for (auto *Node : Range) {
// Graph nodes are identified by name; map them back to IR functions and drop
// names with no function in SymbolMap or functions that are to be skipped.
1937 Function *F = SymbolMap.lookup(Node->Name);
1938 if (F && !skipProfileForFunction(*F))
1939 FunctionOrderList.push_back(F);
1940 }
1941 ++CGI;
1942 }
// The list was collected in SCC-iteration order; presumably that is bottom-up
// (callees first), so reversing yields the top-down order described above --
// TODO confirm against scc_iterator's documentation.
1943 std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
1944 } else
1945 buildTopDownFuncOrder(CG, FunctionOrderList);
1946
1947 LLVM_DEBUG({
1948 dbgs() << "Function processing order:\n";
1949 for (auto F : FunctionOrderList) {
1950 dbgs() << F->getName() << "\n";
1951 }
1952 });
1953
1954 return FunctionOrderList;
1955}
1956
// Module-level setup for the loader: creates the profile reader, reads the
// profile, derives option defaults from the kind of profile that was read, and
// creates the helper objects (context tracker, probe manager, matcher).
//
// \param M The module about to be annotated.
// \returns false if the profile could not be opened or read, or if a
// probe-based profile is used on a module that is not probed; each of these
// cases is reported through the LLVMContext diagnostic handler. Returns true
// otherwise.
//
// NOTE(review): the second line of the parameter list (listing line 1958) is
// missing from this listing; the body uses a pointer named FAM and a reference
// named CG that are not declared in the visible text.
1957bool SampleProfileLoader::doInitialization(Module &M,
1959 auto &Ctx = M.getContext();
1960
1961 auto ReaderOrErr = SampleProfileReader::create(
1962 Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
1963 if (std::error_code EC = ReaderOrErr.getError()) {
1964 std::string Msg = "Could not open profile: " + EC.message();
1965 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1966 return false;
1967 }
1968 Reader = std::move(ReaderOrErr.get());
1969 Reader->setSkipFlatProf(LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink);
1970 // set module before reading the profile so reader may be able to only
1971 // read the function profiles which are used by the current module.
1972 Reader->setModule(&M);
1973 if (std::error_code EC = Reader->read()) {
1974 std::string Msg = "profile reading failed: " + EC.message();
1975 Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1976 return false;
1977 }
1978
1979 PSL = Reader->getProfileSymbolList();
1980
// NOTE(review): the guard for this assignment (listing line 1981) is missing
// from this listing.
1982 DisableSampleProfileInlining = DisableSampleLoaderInlining;
1983
1984 if (UseFlattenedProfile)
1985 ProfileConverter::flattenProfile(Reader->getProfiles(),
1986 Reader->profileIsCS());
1987
1988 // While profile-sample-accurate is on, ignore symbol list.
// NOTE(review): the right-hand side of this assignment (listing line 1990) is
// missing from this listing.
1989 ProfAccForSymsInList =
1991 if (ProfAccForSymsInList) {
// Rebuild the sets of names/GUIDs that appear in the profile from the
// reader's name table.
1992 NamesInProfile.clear();
1993 GUIDsInProfile.clear();
1994 if (auto NameTable = Reader->getNameTable()) {
// NOTE(review): the condition selecting hash codes vs. string names (listing
// line 1995) is missing from this listing.
1996 for (auto Name : *NameTable)
1997 GUIDsInProfile.insert(Name.getHashCode());
1998 } else {
1999 for (auto Name : *NameTable)
2000 NamesInProfile.insert(Name.stringRef());
2001 }
2002 }
2003 CoverageTracker.setProfAccForSymsInList(true);
2004 }
2005
// An inline-replay advisor is created only when a replay file was supplied
// and a function analysis manager is available.
2006 if (FAM && !ProfileInlineReplayFile.empty()) {
2007 ExternalInlineAdvisor = getReplayInlineAdvisor(
2008 M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr,
2009 ReplayInlinerSettings{ProfileInlineReplayFile,
2013 /*EmitRemarks=*/false, InlineContext{LTOPhase, InlinePass::ReplaySampleProfileInliner});
2014 }
2015
2016 // Apply tweaks if context-sensitive or probe-based profile is available.
2017 if (Reader->profileIsCS() || Reader->profileIsPreInlined() ||
2018 Reader->profileIsProbeBased()) {
// NOTE(review): each option assignment in this branch is preceded by a line
// that is missing from this listing. Presumably each is a check that the
// option was not set explicitly on the command line, as is visible for
// ProfileInlineLimitMin/Max further down -- confirm against the upstream file.
2022 SampleProfileUseProfi = true;
2025 // Enable priority-based inliner and size inline by default for CSSPGO.
2027 ProfileSizeInline = true;
2030 // For CSSPGO, we also allow recursive inline to best use context profile.
2032 AllowRecursiveInline = true;
2033
2034 if (Reader->profileIsPreInlined()) {
2036 UsePreInlinerDecision = true;
2037 }
2038
2039 // Enable stale profile matching by default for probe-based profile.
2040 // Currently the matching relies on if the checksum mismatch is detected,
2041 // which is currently only available for pseudo-probe mode. Removing the
2042 // checksum check could cause regressions for some cases, so further tuning
2043 // might be needed if we want to enable it for all cases.
2044 if (Reader->profileIsProbeBased()) {
2046 SalvageStaleProfile = true;
2048 SalvageUnusedProfile = true;
2049 }
2050
2051 if (!Reader->profileIsCS()) {
2052 // Non-CS profile should be fine without a function size budget for the
2053 // inliner since the contexts in the profile are either all from inlining
2054 // in the previous build or pre-computed by the preinliner with a size
2055 // cap, thus they are bounded.
2056 if (!ProfileInlineLimitMin.getNumOccurrences())
2057 ProfileInlineLimitMin = std::numeric_limits<unsigned>::max();
2058 if (!ProfileInlineLimitMax.getNumOccurrences())
2059 ProfileInlineLimitMax = std::numeric_limits<unsigned>::max();
2060 }
2061 }
2062
2063 if (Reader->profileIsCS()) {
2064 // Tracker for profiles under different context
2065 ContextTracker = std::make_unique<SampleContextTracker>(
2066 Reader->getProfiles(), &GUIDToFuncNameMap);
2067 }
2068
2069 // Load pseudo probe descriptors for probe-based function samples.
2070 if (Reader->profileIsProbeBased()) {
2071 ProbeManager = std::make_unique<PseudoProbeManager>(M);
// A probe-based profile cannot be applied to a module without probes; this is
// reported as a warning and initialization fails.
2072 if (!ProbeManager->moduleIsProbed(M)) {
2073 const char *Msg =
2074 "Pseudo-probe-based profile requires SampleProfileProbePass";
2075 Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,
2076 DS_Warning));
2077 return false;
2078 }
2079 }
2080
// NOTE(review): the condition under which the matcher is created (listing
// lines 2081-2082) is missing from this listing.
2083 MatchingManager = std::make_unique<SampleProfileMatcher>(
2084 M, *Reader, CG, ProbeManager.get(), LTOPhase, SymbolMap, PSL,
2085 FuncNameToProfNameMap);
2086 }
2087
2088 return true;
2089}
2090
2091// Note that this is a module-level check. Even if one module is errored out,
2092// the entire build will be errored out. However, the user could make big
2093// changes to functions in single module but those changes might not be
2094// performance significant to the whole binary. Therefore, to avoid those false
2095// positives, we select a reasonable big set of hot functions that are supposed
2096// to be globally performance significant, only compute and check the mismatch
2097// within those functions. The function selection is based on two criteria:
2098// 1) The function is hot enough, which is tuned by a hotness-based
2099// flag(HotFuncCutoffForStalenessError). 2) The num of function is large enough
2100// which is tuned by the MinfuncsForStalenessError flag.
//
// \param M Module used for the diagnostic's context and identifier.
// \param PSI Profile summary info (not referenced in the visible lines;
// presumably used by the hotness check on the missing listing line 2114).
// \param Profiles The function profiles to inspect.
// \returns true, after emitting a diagnostic, when the share of selected
// functions with a hash mismatch reaches PrecentMismatchForStalenessError
// percent; false otherwise, including when too few functions were selected.
2101bool SampleProfileLoader::rejectHighStalenessProfile(
2102 Module &M, ProfileSummaryInfo *PSI, const SampleProfileMap &Profiles) {
// NOTE(review): the first line of this assertion (listing line 2103) is
// missing from this listing; only its message is visible.
2104 "Only support for probe-based profile");
2105 uint64_t TotalHotFunc = 0;
2106 uint64_t NumMismatchedFunc = 0;
2107 for (const auto &I : Profiles) {
2108 const auto &FS = I.second;
// Profiles without a probe descriptor in this module are not counted.
2109 const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
2110 if (!FuncDesc)
2111 continue;
2112
2113 // Use a hotness-based threshold to control the function selection.
// NOTE(review): the start of this condition (listing line 2114) is missing
// from this listing; only its last argument is visible.
2115 FS.getTotalSamples()))
2116 continue;
2117
2118 TotalHotFunc++;
2119 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS))
2120 NumMismatchedFunc++;
2121 }
2122 // Make sure that the num of selected function is not too small to distinguish
2123 // from the user's benign changes.
2124 if (TotalHotFunc < MinfuncsForStalenessError)
2125 return false;
2126
2127 // Finally check the mismatch percentage against the threshold.
// The comparison is written as a cross-multiplication, so no division (and no
// rounding) is involved.
2128 if (NumMismatchedFunc * 100 >=
2129 TotalHotFunc * PrecentMismatchForStalenessError) {
2130 auto &Ctx = M.getContext();
2131 const char *Msg =
2132 "The input profile significantly mismatches current source code. "
2133 "Please recollect profile to avoid performance regression.";
2134 Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg));
2135 return true;
2136 }
2137 return false;
2138}
2139
// Strips pseudo-probe artifacts from every function in M: pseudo-probe
// instructions are erased, and call instructions whose debug location carries
// a pseudo-probe discriminator get a location with a DWARF discriminator
// instead.
2140void SampleProfileLoader::removePseudoProbeInstsDiscriminator(Module &M) {
2141 for (auto &F : M) {
// Probe instructions are collected here and erased only after the walk over
// the function's blocks has finished.
2142 std::vector<Instruction *> InstsToDel;
2143 for (auto &BB : F) {
2144 for (auto &I : BB) {
2145 if (isa<PseudoProbeInst>(&I))
2146 InstsToDel.push_back(&I);
2147 else if (isa<CallBase>(&I))
// Calls without a debug location are left untouched.
2148 if (const DILocation *DIL = I.getDebugLoc().get()) {
2149 // Restore dwarf discriminator for call.
2150 unsigned Discriminator = DIL->getDiscriminator();
2151 if (DILocation::isPseudoProbeDiscriminator(Discriminator)) {
// NOTE(review): the call that computes DwarfDiscriminator (listing line 2153)
// is missing from this listing; only its argument is visible. When the
// optional is empty, discriminator 0 is used.
2152 std::optional<uint32_t> DwarfDiscriminator =
2154 Discriminator);
2155 I.setDebugLoc(
2156 DIL->cloneWithDiscriminator(DwarfDiscriminator.value_or(0)));
2157 }
2158 }
2159 }
2160 }
2161 for (auto *I : InstsToDel)
2162 I->eraseFromParent();
2163 }
2164}
2165
// Applies the loaded profile to every function of M.
//
// Visible steps: install the profile summary on the module if it has none,
// optionally reject a stale profile, build SymbolMap (names to functions),
// run stale-profile matching, process functions in the order given by
// buildFunctionOrder, then perform post-processing.
//
// \param M The module to annotate.
// \param AM Module analysis manager, forwarded to runOnFunction.
// \param _PSI Profile summary info; stored in the PSI member.
// \returns true if any call to runOnFunction returned true.
2166bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager &AM,
2167 ProfileSummaryInfo *_PSI) {
2168 GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
2169
2170 PSI = _PSI;
// Only install the reader's summary when the module does not already carry a
// (non-CS) profile summary, then refresh PSI.
2171 if (M.getProfileSummary(/* IsCS */ false) == nullptr) {
2172 M.setProfileSummary(Reader->getSummary().getMD(M.getContext()),
2174 PSI->refresh();
2175 }
2176
// NOTE(review): the first part of this condition (listing line 2177) is
// missing from this listing. When the check fires, the module is left
// unannotated.
2178 rejectHighStalenessProfile(M, PSI, Reader->getProfiles()))
2179 return false;
2180
2181 auto Remapper = Reader->getRemapper();
2182 // Populate the symbol map.
2183 for (const auto &N_F : M.getValueSymbolTable()) {
2184 StringRef OrigName = N_F.getKey();
2185 Function *F = dyn_cast<Function>(N_F.getValue());
2186 if (F == nullptr || OrigName.empty())
2187 continue;
2188 SymbolMap[FunctionId(OrigName)] = F;
// Also register the function under its canonical name when that differs from
// the symbol-table name.
2189 StringRef NewName = FunctionSamples::getCanonicalFnName(*F);
2190 if (OrigName != NewName && !NewName.empty()) {
2191 auto r = SymbolMap.emplace(FunctionId(NewName), F);
2192 // Failing to insert means there is already an entry in SymbolMap,
2193 // thus there are multiple functions that are mapped to the same
2194 // stripped name. In this case of name conflicting, set the value
2195 // to nullptr to avoid confusion.
2196 if (!r.second)
2197 r.first->second = nullptr;
2198 OrigName = NewName;
2199 }
2200 // Insert the remapped names into SymbolMap.
2201 if (Remapper) {
2202 if (auto MapName = Remapper->lookUpNameInProfile(OrigName)) {
2203 if (*MapName != OrigName && !MapName->empty())
2204 SymbolMap.emplace(FunctionId(*MapName), F);
2205 }
2206 }
2207 }
2208
2209 // Stale profile matching.
// NOTE(review): the condition guarding the matcher (listing lines 2210-2211)
// is missing from this listing.
2212 MatchingManager->runOnModule();
2213 MatchingManager->clearMatchingData();
2214 }
2215 assert(SymbolMap.count(FunctionId()) == 0 &&
2216 "No empty StringRef should be added in SymbolMap");
2217 assert((SalvageUnusedProfile || FuncNameToProfNameMap.empty()) &&
2218 "FuncNameToProfNameMap is not empty when --salvage-unused-profile is "
2219 "not enabled");
2220
2221 bool retval = false;
// Per-function state is reset before each function is processed.
2222 for (auto *F : buildFunctionOrder(M, CG)) {
2223 assert(!F->isDeclaration());
2224 clearFunctionData();
2225 retval |= runOnFunction(*F, AM);
2226 }
2227
2228 // Account for cold calls not inlined....
// NOTE(review): the guard for this loop (listing line 2229) is missing from
// this listing.
2230 for (const std::pair<Function *, NotInlinedProfileInfo> &pair :
2231 notInlinedCallInfo)
2232 updateProfileCallee(pair.first, pair.second.entryCount);
2233
// NOTE(review): the condition guarding probe removal (listing lines
// 2234-2235) is missing from this listing. When taken, probes are stripped
// and the named metadata PseudoProbeDescMetadataName is erased if present.
2236 removePseudoProbeInstsDiscriminator(M);
2237 if (auto *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName))
2238 M.eraseNamedMetadata(FuncInfo);
2239 }
2240
2241 return retval;
2242}
2243
// Processes a single function: picks its initial entry count, locates the
// samples that apply to it, and annotates it when non-empty samples exist.
//
// \param F The function to process.
// \returns the result of emitAnnotations(F) when non-empty samples were
// found for F; false otherwise.
//
// NOTE(review): the second line of the parameter list (listing line 2245) is
// missing from this listing; the body uses an analysis manager named AM.
2244bool SampleProfileLoader::runOnFunction(Function &F,
2246 LLVM_DEBUG(dbgs() << "\n\nProcessing Function " << F.getName() << "\n");
2247 DILocation2SampleMap.clear();
2248 // By default the entry count is initialized to -1, which will be treated
2249 // conservatively by getEntryCount as the same as unknown (None). This is
2250 // to avoid newly added code to be treated as cold. If we have samples
2251 // this will be overwritten in emitAnnotations.
2252 uint64_t initialEntryCount = -1;
2253
2254 ProfAccForSymsInList = ProfileAccurateForSymsInList && PSL;
2255 if (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) {
2256 // initialize all the function entry counts to 0. It means all the
2257 // functions without profile will be regarded as cold.
2258 initialEntryCount = 0;
2259 // profile-sample-accurate is a user assertion which has a higher precedence
2260 // than symbol list. When profile-sample-accurate is on, ignore symbol list.
2261 ProfAccForSymsInList = false;
2262 }
2263 CoverageTracker.setProfAccForSymsInList(ProfAccForSymsInList);
2264
2265 // PSL -- profile symbol list include all the symbols in sampled binary.
2266 // If ProfileAccurateForSymsInList is enabled, PSL is used to treat
2267 // old functions without samples being cold, without having to worry
2268 // about new and hot functions being mistakenly treated as cold.
2269 if (ProfAccForSymsInList) {
2270 // Initialize the entry count to 0 for functions in the list.
2271 if (PSL->contains(F.getName()))
2272 initialEntryCount = 0;
2273
2274 // Function in the symbol list but without sample will be regarded as
2275 // cold. To minimize the potential negative performance impact it could
2276 // have, we want to be a little conservative here saying if a function
2277 // shows up in the profile, no matter as outline function, inline instance
2278 // or call targets, treat the function as not being cold. This will handle
2279 // the cases such as most callsites of a function are inlined in sampled
2280 // binary but not inlined in current build (because of source code drift,
2281 // imprecise debug information, or the callsites are all cold individually
2282 // but not cold accumulatively...), so the outline function showing up as
2283 // cold in sampled binary will actually not be cold after current build.
2284 StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
// NOTE(review): the start of this condition (listing line 2285) is missing
// from this listing. The visible alternatives look the canonical name up in
// GUIDsInProfile (by GUID) or in NamesInProfile (by string).
2286 GUIDsInProfile.count(
2287 Function::getGUIDAssumingExternalLinkage(CanonName))) ||
2288 (!FunctionSamples::UseMD5 && NamesInProfile.count(CanonName)))
2289 initialEntryCount = -1;
2290 }
2291
2292 // Initialize entry count when the function has no existing entry
2293 // count value.
2294 if (!F.getEntryCount())
2295 F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
// NOTE(review): OwnedORE is declared but not referenced in the visible lines.
2296 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
2297 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(*F.getParent())
2298 .getManager();
2299 ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2300
// NOTE(review): the condition choosing the context tracker over the reader
// (listing line 2301) is missing from this listing.
2302 Samples = ContextTracker->getBaseSamplesFor(F);
2303 else {
2304 Samples = Reader->getSamplesFor(F);
2305 // Try search in previously inlined functions that were split or duplicated
2306 // into base.
2307 if (!Samples) {
// Fallback lookups in OutlineFunctionSamples: first by canonical name, then
// by the remapped name if a remapper is available.
2308 StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
2309 auto It = OutlineFunctionSamples.find(FunctionId(CanonName));
2310 if (It != OutlineFunctionSamples.end()) {
2311 Samples = &It->second;
2312 } else if (auto Remapper = Reader->getRemapper()) {
2313 if (auto RemppedName = Remapper->lookUpNameInProfile(CanonName)) {
2314 It = OutlineFunctionSamples.find(FunctionId(*RemppedName));
2315 if (It != OutlineFunctionSamples.end())
2316 Samples = &It->second;
2317 }
2318 }
2319 }
2320 }
2321
2322 if (Samples && !Samples->empty())
2323 return emitAnnotations(F);
2324 return false;
2325}
// NOTE(review): the line naming this constructor (listing line 2326) is
// missing from this listing; only its parameter list and member initializers
// are visible. Each argument is stored in the member of the corresponding
// name, FS is moved into its member, and the body is empty.
2327 std::string File, std::string RemappingFile, ThinOrFullLTOPhase LTOPhase,
2328 IntrusiveRefCntPtr<vfs::FileSystem> FS, bool DisableSampleProfileInlining,
2329 bool UseFlattenedProfile)
2330 : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
2331 LTOPhase(LTOPhase), FS(std::move(FS)),
2332 DisableSampleProfileInlining(DisableSampleProfileInlining),
2333 UseFlattenedProfile(UseFlattenedProfile) {}
2334
2339
// NOTE(review): the signature and first statements of this function (listing
// lines 2335-2338) are missing from this listing; M, AM, FAM and CG used
// below are introduced there. The visible part builds per-function analysis
// getters, constructs a SampleProfileLoader, and runs it on the module.
//
// Callbacks that fetch per-function analyses from FAM.
2340 auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
2341 return FAM.getResult<AssumptionAnalysis>(F);
2342 };
2343 auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
2344 return FAM.getResult<TargetIRAnalysis>(F);
2345 };
2346 auto GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
2347 return FAM.getResult<TargetLibraryAnalysis>(F);
2348 };
2349
// NOTE(review): the statement executed when FS is null (listing lines
// 2351-2352) is missing from this listing; presumably it installs a default
// file system -- confirm.
2350 if (!FS)
2353
// File names given to the pass take precedence; when they are empty the
// SampleProfileFile / SampleProfileRemappingFile values are used instead.
2354 SampleProfileLoader SampleLoader(
2355 ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
2356 ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
2357 : ProfileRemappingFileName,
2358 LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI, CG,
2359 DisableSampleProfileInlining, UseFlattenedProfile);
// If initialization fails, all analyses are reported as preserved.
2360 if (!SampleLoader.doInitialization(M, &FAM))
2361 return PreservedAnalyses::all();
2362
// NOTE(review): the definition of PSI (listing line 2363) is missing from
// this listing.
// If runOnModule reports no change, all analyses are reported as preserved.
2364 if (!SampleLoader.runOnModule(M, AM, PSI))
2365 return PreservedAnalyses::all();
2366
2367 return PreservedAnalyses::none();
2368}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
static bool runOnFunction(Function &F, bool PostInlining)
Provides ErrorOr<T> smart pointer.
#define DEBUG_TYPE
static cl::opt< unsigned > SizeLimit("eif-limit", cl::init(6), cl::Hidden, cl::desc("Size limit in Hexagon early if-conversion"))
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static LVReader * CurrentReader
Definition LVReader.cpp:151
Implements a lazy call graph analysis and related passes for the new pass manager.
Legalize the Machine IR a function s Machine IR
Definition Legalizer.cpp:80
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
static cl::opt< bool > SalvageStaleProfile("memprof-salvage-stale-profile", cl::desc("Salvage stale MemProf profile"), cl::init(false), cl::Hidden)
static const Function * getCalledFunction(const Value *V)
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
FunctionAnalysisManager FAM
This file defines the PriorityQueue class.
This file contains the declarations for profiling metadata utility functions.
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
std::pair< BasicBlock *, BasicBlock * > Edge
This file provides the interface for context-sensitive profile tracker used by CSSPGO.
This file provides the interface for the sampled PGO profile loader base implementation.
This file provides the utility functions for the sampled PGO loader base implementation.
This file provides the interface for SampleProfileMatcher.
This file provides the interface for the pseudo probe implementation for AutoFDO.
static cl::opt< unsigned > MinfuncsForStalenessError("min-functions-for-staleness-error", cl::Hidden, cl::init(50), cl::desc("Skip the check if the number of hot functions is smaller than " "the specified number."))
static cl::opt< unsigned > PrecentMismatchForStalenessError("precent-mismatch-for-staleness-error", cl::Hidden, cl::init(80), cl::desc("Reject the profile if the mismatch percent is higher than the " "given number."))
static cl::opt< bool > RemoveProbeAfterProfileAnnotation("sample-profile-remove-probe", cl::Hidden, cl::init(false), cl::desc("Remove pseudo-probe after sample profile annotation."))
static cl::opt< ReplayInlinerSettings::Fallback > ProfileInlineReplayFallback("sample-profile-inline-replay-fallback", cl::init(ReplayInlinerSettings::Fallback::Original), cl::values(clEnumValN(ReplayInlinerSettings::Fallback::Original, "Original", "All decisions not in replay send to original advisor (default)"), clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline, "AlwaysInline", "All decisions not in replay are inlined"), clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline", "All decisions not in replay are not inlined")), cl::desc("How sample profile inline replay treats sites that don't come " "from the replay. Original: defers to original advisor, " "AlwaysInline: inline all sites not in replay, NeverInline: " "inline no sites not in replay"), cl::Hidden)
static cl::opt< bool > OverwriteExistingWeights("overwrite-existing-weights", cl::Hidden, cl::init(false), cl::desc("Ignore existing branch weights on IR and always overwrite."))
static void updateIDTMetaData(Instruction &Inst, const SmallVectorImpl< InstrProfValueData > &CallTargets, uint64_t Sum)
Update indirect call target profile metadata for Inst.
static cl::opt< bool > AnnotateSampleProfileInlinePhase("annotate-sample-profile-inline-phase", cl::Hidden, cl::init(false), cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for " "sample-profile inline pass name."))
static cl::opt< std::string > ProfileInlineReplayFile("sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), cl::desc("Optimization remarks file containing inline remarks to be replayed " "by inlining from sample profile loader."), cl::Hidden)
static cl::opt< bool > CallsitePrioritizedInline("sample-profile-prioritized-inline", cl::Hidden, cl::desc("Use call site prioritized inlining for sample profile loader. " "Currently only CSSPGO is supported."))
static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate)
Check whether the indirect call promotion history of Inst allows the promotion for Candidate.
static SmallVector< InstrProfValueData, 2 > GetSortedValueDataFromCallTargets(const SampleRecord::CallTargetMap &M)
Returns the sorted CallTargetMap M by count in descending order.
#define CSINLINE_DEBUG
static cl::opt< ReplayInlinerSettings::Scope > ProfileInlineReplayScope("sample-profile-inline-replay-scope", cl::init(ReplayInlinerSettings::Scope::Function), cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function", "Replay on functions that have remarks associated " "with them (default)"), clEnumValN(ReplayInlinerSettings::Scope::Module, "Module", "Replay on the entire module")), cl::desc("Whether inline replay should be applied to the entire " "Module or just the Functions (default) that are present as " "callers in remarks during sample profile inlining."), cl::Hidden)
static cl::opt< unsigned > ProfileICPRelativeHotness("sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25), cl::desc("Relative hotness percentage threshold for indirect " "call promotion in proirity-based sample profile loader inlining."))
static cl::opt< unsigned > ProfileICPRelativeHotnessSkip("sample-profile-icp-relative-hotness-skip", cl::Hidden, cl::init(1), cl::desc("Skip relative hotness check for ICP up to given number of targets."))
static cl::opt< bool > UsePreInlinerDecision("sample-profile-use-preinliner", cl::Hidden, cl::desc("Use the preinliner decisions stored in profile context."))
static cl::opt< bool > AllowRecursiveInline("sample-profile-recursive-inline", cl::Hidden, cl::desc("Allow sample loader inliner to inline recursive calls."))
static cl::opt< CallSiteFormat::Format > ProfileInlineReplayFormat("sample-profile-inline-replay-format", cl::init(CallSiteFormat::Format::LineColumnDiscriminator), cl::values(clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"), clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn", "<Line Number>:<Column Number>"), clEnumValN(CallSiteFormat::Format::LineDiscriminator, "LineDiscriminator", "<Line Number>.<Discriminator>"), clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator, "LineColumnDiscriminator", "<Line Number>:<Column Number>.<Discriminator> (default)")), cl::desc("How sample profile inline replay file is formatted"), cl::Hidden)
static cl::opt< unsigned > HotFuncCutoffForStalenessError("hot-func-cutoff-for-staleness-error", cl::Hidden, cl::init(800000), cl::desc("A function is considered hot for staleness error check if its " "total sample count is above the specified percentile"))
This file provides the interface for the sampled PGO loader pass.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
Defines the virtual file system interface vfs::FileSystem.
Value * RHS
Value * LHS
bool empty() const
Returns true if the analysis manager has an empty results cache.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
static bool isPseudoProbeDiscriminator(unsigned Discriminator)
const DILocation * cloneWithDiscriminator(unsigned Discriminator) const
Returns a new DILocation with updated Discriminator.
A debug info location.
Definition DebugLoc.h:124
LLVM_ABI unsigned getLine() const
Definition DebugLoc.cpp:54
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:237
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
Represents either an error or a value T.
Definition ErrorOr.h:56
Class to represent profile counts.
Definition Function.h:297
DISubprogram * getSubprogram() const
Get the attached subprogram.
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
Definition Globals.cpp:77
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:328
static InlineCost getNever(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
Definition InlineCost.h:132
static InlineCost getAlways(const char *Reason, std::optional< CostBenefitPair > CostBenefit=std::nullopt)
Definition InlineCost.h:127
static InlineCost get(int Cost, int Threshold, int StaticBonus=0)
Definition InlineCost.h:121
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
A smart pointer to a reference-counted object that inherits from RefCountedBase or ThreadSafeRefCount...
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
An analysis pass which computes the call graph for a module.
A lazily constructed view of the call graph of a module.
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition MapVector.h:175
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:119
ValueT lookup(const KeyT &Key) const
Definition MapVector.h:103
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PriorityQueue - This class behaves like std::priority_queue and provides a few additional convenience...
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
LLVM_ABI void refresh(std::unique_ptr< ProfileSummary > &&Other=nullptr)
If a summary is provided as argument, use that.
LLVM_ABI bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const
Returns true if count C is considered hot with regard to a given hot percentile cutoff value.
void computeDominanceAndLoopInfo(FunctionT &F)
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
LLVM_ABI SampleProfileLoaderPass(std::string File="", std::string RemappingFile="", ThinOrFullLTOPhase LTOPhase=ThinOrFullLTOPhase::None, IntrusiveRefCntPtr< vfs::FileSystem > FS=nullptr, bool DisableSampleProfileInlining=false, bool UseFlattenedProfile=false)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition StringMap.h:285
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
const ParentTy * getParent() const
Definition ilist_node.h:34
Representation of the samples collected for a function.
Definition SampleProf.h:777
void findInlinedFunctions(DenseSet< GlobalValue::GUID > &S, const HashKeyMap< std::unordered_map, FunctionId, Function * > &SymbolMap, uint64_t Threshold) const
Recursively traverses all children, if the total sample count of the corresponding function is no les...
static LLVM_ABI bool ProfileIsCS
FunctionId getFunction() const
Return the function name.
static LLVM_ABI bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
SampleContext & getContext() const
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight=1)
Merge the samples in Other into this one.
static LLVM_ABI LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
uint64_t getHeadSamplesEstimate() const
Return an estimate of the sample count of the function entry basic block.
uint64_t getGUID() const
Return the GUID of the context's name.
const BodySampleMap & getBodySamples() const
Return all the samples collected in the body of the function.
static LLVM_ABI bool UseMD5
Whether the profile uses MD5 to represent string.
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
bool hasAttribute(ContextAttributeMask A)
Definition SampleProf.h:639
static LLVM_ABI ErrorOr< std::unique_ptr< SampleProfileReader > > create(StringRef Filename, LLVMContext &C, vfs::FileSystem &FS, FSDiscriminatorPass P=FSDiscriminatorPass::Base, StringRef RemapFilename="")
Create a sample profile reader appropriate to the file format.
std::unordered_map< FunctionId, uint64_t > CallTargetMap
Definition SampleProf.h:363
static const SortedCallTargetSet sortCallTargets(const CallTargetMap &Targets)
Sort call targets in descending order of call frequency.
Definition SampleProf.h:432
static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, float DistributionFactor)
Prorate call targets by a distribution factor.
Definition SampleProf.h:441
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Changed
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
void checkExpectAnnotations(Instruction &I, const ArrayRef< uint32_t > ExistingWeights, bool IsFrontend)
checkExpectAnnotations - compares PGO counters to the thresholds used for llvm.expect and warns if th...
DiagnosticInfoOptimizationBase::Argument NV
LLVM_ABI CallBase & promoteIndirectCall(CallBase &CB, Function *F, uint64_t Count, uint64_t TotalCount, bool AttachProfToDirectCall, OptimizationRemarkEmitter *ORE)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
static FunctionId getRepInFormat(StringRef Name)
Get the proper representation of a string according to whether the current Format uses MD5 to represe...
std::map< FunctionId, FunctionSamples > FunctionSamplesMap
Definition SampleProf.h:766
bool callsiteIsHot(const FunctionSamples *CallsiteFS, ProfileSummaryInfo *PSI, bool ProfAccForSymsInList)
Return true if the given callsite is hot with respect to the hot cutoff threshold.
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
LLVM_ABI IntrusiveRefCntPtr< FileSystem > getRealFileSystem()
Gets a vfs::FileSystem for the 'real' file system, as seen by the operating system.
This is an optimization pass for GlobalISel generic memory operations.
static bool isIndirectCall(const MachineInstr &MI)
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
LLVM_ABI bool isLegalToPromote(const CallBase &CB, Function *Callee, const char **FailureReason=nullptr)
Return true if the given indirect call site can be made to call Callee.
LLVM_ABI cl::opt< int > ProfileInlineLimitMin
bool succ_empty(const Instruction *I)
Definition CFG.h:256
LLVM_ABI InlineResult InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes=false, AAResults *CalleeAAR=nullptr, bool InsertLifetime=true, Function *ForwardVarArgsTo=nullptr, OptimizationRemarkEmitter *ORE=nullptr)
This function inlines the called function into the basic block of the caller.
InstructionCost Cost
static cl::opt< bool > ProfileAccurateForSymsInList("profile-accurate-for-symsinlist", cl::Hidden, cl::init(true), cl::desc("For symbols in profile symbol list, regard their profiles to " "be accurate. It may be overridden by profile-sample-accurate. "))
static cl::opt< bool > ProfileMergeInlinee("sample-profile-merge-inlinee", cl::Hidden, cl::init(true), cl::desc("Merge past inlinee's profile to outline version if sample " "profile loader decided not to inline a call site. It will " "only be enabled when top-down order of profile loading is " "enabled. "))
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
static cl::opt< bool > DisableSampleLoaderInlining("disable-sample-loader-inlining", cl::Hidden, cl::init(false), cl::desc("If true, artificially skip inline transformation in sample-loader " "pass, and merge (or scale) profiles (as configured by " "--sample-profile-merge-inlinee)."))
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2116
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
static cl::opt< bool > UseProfiledCallGraph("use-profiled-call-graph", cl::init(true), cl::Hidden, cl::desc("Process functions in a top-down order " "defined by the profiled call graph when " "-sample-profile-top-down-load is on."))
static cl::opt< bool > ProfileSampleAccurate("profile-sample-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. "))
InnerAnalysisManagerProxy< FunctionAnalysisManager, Module > FunctionAnalysisManagerModuleProxy
Provide the FunctionAnalysisManager to Module proxy.
static void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector< Function * > &FunctionOrderList)
LLVM_ABI void setProbeDistributionFactor(Instruction &Inst, float Factor)
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static cl::opt< bool > ProfileSampleBlockAccurate("profile-sample-block-accurate", cl::Hidden, cl::init(false), cl::desc("If the sample profile is accurate, we will mark all un-sampled " "branches and calls as having 0 samples. Otherwise, treat " "them conservatively as unknown. "))
LLVM_ABI std::string AnnotateInlinePassName(InlineContext IC)
ThinOrFullLTOPhase
This enumerates the LLVM full LTO or ThinLTO optimization phases.
Definition Pass.h:77
cl::opt< bool > SampleProfileUseProfi
LLVM_ABI void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIndx, uint32_t MaxMDCount=3)
Get the value profile data for value site SiteIndx from InstrProfR and annotate the instruction Inst w...
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1624
llvm::cl::opt< bool > UseIterativeBFIInference
LLVM_ABI std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block, const Function &Callee, const Function &Caller, const InlineCost &IC, bool ForProfileContext=false, const char *PassName=nullptr)
Emit ORE message based on cost (default heuristic).
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
Function::ProfileCount ProfileCount
static cl::opt< std::string > SampleProfileRemappingFile("sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden)
LLVM_ABI SmallVector< InstrProfValueData, 4 > getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, uint64_t &TotalC, bool GetNoICPValue=false)
Extract the value profile data from Inst and returns them if Inst is annotated with value profile dat...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
std::unique_ptr< InlineAdvisor > getReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context, std::unique_ptr< InlineAdvisor > OriginalAdvisor, const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks, InlineContext IC)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
LLVM_ABI cl::opt< int > SampleHotCallSiteThreshold
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI InlineCost getInlineCost(CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref< AssumptionCache &(Function &)> GetAssumptionCache, function_ref< const TargetLibraryInfo &(Function &)> GetTLI, function_ref< BlockFrequencyInfo &(Function &)> GetBFI=nullptr, ProfileSummaryInfo *PSI=nullptr, OptimizationRemarkEmitter *ORE=nullptr, function_ref< EphemeralValuesCache &(Function &)> GetEphValuesCache=nullptr)
Get an InlineCost object representing the cost of inlining this callsite.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
LLVM_ABI void updateProfileCallee(Function *Callee, int64_t EntryDelta, const ValueMap< const Value *, WeakTrackingVH > *VMap=nullptr)
Updates profile information by adjusting the entry count by adding EntryDelta then scaling callsite i...
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
LLVM_ABI cl::opt< int > SampleColdCallSiteThreshold
LLVM_ABI InlineParams getInlineParams()
Generate the parameters to tune the inline cost analysis based only on the commandline options.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1847
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
static bool skipProfileForFunction(const Function &F)
LLVM_ABI cl::opt< bool > SortProfiledSCC
static cl::opt< std::string > SampleProfileFile("sample-profile-file", cl::init(""), cl::value_desc("filename"), cl::desc("Profile file loaded by -sample-profile"), cl::Hidden)
cl::opt< bool > SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false), cl::desc("Salvage unused profile by matching with new " "functions on call graph."))
static cl::opt< bool > ProfileTopDownLoad("sample-profile-top-down-load", cl::Hidden, cl::init(true), cl::desc("Do profile annotation and inlining for functions in top-down " "order of call graph during sample profile loading. It only " "works for new pass manager. "))
static cl::opt< unsigned > MaxNumPromotions("icp-max-prom", cl::init(3), cl::Hidden, cl::desc("Max number of promotions for a single indirect " "call callsite"))
LLVM_ABI cl::opt< int > ProfileInlineLimitMax
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
cl::opt< bool > EnableExtTspBlockPlacement
const uint64_t NOMORE_ICP_MAGICNUM
Magic number in the value profile metadata showing a target has been promoted for the instruction and...
Definition Metadata.h:59
LLVM_ABI cl::opt< int > ProfileInlineGrowthLimit
static cl::opt< bool > ProfileSizeInline("sample-profile-inline-size", cl::Hidden, cl::init(false), cl::desc("Inline cold call sites in profile loader if it's beneficial " "for code size."))
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
constexpr const char * PseudoProbeDescMetadataName
Definition PseudoProbe.h:26
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
A wrapper of binary function with basic blocks and jumps.
std::optional< bool > AllowRecursiveCall
Indicate whether we allow inlining for recursive call.
Definition InlineCost.h:240
std::optional< bool > ComputeFullInlineCost
Compute inline cost even when the cost has exceeded the threshold.
Definition InlineCost.h:234
static std::optional< uint32_t > extractDwarfBaseDiscriminator(uint32_t Value)
Definition PseudoProbe.h:81