Thanks to visit codestin.com
Credit goes to llvm.org

LLVM 22.0.0git
SampleProfileMatcher.cpp
Go to the documentation of this file.
//===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SampleProfileMatcher used for stale
// profile matching.
//
//===----------------------------------------------------------------------===//
13
#include "llvm/Transforms/IPO/SampleProfileMatcher.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/LongestCommonSequence.h"

using namespace llvm;
using namespace sampleprof;

#define DEBUG_TYPE "sample-profile-matcher"
25
26namespace llvm {
27
29 "func-profile-similarity-threshold", cl::Hidden, cl::init(80),
30 cl::desc("Consider a profile matches a function if the similarity of their "
31 "callee sequences is above the specified percentile."));
32
34 "min-func-count-for-cg-matching", cl::Hidden, cl::init(5),
35 cl::desc("The minimum number of basic blocks required for a function to "
36 "run stale profile call graph matching."));
37
39 "min-call-count-for-cg-matching", cl::Hidden, cl::init(3),
40 cl::desc("The minimum number of call anchors required for a function to "
41 "run stale profile call graph matching."));
42
44 "load-func-profile-for-cg-matching", cl::Hidden, cl::init(true),
46 "Load top-level profiles that the sample reader initially skipped for "
47 "the call-graph matching (only meaningful for extended binary "
48 "format)"));
49
54
56 "salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
57 cl::desc("The maximum number of callsites in a function, above which stale "
58 "profile matching will be skipped."));
59
60} // end namespace llvm
61
62void SampleProfileMatcher::findIRAnchors(const Function &F,
63 AnchorMap &IRAnchors) const {
64 // For inlined code, recover the original callsite and callee by finding the
65 // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
66 // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
67 auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
68 assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
69 const DILocation *PrevDIL = nullptr;
70 do {
71 PrevDIL = DIL;
72 DIL = DIL->getInlinedAt();
73 } while (DIL->getInlinedAt());
74
75 LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
77 StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
78 return std::make_pair(Callsite, FunctionId(CalleeName));
79 };
80
81 auto GetCanonicalCalleeName = [](const CallBase *CB) {
82 StringRef CalleeName = UnknownIndirectCallee;
83 if (Function *Callee = CB->getCalledFunction())
84 CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
85 return CalleeName;
86 };
87
88 // Extract profile matching anchors in the IR.
89 for (auto &BB : F) {
90 for (auto &I : BB) {
91 DILocation *DIL = I.getDebugLoc();
92 if (!DIL)
93 continue;
94
96 if (auto Probe = extractProbe(I)) {
97 // Flatten inlined IR for the matching.
98 if (DIL->getInlinedAt()) {
99 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
100 } else {
101 // Use empty StringRef for basic block probe.
102 StringRef CalleeName;
103 if (const auto *CB = dyn_cast<CallBase>(&I)) {
104 // Skip the probe inst whose callee name is "llvm.pseudoprobe".
105 if (!isa<IntrinsicInst>(&I))
106 CalleeName = GetCanonicalCalleeName(CB);
107 }
108 LineLocation Loc = LineLocation(Probe->Id, 0);
109 IRAnchors.emplace(Loc, FunctionId(CalleeName));
110 }
111 }
112 } else {
113 // TODO: For line-number based profile(AutoFDO), currently only support
114 // find callsite anchors. In future, we need to parse all the non-call
115 // instructions to extract the line locations for profile matching.
117 continue;
118
119 if (DIL->getInlinedAt()) {
120 IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
121 } else {
122 LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
124 StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
125 IRAnchors.emplace(Callsite, FunctionId(CalleeName));
126 }
127 }
128 }
129 }
130}
131
132void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
133 AnchorMap &ProfileAnchors) const {
134 auto isInvalidLineOffset = [](uint32_t LineOffset) {
135 return LineOffset & 0x8000;
136 };
137
138 auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
139 AnchorMap &ProfileAnchors) {
140 auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
141 if (!Ret.second) {
142 // For multiple callees, which indicates it's an indirect call, we use a
143 // dummy name(UnknownIndirectCallee) as the indrect callee name.
144 Ret.first->second = FunctionId(UnknownIndirectCallee);
145 }
146 };
147
148 for (const auto &I : FS.getBodySamples()) {
149 const LineLocation &Loc = I.first;
150 if (isInvalidLineOffset(Loc.LineOffset))
151 continue;
152 for (const auto &C : I.second.getCallTargets())
153 InsertAnchor(Loc, C.first, ProfileAnchors);
154 }
155
156 for (const auto &I : FS.getCallsiteSamples()) {
157 const LineLocation &Loc = I.first;
158 if (isInvalidLineOffset(Loc.LineOffset))
159 continue;
160 for (const auto &C : I.second)
161 InsertAnchor(Loc, C.first, ProfileAnchors);
162 }
163}
164
165bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
166 Function *&FuncWithoutProfile) {
167 FuncWithoutProfile = nullptr;
168 auto R = FunctionsWithoutProfile.find(IRFuncName);
169 if (R != FunctionsWithoutProfile.end())
170 FuncWithoutProfile = R->second;
171 return !FuncWithoutProfile;
172}
173
174bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
175 return SymbolMap->find(ProfileFuncName) == SymbolMap->end();
176}
177
178bool SampleProfileMatcher::functionMatchesProfile(
179 const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
180 bool FindMatchedProfileOnly) {
181 if (IRFuncName == ProfileFuncName)
182 return true;
184 return false;
185
186 // If IR function doesn't have profile and the profile is unused, try
187 // matching them.
188 Function *IRFunc = nullptr;
189 if (functionHasProfile(IRFuncName, IRFunc) ||
190 !isProfileUnused(ProfileFuncName))
191 return false;
192
193 assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
194 "IR function should be different from profile function to match");
195 return functionMatchesProfile(*IRFunc, ProfileFuncName,
196 FindMatchedProfileOnly);
197}
198
200SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
201 const AnchorList &AnchorList2,
202 bool MatchUnusedFunction) {
203 LocToLocMap MatchedAnchors;
205 AnchorList1, AnchorList2,
206 [&](const FunctionId &A, const FunctionId &B) {
207 return functionMatchesProfile(
208 A, B,
209 !MatchUnusedFunction // Find matched function only
210 );
211 },
212 [&](LineLocation A, LineLocation B) {
213 MatchedAnchors.try_emplace(A, B);
214 });
215 return MatchedAnchors;
216}
217
218void SampleProfileMatcher::matchNonCallsiteLocs(
219 const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
220 LocToLocMap &IRToProfileLocationMap) {
221 auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
222 // Skip the unchanged location mapping to save memory.
223 if (From != To)
224 IRToProfileLocationMap.insert({From, To});
225 };
226
227 // Use function's beginning location as the initial anchor.
228 int32_t LocationDelta = 0;
229 SmallVector<LineLocation> LastMatchedNonAnchors;
230 for (const auto &IR : IRAnchors) {
231 const auto &Loc = IR.first;
232 bool IsMatchedAnchor = false;
233 // Match the anchor location in lexical order.
234 auto R = MatchedAnchors.find(Loc);
235 if (R != MatchedAnchors.end()) {
236 const auto &Candidate = R->second;
237 InsertMatching(Loc, Candidate);
238 LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
239 << " is matched from " << Loc << " to " << Candidate
240 << "\n");
241 LocationDelta = Candidate.LineOffset - Loc.LineOffset;
242
243 // Match backwards for non-anchor locations.
244 // The locations in LastMatchedNonAnchors have been matched forwards
245 // based on the previous anchor, spilt it evenly and overwrite the
246 // second half based on the current anchor.
247 for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
248 I < LastMatchedNonAnchors.size(); I++) {
249 const auto &L = LastMatchedNonAnchors[I];
250 uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
251 LineLocation Candidate(CandidateLineOffset, L.Discriminator);
252 InsertMatching(L, Candidate);
253 LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
254 << " to " << Candidate << "\n");
255 }
256
257 IsMatchedAnchor = true;
258 LastMatchedNonAnchors.clear();
259 }
260
261 // Match forwards for non-anchor locations.
262 if (!IsMatchedAnchor) {
263 uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
264 LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
265 InsertMatching(Loc, Candidate);
266 LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
267 << Candidate << "\n");
268 LastMatchedNonAnchors.emplace_back(Loc);
269 }
270 }
271}
272
273// Filter the non-call locations from IRAnchors and ProfileAnchors and write
274// them into a list for random access later.
275void SampleProfileMatcher::getFilteredAnchorList(
276 const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
277 AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
278 for (const auto &I : IRAnchors) {
279 if (I.second.stringRef().empty())
280 continue;
281 FilteredIRAnchorsList.emplace_back(I);
282 }
283
284 for (const auto &I : ProfileAnchors)
285 FilteredProfileAnchorList.emplace_back(I);
286}
287
288// Call target name anchor based profile fuzzy matching.
289// Input:
290// For IR locations, the anchor is the callee name of direct callsite; For
291// profile locations, it's the call target name for BodySamples or inlinee's
292// profile name for CallsiteSamples.
293// Matching heuristic:
294// First match all the anchors using the diff algorithm, then split the
295// non-anchor locations between the two anchors evenly, first half are matched
296// based on the start anchor, second half are matched based on the end anchor.
297// For example, given:
298// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
299// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
300// The matching gives:
301// [1, 2(foo), 3, 5, 6(bar), 7]
302// | | | | | |
303// [1, 2, 3(foo), 4, 7, 8(bar), 9]
304// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
305void SampleProfileMatcher::runStaleProfileMatching(
306 const Function &F, const AnchorMap &IRAnchors,
307 const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
308 bool RunCFGMatching, bool RunCGMatching) {
309 if (!RunCFGMatching && !RunCGMatching)
310 return;
311 LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
312 << "\n");
313 assert(IRToProfileLocationMap.empty() &&
314 "Run stale profile matching only once per function");
315
316 AnchorList FilteredProfileAnchorList;
317 AnchorList FilteredIRAnchorsList;
318 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
319 FilteredProfileAnchorList);
320
321 if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
322 return;
323
324 if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
325 FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
326 LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
327 << " because the number of callsites in the IR is "
328 << FilteredIRAnchorsList.size()
329 << " and in the profile is "
330 << FilteredProfileAnchorList.size() << "\n");
331 return;
332 }
333
334 // Match the callsite anchors by finding the longest common subsequence
335 // between IR and profile.
336 // Define a match between two anchors as follows:
337 // 1) The function names of anchors are the same.
338 // 2) The similarity between the anchor functions is above a threshold if
339 // RunCGMatching is set.
340 // For 2), we only consider the anchor functions from IR and profile don't
341 // appear on either side to reduce the matching scope. Note that we need to
342 // use IR anchor as base(A side) to align with the order of
343 // IRToProfileLocationMap.
344 LocToLocMap MatchedAnchors =
345 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
346 RunCGMatching /* Match unused functions */);
347
348 // CFG level matching:
349 // Apply the callsite matchings to infer matching for the basic
350 // block(non-callsite) locations and write the result to
351 // IRToProfileLocationMap.
352 if (RunCFGMatching)
353 matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
354}
355
356void SampleProfileMatcher::runOnFunction(Function &F) {
357 // We need to use flattened function samples for matching.
358 // Unlike IR, which includes all callsites from the source code, the callsites
359 // in profile only show up when they are hit by samples, i,e. the profile
360 // callsites in one context may differ from those in another context. To get
361 // the maximum number of callsites, we merge the function profiles from all
362 // contexts, aka, the flattened profile to find profile anchors.
363 const auto *FSForMatching = getFlattenedSamplesFor(F);
364 if (SalvageUnusedProfile && !FSForMatching) {
365 // Apply the matching in place to find the new function's matched profile.
366 auto R = FuncToProfileNameMap.find(&F);
367 if (R != FuncToProfileNameMap.end()) {
368 FSForMatching = getFlattenedSamplesFor(R->second);
369 // Try to find the salvaged top-level profiles that are explicitly loaded
370 // for the matching, see "functionMatchesProfileHelper" for the details.
371 if (!FSForMatching && LoadFuncProfileforCGMatching)
372 FSForMatching = Reader.getSamplesFor(R->second.stringRef());
373 }
374 }
375 if (!FSForMatching)
376 return;
377
378 // Anchors for IR. It's a map from IR location to callee name, callee name is
379 // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
380 // for unknown indrect callee name.
381 AnchorMap IRAnchors;
382 findIRAnchors(F, IRAnchors);
383 // Anchors for profile. It's a map from callsite location to a set of callee
384 // name.
385 AnchorMap ProfileAnchors;
386 findProfileAnchors(*FSForMatching, ProfileAnchors);
387
388 // Compute the callsite match states for profile staleness report.
390 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
391
393 return;
394 // For probe-based profiles, run matching only when profile checksum is
395 // mismatched.
396 bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
397 !ProbeManager->profileIsValid(F, *FSForMatching);
398 bool RunCFGMatching =
399 !FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
400 bool RunCGMatching = SalvageUnusedProfile;
401 // For imported functions, the checksum metadata(pseudo_probe_desc) are
402 // dropped, so we leverage function attribute(profile-checksum-mismatch) to
403 // transfer the info: add the attribute during pre-link phase and check it
404 // during post-link phase(see "profileIsValid").
405 if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
406 F.addFnAttr("profile-checksum-mismatch");
407
408 // The matching result will be saved to IRToProfileLocationMap, create a
409 // new map for each function.
410 auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
411 runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
412 RunCFGMatching, RunCGMatching);
413 // Find and update callsite match states after matching.
414 if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
415 recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
416 &IRToProfileLocationMap);
417}
418
419void SampleProfileMatcher::recordCallsiteMatchStates(
420 const Function &F, const AnchorMap &IRAnchors,
421 const AnchorMap &ProfileAnchors,
422 const LocToLocMap *IRToProfileLocationMap) {
423 bool IsPostMatch = IRToProfileLocationMap != nullptr;
424 auto &CallsiteMatchStates =
425 FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
426
427 auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
428 // IRToProfileLocationMap is null in pre-match phrase.
429 if (!IRToProfileLocationMap)
430 return IRLoc;
431 const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
432 if (ProfileLoc != IRToProfileLocationMap->end())
433 return ProfileLoc->second;
434 else
435 return IRLoc;
436 };
437
438 for (const auto &I : IRAnchors) {
439 // After fuzzy profile matching, use the matching result to remap the
440 // current IR callsite.
441 const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
442 const auto &IRCalleeId = I.second;
443 const auto &It = ProfileAnchors.find(ProfileLoc);
444 if (It == ProfileAnchors.end())
445 continue;
446 const auto &ProfCalleeId = It->second;
447 if (IRCalleeId == ProfCalleeId) {
448 auto It = CallsiteMatchStates.find(ProfileLoc);
449 if (It == CallsiteMatchStates.end())
450 CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
451 else if (IsPostMatch) {
452 if (It->second == MatchState::InitialMatch)
453 It->second = MatchState::UnchangedMatch;
454 else if (It->second == MatchState::InitialMismatch)
455 It->second = MatchState::RecoveredMismatch;
456 }
457 }
458 }
459
460 // Check if there are any callsites in the profile that does not match to any
461 // IR callsites.
462 for (const auto &I : ProfileAnchors) {
463 const auto &Loc = I.first;
464 assert(!I.second.stringRef().empty() && "Callees should not be empty");
465 auto It = CallsiteMatchStates.find(Loc);
466 if (It == CallsiteMatchStates.end())
467 CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
468 else if (IsPostMatch) {
469 // Update the state if it's not matched(UnchangedMatch or
470 // RecoveredMismatch).
471 if (It->second == MatchState::InitialMismatch)
472 It->second = MatchState::UnchangedMismatch;
473 else if (It->second == MatchState::InitialMatch)
474 It->second = MatchState::RemovedMatch;
475 }
476 }
477}
478
479void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
480 bool IsTopLevel) {
481 const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
482 // Skip the function that is external or renamed.
483 if (!FuncDesc)
484 return;
485
486 if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
487 if (IsTopLevel)
488 NumStaleProfileFunc++;
489 // Given currently all probe ids are after block probe ids, once the
490 // checksum is mismatched, it's likely all the callites are mismatched and
491 // dropped. We conservatively count all the samples as mismatched and stop
492 // counting the inlinees' profiles.
493 MismatchedFunctionSamples += FS.getTotalSamples();
494 return;
495 }
496
497 // Even the current-level function checksum is matched, it's possible that the
498 // nested inlinees' checksums are mismatched that affect the inlinee's sample
499 // loading, we need to go deeper to check the inlinees' function samples.
500 // Similarly, count all the samples as mismatched if the inlinee's checksum is
501 // mismatched using this recursive function.
502 for (const auto &I : FS.getCallsiteSamples())
503 for (const auto &CS : I.second)
504 countMismatchedFuncSamples(CS.second, false);
505}
506
507void SampleProfileMatcher::countMismatchedCallsiteSamples(
508 const FunctionSamples &FS) {
509 auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
510 // Skip it if no mismatched callsite or this is an external function.
511 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
512 return;
513 const auto &CallsiteMatchStates = It->second;
514
515 auto findMatchState = [&](const LineLocation &Loc) {
516 auto It = CallsiteMatchStates.find(Loc);
517 if (It == CallsiteMatchStates.end())
518 return MatchState::Unknown;
519 return It->second;
520 };
521
522 auto AttributeMismatchedSamples = [&](const enum MatchState &State,
523 uint64_t Samples) {
524 if (isMismatchState(State))
525 MismatchedCallsiteSamples += Samples;
526 else if (State == MatchState::RecoveredMismatch)
527 RecoveredCallsiteSamples += Samples;
528 };
529
530 // The non-inlined callsites are saved in the body samples of function
531 // profile, go through it to count the non-inlined callsite samples.
532 for (const auto &I : FS.getBodySamples())
533 AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
534
535 // Count the inlined callsite samples.
536 for (const auto &I : FS.getCallsiteSamples()) {
537 auto State = findMatchState(I.first);
538 uint64_t CallsiteSamples = 0;
539 for (const auto &CS : I.second)
540 CallsiteSamples += CS.second.getTotalSamples();
541 AttributeMismatchedSamples(State, CallsiteSamples);
542
543 if (isMismatchState(State))
544 continue;
545
546 // When the current level of inlined call site matches the profiled call
547 // site, we need to go deeper along the inline tree to count mismatches from
548 // lower level inlinees.
549 for (const auto &CS : I.second)
550 countMismatchedCallsiteSamples(CS.second);
551 }
552}
553
554void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
555 auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
556 // Skip it if no mismatched callsite or this is an external function.
557 if (It == FuncCallsiteMatchStates.end() || It->second.empty())
558 return;
559 const auto &MatchStates = It->second;
560 [[maybe_unused]] bool OnInitialState =
561 isInitialState(MatchStates.begin()->second);
562 for (const auto &I : MatchStates) {
563 TotalProfiledCallsites++;
564 assert(
565 (OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
566 "Profile matching state is inconsistent");
567
568 if (isMismatchState(I.second))
569 NumMismatchedCallsites++;
570 else if (I.second == MatchState::RecoveredMismatch)
571 NumRecoveredCallsites++;
572 }
573}
574
575void SampleProfileMatcher::countCallGraphRecoveredSamples(
576 const FunctionSamples &FS,
577 std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {
578 if (CallGraphRecoveredProfiles.count(FS.getFunction())) {
579 NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
580 return;
581 }
582
583 for (const auto &CM : FS.getCallsiteSamples()) {
584 for (const auto &CS : CM.second) {
585 countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles);
586 }
587 }
588}
589
590void SampleProfileMatcher::computeAndReportProfileStaleness() {
592 return;
593
594 std::unordered_set<FunctionId> CallGraphRecoveredProfiles;
596 for (const auto &I : FuncToProfileNameMap) {
597 CallGraphRecoveredProfiles.insert(I.second);
598 if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage()))
599 continue;
600 NumCallGraphRecoveredProfiledFunc++;
601 }
602 }
603
604 // Count profile mismatches for profile staleness report.
605 for (const auto &F : M) {
607 continue;
608 // As the stats will be merged by linker, skip reporting the metrics for
609 // imported functions to avoid repeated counting.
611 continue;
612 const auto *FS = Reader.getSamplesFor(F);
613 if (!FS)
614 continue;
615 TotalProfiledFunc++;
616 TotalFunctionSamples += FS->getTotalSamples();
617
618 if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
619 countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles);
620
621 // Checksum mismatch is only used in pseudo-probe mode.
623 countMismatchedFuncSamples(*FS, true);
624
625 // Count mismatches and samples for calliste.
626 countMismatchCallsites(*FS);
627 countMismatchedCallsiteSamples(*FS);
628 }
629
632 errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
633 << ") of functions' profile are invalid and ("
634 << MismatchedFunctionSamples << "/" << TotalFunctionSamples
635 << ") of samples are discarded due to function hash mismatch.\n";
636 }
638 errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
639 << TotalProfiledFunc << ") of functions' profile are matched and ("
640 << NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
641 << ") of samples are reused by call graph matching.\n";
642 }
643
644 errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
645 << TotalProfiledCallsites
646 << ") of callsites' profile are invalid and ("
647 << (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
648 << TotalFunctionSamples
649 << ") of samples are discarded due to callsite location mismatch.\n";
650 errs() << "(" << NumRecoveredCallsites << "/"
651 << (NumRecoveredCallsites + NumMismatchedCallsites)
652 << ") of callsites and (" << RecoveredCallsiteSamples << "/"
653 << (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
654 << ") of samples are recovered by stale profile matching.\n";
655 }
656
658 LLVMContext &Ctx = M.getContext();
659 MDBuilder MDB(Ctx);
660
663 ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);
664 ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
665 ProfStatsVec.emplace_back("MismatchedFunctionSamples",
666 MismatchedFunctionSamples);
667 ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);
668 }
669
671 ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc",
672 NumCallGraphRecoveredProfiledFunc);
673 ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples",
674 NumCallGraphRecoveredFuncSamples);
675 }
676
677 ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
678 ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);
679 ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
680 ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
681 MismatchedCallsiteSamples);
682 ProfStatsVec.emplace_back("RecoveredCallsiteSamples",
683 RecoveredCallsiteSamples);
684
685 auto *MD = MDB.createLLVMStats(ProfStatsVec);
686 auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
687 NMD->addOperand(MD);
688 }
689}
690
691void SampleProfileMatcher::findFunctionsWithoutProfile() {
692 // TODO: Support MD5 profile.
694 return;
695 StringSet<> NamesInProfile;
696 if (auto NameTable = Reader.getNameTable()) {
697 for (auto Name : *NameTable)
698 NamesInProfile.insert(Name.stringRef());
699 }
700
701 for (auto &F : M) {
702 // Skip declarations, as even if the function can be matched, we have
703 // nothing to do with it.
704 if (F.isDeclaration())
705 continue;
706
707 StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
708 const auto *FS = getFlattenedSamplesFor(F);
709 if (FS)
710 continue;
711
712 // For extended binary, functions fully inlined may not be loaded in the
713 // top-level profile, so check the NameTable which has the all symbol names
714 // in profile.
715 if (NamesInProfile.count(CanonFName))
716 continue;
717
718 // For extended binary, non-profiled function symbols are in the profile
719 // symbol list table.
720 if (PSL && PSL->contains(CanonFName))
721 continue;
722
723 LLVM_DEBUG(dbgs() << "Function " << CanonFName
724 << " is not in profile or profile symbol list.\n");
725 FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
726 }
727}
728
729bool SampleProfileMatcher::functionMatchesProfileHelper(
730 const Function &IRFunc, const FunctionId &ProfFunc) {
731 // The value is in the range [0, 1]. The bigger the value is, the more similar
732 // two sequences are.
733 float Similarity = 0.0;
734
735 // Match the functions if they have the same base name(after demangling) and
736 // skip the similarity check.
737 ItaniumPartialDemangler Demangler;
738 // Helper lambda to demangle and get the base name. If the demangling failed,
739 // return an empty string.
740 auto GetBaseName = [&](StringRef FName) {
741 auto FunctionName = FName.str();
742 if (Demangler.partialDemangle(FunctionName.c_str()))
743 return std::string();
744 size_t BaseNameSize = 0;
745 // The demangler API follows the __cxa_demangle one, and thus needs a
746 // pointer that originates from malloc (or nullptr) and the caller is
747 // responsible for free()-ing the buffer.
748 char *BaseNamePtr = Demangler.getFunctionBaseName(nullptr, &BaseNameSize);
749 std::string Result = (BaseNamePtr && BaseNameSize)
750 ? std::string(BaseNamePtr, BaseNameSize)
751 : std::string();
752 free(BaseNamePtr);
753 return Result;
754 };
755 auto IRBaseName = GetBaseName(IRFunc.getName());
756 auto ProfBaseName = GetBaseName(ProfFunc.stringRef());
757 if (!IRBaseName.empty() && IRBaseName == ProfBaseName) {
758 LLVM_DEBUG(dbgs() << "The functions " << IRFunc.getName() << "(IR) and "
759 << ProfFunc << "(Profile) share the same base name: "
760 << IRBaseName << ".\n");
761 return true;
762 }
763
764 const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc);
765 // With extbinary profile format, initial profile loading only reads profile
766 // based on current function names in the module.
767 // However, if a function is renamed, sample loader skips to load its original
768 // profile(which has a different name), we will miss this case. To address
769 // this, we load the top-level profile candidate explicitly for the matching.
770 if (!FSForMatching && LoadFuncProfileforCGMatching) {
771 DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
772 if (std::error_code EC = Reader.read(TopLevelFunc))
773 return false;
774 FSForMatching = Reader.getSamplesFor(ProfFunc.stringRef());
775 LLVM_DEBUG({
776 if (FSForMatching)
777 dbgs() << "Read top-level function " << ProfFunc
778 << " for call-graph matching\n";
779 });
780 }
781 if (!FSForMatching)
782 return false;
783 // The check for similarity or checksum may not be reliable if the function is
784 // tiny, we use the number of basic block as a proxy for the function
785 // complexity and skip the matching if it's too small.
786 if (IRFunc.size() < MinFuncCountForCGMatching ||
787 FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching)
788 return false;
789
790 // For probe-based function, we first trust the checksum info. If the checksum
791 // doesn't match, we continue checking for similarity.
793 const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
794 if (FuncDesc &&
795 !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForMatching)) {
796 LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
797 << "(IR) and " << ProfFunc << "(Profile) match.\n");
798
799 return true;
800 }
801 }
802
803 AnchorMap IRAnchors;
804 findIRAnchors(IRFunc, IRAnchors);
805 AnchorMap ProfileAnchors;
806 findProfileAnchors(*FSForMatching, ProfileAnchors);
807
808 AnchorList FilteredIRAnchorsList;
809 AnchorList FilteredProfileAnchorList;
810 getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
811 FilteredProfileAnchorList);
812
813 // Similarly skip the matching if the num of anchors is not enough.
814 if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
815 FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
816 return false;
817
818 // Use the diff algorithm to find the LCS between IR and profile.
819
820 // Don't recursively match the callee function to avoid infinite matching,
821 // callee functions will be handled later since it's processed in top-down
822 // order .
823 LocToLocMap MatchedAnchors =
824 longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
825 false /* Match unused functions */);
826
827 Similarity = static_cast<float>(MatchedAnchors.size()) /
828 FilteredProfileAnchorList.size();
829
830 LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
831 << "(IR) and " << ProfFunc << "(profile) is "
832 << format("%.2f", Similarity) << "\n");
833 assert((Similarity >= 0 && Similarity <= 1.0) &&
834 "Similarity value should be in [0, 1]");
835 return Similarity * 100 > FuncProfileSimilarityThreshold;
836}
837
838// If FindMatchedProfileOnly is set to true, only use the processed function
839// results. This is used for skipping the repeated recursive matching.
840bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
841 const FunctionId &ProfFunc,
842 bool FindMatchedProfileOnly) {
843 auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc});
844 if (R != FuncProfileMatchCache.end())
845 return R->second;
846
847 if (FindMatchedProfileOnly)
848 return false;
849
850 bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
851 FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
852 if (Matched) {
853 FuncToProfileNameMap[&IRFunc] = ProfFunc;
854 LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
855 << " matches profile:" << ProfFunc << "\n");
856 }
857
858 return Matched;
859}
860
861void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
862 DenseSet<StringRef> ProfileSalvagedFuncs;
863 // Update FuncNameToProfNameMap and SymbolMap.
864 for (auto &I : FuncToProfileNameMap) {
865 assert(I.first && "New function is null");
866 FunctionId FuncName(I.first->getName());
867 ProfileSalvagedFuncs.insert(I.second.stringRef());
868 FuncNameToProfNameMap->emplace(FuncName, I.second);
869
870 // We need to remove the old entry to avoid duplicating the function
871 // processing.
872 SymbolMap->erase(FuncName);
873 SymbolMap->emplace(I.second, I.first);
874 }
875
876 // With extbinary profile format, initial profile loading only reads profile
877 // based on current function names in the module, so we need to load top-level
878 // profiles for functions with different profile name explicitly after
879 // function-profile name map is established with stale profile matching.
880 Reader.read(ProfileSalvagedFuncs);
881 Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap);
882}
883
885 ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
888 findFunctionsWithoutProfile();
889
890 // Process the matching in top-down order so that the caller matching result
891 // can be used to the callee matching.
892 std::vector<Function *> TopDownFunctionList;
893 TopDownFunctionList.reserve(M.size());
894 buildTopDownFuncOrder(CG, TopDownFunctionList);
895 for (auto *F : TopDownFunctionList) {
897 continue;
898 runOnFunction(*F);
899 }
900
902 UpdateWithSalvagedProfiles();
903
905 distributeIRToProfileLocationMap();
906
907 computeAndReportProfileStaleness();
908}
909
910void SampleProfileMatcher::distributeIRToProfileLocationMap(
911 FunctionSamples &FS) {
912 const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
913 if (ProfileMappings != FuncMappings.end()) {
914 FS.setIRToProfileLocationMap(&(ProfileMappings->second));
915 }
916
917 for (auto &Callees :
918 const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
919 for (auto &FS : Callees.second) {
920 distributeIRToProfileLocationMap(FS.second);
921 }
922 }
923}
924
925// Use a central place to distribute the matching results. Outlined and inlined
926// profile with the function name will be set to the same pointer.
927void SampleProfileMatcher::distributeIRToProfileLocationMap() {
928 for (auto &I : Reader.getProfiles()) {
929 distributeIRToProfileLocationMap(I.second);
930 }
931}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
itanium_demangle::ManglingParser< DefaultAllocator > Demangler
Legalize the Machine IR a function's Machine IR
Definition Legalizer.cpp:80
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file provides the interface for SampleProfileMatcher.
#define LLVM_DEBUG(...)
Definition Debug.h:114
size_t size() const
Definition Function.h:856
iterator end()
Definition Function.h:853
static bool isAvailableExternallyLinkage(LinkageTypes Linkage)
reference emplace_back(ArgTypes &&... Args)
iterator end()
Definition StringMap.h:224
iterator find(StringRef Key)
Definition StringMap.h:237
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition StringMap.h:285
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
This class represents a function that is read from a sample profile.
Definition FunctionId.h:36
StringRef stringRef() const
Convert to StringRef.
Definition FunctionId.h:108
Representation of the samples collected for a function.
Definition SampleProf.h:777
static LLVM_ABI bool ProfileIsCS
static LLVM_ABI bool ProfileIsProbeBased
static StringRef getCanonicalFnName(const Function &F)
Return the canonical name for a function, taking into account suffix elision policy attributes.
static LLVM_ABI bool ProfileIsFS
If this profile uses flow sensitive discriminators.
static LLVM_ABI LineLocation getCallSiteIdentifier(const DILocation *DIL, bool ProfileIsFS=false)
Returns a unique call site identifier for a given debug location of a call instruction.
static LLVM_ABI bool UseMD5
Whether the profile uses MD5 to represent string.
static void flattenProfile(SampleProfileMap &ProfileMap, bool ProfileIsCS=false)
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
std::map< LineLocation, FunctionSamplesMap > CallsiteSampleMap
Definition SampleProf.h:767
std::unordered_map< LineLocation, LineLocation, LineLocationHash > LocToLocMap
Definition SampleProf.h:769
This is an optimization pass for GlobalISel generic memory operations.
cl::opt< bool > ReportProfileStaleness("report-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute and report stale profile statistical metrics."))
cl::opt< bool > PersistProfileStaleness("persist-profile-staleness", cl::Hidden, cl::init(false), cl::desc("Compute stale profile statistical metrics and write it into the " "native object file(.llvm_stats section)."))
std::map< LineLocation, FunctionId > AnchorMap
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
static cl::opt< bool > LoadFuncProfileforCGMatching("load-func-profile-for-cg-matching", cl::Hidden, cl::init(true), cl::desc("Load top-level profiles that the sample reader initially skipped for " "the call-graph matching (only meaningful for extended binary " "format)"))
static void buildTopDownFuncOrder(LazyCallGraph &CG, std::vector< Function * > &FunctionOrderList)
@ ThinLTOPreLink
ThinLTO prelink (summary) phase.
Definition Pass.h:81
static cl::opt< unsigned > MinCallCountForCGMatching("min-call-count-for-cg-matching", cl::Hidden, cl::init(3), cl::desc("The minimum number of call anchors required for a function to " "run stale profile call graph matching."))
LLVM_ABI std::optional< PseudoProbe > extractProbe(const Instruction &Inst)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
static cl::opt< unsigned > MinFuncCountForCGMatching("min-func-count-for-cg-matching", cl::Hidden, cl::init(5), cl::desc("The minimum number of basic blocks required for a function to " "run stale profile call graph matching."))
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:548
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:118
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
cl::opt< bool > SalvageStaleProfile("salvage-stale-profile", cl::Hidden, cl::init(false), cl::desc("Salvage stale profile by fuzzy matching and use the remapped " "location for sample profile query."))
void longestCommonSequence(AnchorList AnchorList1, AnchorList AnchorList2, llvm::function_ref< bool(const Function &, const Function &)> FunctionMatchesProfile, llvm::function_ref< void(Loc, Loc)> InsertMatching)
std::vector< std::pair< LineLocation, FunctionId > > AnchorList
static bool skipProfileForFunction(const Function &F)
cl::opt< bool > SalvageUnusedProfile("salvage-unused-profile", cl::Hidden, cl::init(false), cl::desc("Salvage unused profile by matching with new " "functions on call graph."))
static cl::opt< unsigned > SalvageStaleProfileMaxCallsites("salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX), cl::desc("The maximum number of callsites in a function, above which stale " "profile matching will be skipped."))
static cl::opt< unsigned > FuncProfileSimilarityThreshold("func-profile-similarity-threshold", cl::Hidden, cl::init(80), cl::desc("Consider a profile matches a function if the similarity of their " "callee sequences is above the specified percentile."))