Thanks to visit codestin.com
Credit goes to llvm.org

LLVM 22.0.0git
BasicBlockSectionsProfileReader.cpp
Go to the documentation of this file.
1//===-- BasicBlockSectionsProfileReader.cpp -------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Implementation of the basic block sections profile reader pass. It parses
10// and stores the basic block sections profile file (which is specified via the
11// `-basic-block-sections` flag).
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/DenseSet.h"
17#include "llvm/ADT/SmallSet.h"
20#include "llvm/ADT/StringMap.h"
21#include "llvm/ADT/StringRef.h"
23#include "llvm/Pass.h"
24#include "llvm/Support/Error.h"
28#include "llvm/Support/Path.h"
30#include <llvm/ADT/STLExtras.h>
31
32using namespace llvm;
33
36 "bbsections-profile-reader",
37 "Reads and parses a basic block sections profile.", false,
38 false)
39
41BasicBlockSectionsProfileReader::parseUniqueBBID(StringRef S) const {
43 S.split(Parts, '.');
44 if (Parts.size() > 2)
45 return createProfileParseError(Twine("unable to parse basic block id: '") +
46 S + "'");
47 unsigned long long BaseBBID;
48 if (getAsUnsignedInteger(Parts[0], 10, BaseBBID))
49 return createProfileParseError(
50 Twine("unable to parse BB id: '" + Parts[0]) +
51 "': unsigned integer expected");
52 unsigned long long CloneID = 0;
53 if (Parts.size() > 1 && getAsUnsignedInteger(Parts[1], 10, CloneID))
54 return createProfileParseError(Twine("unable to parse clone id: '") +
55 Parts[1] + "': unsigned integer expected");
56 return UniqueBBID{static_cast<unsigned>(BaseBBID),
57 static_cast<unsigned>(CloneID)};
58}
59
61 return getClusterInfoForFunction(FuncName).first;
62}
63
64std::pair<bool, SmallVector<BBClusterInfo>>
66 StringRef FuncName) const {
67 auto R = ProgramPathAndClusterInfo.find(getAliasName(FuncName));
68 return R != ProgramPathAndClusterInfo.end()
69 ? std::pair(true, R->second.ClusterInfo)
70 : std::pair(false, SmallVector<BBClusterInfo>());
71}
72
75 StringRef FuncName) const {
76 return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).ClonePaths;
77}
78
80 StringRef FuncName, const UniqueBBID &SrcBBID,
81 const UniqueBBID &SinkBBID) const {
82 auto It = ProgramPathAndClusterInfo.find(getAliasName(FuncName));
83 if (It == ProgramPathAndClusterInfo.end())
84 return 0;
85 auto NodeIt = It->second.EdgeCounts.find(SrcBBID);
86 if (NodeIt == It->second.EdgeCounts.end())
87 return 0;
88 auto EdgeIt = NodeIt->second.find(SinkBBID);
89 if (EdgeIt == NodeIt->second.end())
90 return 0;
91 return EdgeIt->second;
92}
93
94// Reads the version 1 basic block sections profile. Profile for each function
95// is encoded as follows:
96// m <module_name>
97// f <function_name_1> <function_name_2> ...
98// c <bb_id_1> <bb_id_2> <bb_id_3>
99// c <bb_id_4> <bb_id_5>
100// ...
101// Module name specifier (starting with 'm') is optional and allows
102// distinguishing profile for internal-linkage functions with the same name. If
103// not specified, it will apply to any function with the same name. Function
104// name specifier (starting with 'f') can specify multiple function name
105// aliases. Basic block clusters are specified by 'c' and specify the cluster of
106// basic blocks, and the internal order in which they must be placed in the same
107// section.
108// This profile can also specify cloning paths which instruct the compiler to
109// clone basic blocks along a path. The cloned blocks are then specified in the
110// cluster information.
111// The following profile lists two cloning paths (starting with 'p') for
112// function bar and places the total 9 blocks within two clusters. The first two
113// blocks of a cloning path specify the edge along which the path is cloned. For
114// instance, path 1 (1 -> 3 -> 4) instructs that 3 and 4 must be cloned along
115// the edge 1->3. Within the given clusters, each cloned block is identified by
116// "<original block id>.<clone id>". For instance, 3.1 represents the first
117// clone of block 3. Original blocks are specified just with their block ids. A
118// block cloned multiple times appears with distinct clone ids. The CFG for bar
119// is shown below before and after cloning with its final clusters labeled.
120//
121// f main
122// f bar
123// p 1 3 4 # cloning path 1
124// p 4 2 # cloning path 2
125// c 1 3.1 4.1 6 # basic block cluster 1
126// c 0 2 3 4 2.1 5 # basic block cluster 2
127// ****************************************************************************
128// function bar before and after cloning with basic block clusters shown.
129// ****************************************************************************
130// .... ..............
131// 0 -------+ : 0 :---->: 1 ---> 3.1 :
132// | | : | : :........ | :
133// v v : v : : v :
134// +--> 2 --> 5 1 ~~~~~~> +---: 2 : : 4.1: cluster 1
135// | | | | : | : : | :
136// | v | | : v ....... : v :
137// | 3 <------+ | : 3 <--+ : : 6 :
138// | | | : | | : :....:
139// | v | : v | :
140// +--- 4 ---> 6 | : 4 | :
141// | : | | :
142// | : v | :
143// | :2.1---+ : cluster 2
144// | : | ......:
145// | : v :
146// +-->: 5 :
147// ....
148// ****************************************************************************
149Error BasicBlockSectionsProfileReader::ReadV1Profile() {
150 auto FI = ProgramPathAndClusterInfo.end();
151
152 // Current cluster ID corresponding to this function.
153 unsigned CurrentCluster = 0;
154 // Current position in the current cluster.
155 unsigned CurrentPosition = 0;
156
157 // Temporary set to ensure every basic block ID appears once in the clusters
158 // of a function.
159 DenseSet<UniqueBBID> FuncBBIDs;
160
161 // Debug-info-based module filename for the current function. Empty string
162 // means no filename.
163 StringRef DIFilename;
164
165 for (; !LineIt.is_at_eof(); ++LineIt) {
166 StringRef S(*LineIt);
167 char Specifier = S[0];
168 S = S.drop_front().trim();
170 S.split(Values, ' ');
171 switch (Specifier) {
172 case '@':
173 continue;
174 case 'm': // Module name speicifer.
175 if (Values.size() != 1) {
176 return createProfileParseError(Twine("invalid module name value: '") +
177 S + "'");
178 }
179 DIFilename = sys::path::remove_leading_dotslash(Values[0]);
180 continue;
181 case 'f': { // Function names specifier.
182 bool FunctionFound = any_of(Values, [&](StringRef Alias) {
183 auto It = FunctionNameToDIFilename.find(Alias);
184 // No match if this function name is not found in this module.
185 if (It == FunctionNameToDIFilename.end())
186 return false;
187 // Return a match if debug-info-filename is not specified. Otherwise,
188 // check for equality.
189 return DIFilename.empty() || It->second == DIFilename;
190 });
191 if (!FunctionFound) {
192 // Skip the following profile by setting the profile iterator (FI) to
193 // the past-the-end element.
194 FI = ProgramPathAndClusterInfo.end();
195 DIFilename = "";
196 continue;
197 }
198 for (size_t i = 1; i < Values.size(); ++i)
199 FuncAliasMap.try_emplace(Values[i], Values.front());
200
201 // Prepare for parsing clusters of this function name.
202 // Start a new cluster map for this function name.
203 auto R = ProgramPathAndClusterInfo.try_emplace(Values.front());
204 // Report error when multiple profiles have been specified for the same
205 // function.
206 if (!R.second)
207 return createProfileParseError("duplicate profile for function '" +
208 Values.front() + "'");
209 FI = R.first;
210 CurrentCluster = 0;
211 FuncBBIDs.clear();
212 // We won't need DIFilename anymore. Clean it up to avoid its application
213 // on the next function.
214 DIFilename = "";
215 continue;
216 }
217 case 'c': // Basic block cluster specifier.
218 // Skip the profile when we the profile iterator (FI) refers to the
219 // past-the-end element.
220 if (FI == ProgramPathAndClusterInfo.end())
221 continue;
222 // Reset current cluster position.
223 CurrentPosition = 0;
224 for (auto BasicBlockIDStr : Values) {
225 auto BasicBlockID = parseUniqueBBID(BasicBlockIDStr);
226 if (!BasicBlockID)
227 return BasicBlockID.takeError();
228 if (!FuncBBIDs.insert(*BasicBlockID).second)
229 return createProfileParseError(
230 Twine("duplicate basic block id found '") + BasicBlockIDStr +
231 "'");
232
233 FI->second.ClusterInfo.emplace_back(BBClusterInfo{
234 *std::move(BasicBlockID), CurrentCluster, CurrentPosition++});
235 }
236 CurrentCluster++;
237 continue;
238 case 'p': { // Basic block cloning path specifier.
239 // Skip the profile when we the profile iterator (FI) refers to the
240 // past-the-end element.
241 if (FI == ProgramPathAndClusterInfo.end())
242 continue;
243 SmallSet<unsigned, 5> BBsInPath;
244 FI->second.ClonePaths.push_back({});
245 for (size_t I = 0; I < Values.size(); ++I) {
246 auto BaseBBIDStr = Values[I];
247 unsigned long long BaseBBID = 0;
248 if (getAsUnsignedInteger(BaseBBIDStr, 10, BaseBBID))
249 return createProfileParseError(Twine("unsigned integer expected: '") +
250 BaseBBIDStr + "'");
251 if (I != 0 && !BBsInPath.insert(BaseBBID).second)
252 return createProfileParseError(
253 Twine("duplicate cloned block in path: '") + BaseBBIDStr + "'");
254 FI->second.ClonePaths.back().push_back(BaseBBID);
255 }
256 continue;
257 }
258 case 'g': { // CFG profile specifier.
259 // Skip the profile when we the profile iterator (FI) refers to the
260 // past-the-end element.
261 if (FI == ProgramPathAndClusterInfo.end())
262 continue;
263 // For each node, its CFG profile is encoded as
264 // <src>:<count>,<sink_1>:<count_1>,<sink_2>:<count_2>,...
265 for (auto BasicBlockEdgeProfile : Values) {
266 if (BasicBlockEdgeProfile.empty())
267 continue;
268 SmallVector<StringRef, 4> NodeEdgeCounts;
269 BasicBlockEdgeProfile.split(NodeEdgeCounts, ',');
270 UniqueBBID SrcBBID;
271 for (size_t i = 0; i < NodeEdgeCounts.size(); ++i) {
272 auto [BBIDStr, CountStr] = NodeEdgeCounts[i].split(':');
273 auto BBID = parseUniqueBBID(BBIDStr);
274 if (!BBID)
275 return BBID.takeError();
276 unsigned long long Count = 0;
277 if (getAsUnsignedInteger(CountStr, 10, Count))
278 return createProfileParseError(
279 Twine("unsigned integer expected: '") + CountStr + "'");
280 if (i == 0) {
281 // The first element represents the source and its total count.
282 FI->second.NodeCounts[SrcBBID = *BBID] = Count;
283 continue;
284 }
285 FI->second.EdgeCounts[SrcBBID][*BBID] = Count;
286 }
287 }
288 continue;
289 }
290 default:
291 return createProfileParseError(Twine("invalid specifier: '") +
292 Twine(Specifier) + "'");
293 }
294 llvm_unreachable("should not break from this switch statement");
295 }
296 return Error::success();
297}
298
299Error BasicBlockSectionsProfileReader::ReadV0Profile() {
300 auto FI = ProgramPathAndClusterInfo.end();
301 // Current cluster ID corresponding to this function.
302 unsigned CurrentCluster = 0;
303 // Current position in the current cluster.
304 unsigned CurrentPosition = 0;
305
306 // Temporary set to ensure every basic block ID appears once in the clusters
307 // of a function.
308 SmallSet<unsigned, 4> FuncBBIDs;
309
310 for (; !LineIt.is_at_eof(); ++LineIt) {
311 StringRef S(*LineIt);
312 if (S[0] == '@')
313 continue;
314 // Check for the leading "!"
315 if (!S.consume_front("!") || S.empty())
316 break;
317 // Check for second "!" which indicates a cluster of basic blocks.
318 if (S.consume_front("!")) {
319 // Skip the profile when we the profile iterator (FI) refers to the
320 // past-the-end element.
321 if (FI == ProgramPathAndClusterInfo.end())
322 continue;
324 S.split(BBIDs, ' ');
325 // Reset current cluster position.
326 CurrentPosition = 0;
327 for (auto BBIDStr : BBIDs) {
328 unsigned long long BBID;
329 if (getAsUnsignedInteger(BBIDStr, 10, BBID))
330 return createProfileParseError(Twine("unsigned integer expected: '") +
331 BBIDStr + "'");
332 if (!FuncBBIDs.insert(BBID).second)
333 return createProfileParseError(
334 Twine("duplicate basic block id found '") + BBIDStr + "'");
335
336 FI->second.ClusterInfo.emplace_back(
337 BBClusterInfo({{static_cast<unsigned>(BBID), 0},
338 CurrentCluster,
339 CurrentPosition++}));
340 }
341 CurrentCluster++;
342 } else {
343 // This is a function name specifier. It may include a debug info filename
344 // specifier starting with `M=`.
345 auto [AliasesStr, DIFilenameStr] = S.split(' ');
346 SmallString<128> DIFilename;
347 if (DIFilenameStr.starts_with("M=")) {
348 DIFilename =
349 sys::path::remove_leading_dotslash(DIFilenameStr.substr(2));
350 if (DIFilename.empty())
351 return createProfileParseError("empty module name specifier");
352 } else if (!DIFilenameStr.empty()) {
353 return createProfileParseError("unknown string found: '" +
354 DIFilenameStr + "'");
355 }
356 // Function aliases are separated using '/'. We use the first function
357 // name for the cluster info mapping and delegate all other aliases to
358 // this one.
360 AliasesStr.split(Aliases, '/');
361 bool FunctionFound = any_of(Aliases, [&](StringRef Alias) {
362 auto It = FunctionNameToDIFilename.find(Alias);
363 // No match if this function name is not found in this module.
364 if (It == FunctionNameToDIFilename.end())
365 return false;
366 // Return a match if debug-info-filename is not specified. Otherwise,
367 // check for equality.
368 return DIFilename.empty() || It->second == DIFilename;
369 });
370 if (!FunctionFound) {
371 // Skip the following profile by setting the profile iterator (FI) to
372 // the past-the-end element.
373 FI = ProgramPathAndClusterInfo.end();
374 continue;
375 }
376 for (size_t i = 1; i < Aliases.size(); ++i)
377 FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
378
379 // Prepare for parsing clusters of this function name.
380 // Start a new cluster map for this function name.
381 auto R = ProgramPathAndClusterInfo.try_emplace(Aliases.front());
382 // Report error when multiple profiles have been specified for the same
383 // function.
384 if (!R.second)
385 return createProfileParseError("duplicate profile for function '" +
386 Aliases.front() + "'");
387 FI = R.first;
388 CurrentCluster = 0;
389 FuncBBIDs.clear();
390 }
391 }
392 return Error::success();
393}
394
395// Basic Block Sections can be enabled for a subset of machine basic blocks.
396// This is done by passing a file containing names of functions for which basic
397// block sections are desired. Additionally, machine basic block ids of the
398// functions can also be specified for a finer granularity. Moreover, a cluster
399// of basic blocks could be assigned to the same section.
400// Optionally, a debug-info filename can be specified for each function to allow
401// distinguishing internal-linkage functions of the same name.
402// A file with basic block sections for all of function main and three blocks
403// for function foo (of which 1 and 2 are placed in a cluster) looks like this:
404// (Profile for function foo is only loaded when its debug-info filename
405// matches 'path/to/foo_file.cc').
406// ----------------------------
407// list.txt:
408// !main
409// !foo M=path/to/foo_file.cc
410// !!1 2
411// !!4
412Error BasicBlockSectionsProfileReader::ReadProfile() {
413 assert(MBuf);
414
415 unsigned long long Version = 0;
416 StringRef FirstLine(*LineIt);
417 if (FirstLine.consume_front("v")) {
418 if (getAsUnsignedInteger(FirstLine, 10, Version)) {
419 return createProfileParseError(Twine("version number expected: '") +
420 FirstLine + "'");
421 }
422 if (Version > 1) {
423 return createProfileParseError(Twine("invalid profile version: ") +
424 Twine(Version));
425 }
426 ++LineIt;
427 }
428
429 switch (Version) {
430 case 0:
431 // TODO: Deprecate V0 once V1 is fully integrated downstream.
432 return ReadV0Profile();
433 case 1:
434 return ReadV1Profile();
435 default:
436 llvm_unreachable("Invalid profile version.");
437 }
438}
439
441 if (!BBSPR.MBuf)
442 return false;
443 // Get the function name to debug info filename mapping.
444 BBSPR.FunctionNameToDIFilename.clear();
445 for (const Function &F : M) {
446 SmallString<128> DIFilename;
447 if (F.isDeclaration())
448 continue;
449 DISubprogram *Subprogram = F.getSubprogram();
450 if (Subprogram) {
451 llvm::DICompileUnit *CU = Subprogram->getUnit();
452 if (CU)
453 DIFilename = sys::path::remove_leading_dotslash(CU->getFilename());
454 }
455 [[maybe_unused]] bool inserted =
456 BBSPR.FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename)
457 .second;
458 assert(inserted);
459 }
460 if (auto Err = BBSPR.ReadProfile())
461 report_fatal_error(std::move(Err));
462 return false;
463}
464
466
472
474 StringRef FuncName) const {
475 return BBSPR.isFunctionHot(FuncName);
476}
477
478std::pair<bool, SmallVector<BBClusterInfo>>
480 StringRef FuncName) const {
481 return BBSPR.getClusterInfoForFunction(FuncName);
482}
483
486 StringRef FuncName) const {
487 return BBSPR.getClonePathsForFunction(FuncName);
488}
489
491 StringRef FuncName, const UniqueBBID &SrcBBID,
492 const UniqueBBID &SinkBBID) const {
493 return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
494}
495
500
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file defines the StringMap class.
Function Alias Analysis false
This file defines the DenseSet and SmallDenseSet classes.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallSet class.
This file defines the SmallString class.
This file defines the SmallVector class.
Result run(Function &F, FunctionAnalysisManager &AM)
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
SmallVector< SmallVector< unsigned > > getClonePathsForFunction(StringRef FuncName) const
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &DestBBID) const
std::pair< bool, SmallVector< BBClusterInfo > > getClusterInfoForFunction(StringRef FuncName) const
bool isFunctionHot(StringRef FuncName) const
std::pair< bool, SmallVector< BBClusterInfo > > getClusterInfoForFunction(StringRef FuncName) const
SmallVector< SmallVector< unsigned > > getClonePathsForFunction(StringRef FuncName) const
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID, const UniqueBBID &SinkBBID) const
Subprogram description. Uses SubclassData1.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
ImmutablePass class - This class is used to provide information that does not need to be run.
Definition Pass.h:285
This interface provides simple read-only access to a block of memory, and provides simple methods for...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:702
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI StringRef remove_leading_dotslash(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Remove redundant leading "./" pieces and consecutive separators.
This is an optimization pass for GlobalISel generic memory operations.
ImmutablePass * createBasicBlockSectionsProfileReaderWrapperPass(const MemoryBuffer *Buf)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1712
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI bool getAsUnsignedInteger(StringRef Str, unsigned Radix, unsigned long long &Result)
Helper functions for StringRef::getAsInteger.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29