[MemProf] Add v4 which contains CalleeGuids to CallSiteInfo. #137394
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter.
@llvm/pr-subscribers-pgo

Author: Snehasish Kumar (snehasish)

Changes

This patch adds CalleeGuids to the serialized format and increments the version number to 4. The unit tests are updated to include a new test for v4, and the YAML format is also updated to be able to roundtrip the v4 format.

Patch is 24.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137394.diff

9 Files Affected:
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index f1010b312ee56..c250a9ede39bc 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -705,7 +705,8 @@ class IndexedMemProfReader {
unsigned RadixTreeSize = 0;
Error deserializeV2(const unsigned char *Start, const unsigned char *Ptr);
- Error deserializeV3(const unsigned char *Start, const unsigned char *Ptr);
+ Error deserializeRadixTreeBased(const unsigned char *Start,
+ const unsigned char *Ptr);
public:
IndexedMemProfReader() = default;
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index e07a3189e4259..06d17438fa70f 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -35,10 +35,12 @@ enum IndexedVersion : uint64_t {
// Version 3: Added a radix tree for call stacks. Switched to linear IDs for
// frames and call stacks.
Version3 = 3,
+ // Version 4: Added CalleeGuids to call site info.
+ Version4 = 4,
};
constexpr uint64_t MinimumSupportedVersion = Version2;
-constexpr uint64_t MaximumSupportedVersion = Version3;
+constexpr uint64_t MaximumSupportedVersion = Version4;
// Verify that the minimum and maximum satisfy the obvious constraint.
static_assert(MinimumSupportedVersion <= MaximumSupportedVersion);
diff --git a/llvm/include/llvm/ProfileData/MemProfYAML.h b/llvm/include/llvm/ProfileData/MemProfYAML.h
index a3ca03eb92d2e..08dee253f615a 100644
--- a/llvm/include/llvm/ProfileData/MemProfYAML.h
+++ b/llvm/include/llvm/ProfileData/MemProfYAML.h
@@ -1,6 +1,7 @@
#ifndef LLVM_PROFILEDATA_MEMPROFYAML_H_
#define LLVM_PROFILEDATA_MEMPROFYAML_H_
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/YAMLTraits.h"
@@ -28,8 +29,9 @@ struct AllMemProfData {
namespace yaml {
template <> struct ScalarTraits<memprof::GUIDHex64> {
static void output(const memprof::GUIDHex64 &Val, void *, raw_ostream &Out) {
- // Print GUID as a 16-digit hexadecimal number.
- Out << format("0x%016" PRIx64, (uint64_t)Val);
+ // Print GUID as a hexadecimal number with 0x prefix, no padding to keep
+ // test strings compact.
+ Out << format("0x%" PRIx64, (uint64_t)Val);
}
static StringRef input(StringRef Scalar, void *, memprof::GUIDHex64 &Val) {
// Reject decimal GUIDs.
@@ -156,10 +158,43 @@ template <> struct MappingTraits<memprof::AllocationInfo> {
// treat the GUID and the fields within MemProfRecord at the same level as if
// the GUID were part of MemProfRecord.
template <> struct MappingTraits<memprof::CallSiteInfo> {
+ // Helper class to normalize CalleeGuids to use GUIDHex64 for YAML I/O.
+ class CallSiteInfoWithHex64Guids {
+ public:
+ CallSiteInfoWithHex64Guids(IO &) {}
+ CallSiteInfoWithHex64Guids(IO &, const memprof::CallSiteInfo &CS)
+ : Frames(CS.Frames) {
+ // Convert uint64_t GUIDs to GUIDHex64 for serialization.
+ CalleeGuids.reserve(CS.CalleeGuids.size());
+ for (uint64_t Guid : CS.CalleeGuids)
+ CalleeGuids.push_back(memprof::GUIDHex64(Guid));
+ }
+
+ memprof::CallSiteInfo denormalize(IO &) {
+ memprof::CallSiteInfo CS;
+ CS.Frames = Frames;
+ // Convert GUIDHex64 back to uint64_t GUIDs after deserialization.
+ CS.CalleeGuids.reserve(CalleeGuids.size());
+ for (memprof::GUIDHex64 HexGuid : CalleeGuids)
+ CS.CalleeGuids.push_back(HexGuid.value);
+ return CS;
+ }
+
+ // Keep Frames as is, since MappingTraits<memprof::Frame> handles its
+ // Function GUID.
+ decltype(memprof::CallSiteInfo::Frames) Frames;
+ // Use a vector of GUIDHex64 for CalleeGuids to leverage its ScalarTraits.
+ SmallVector<memprof::GUIDHex64> CalleeGuids;
+ };
+
static void mapping(IO &Io, memprof::CallSiteInfo &CS) {
- Io.mapRequired("Frames", CS.Frames);
- // Keep this optional to make it easier to write tests.
- Io.mapOptional("CalleeGuids", CS.CalleeGuids);
+ // Use MappingNormalization to handle the conversion between
+ // memprof::CallSiteInfo and CallSiteInfoWithHex64Guids.
+ MappingNormalization<CallSiteInfoWithHex64Guids, memprof::CallSiteInfo>
+ Keys(Io, CS);
+ Io.mapRequired("Frames", Keys->Frames);
+ // Map the normalized CalleeGuids (which are now GUIDHex64).
+ Io.mapOptional("CalleeGuids", Keys->CalleeGuids);
}
};
@@ -176,6 +211,20 @@ template <> struct MappingTraits<memprof::AllMemProfData> {
Io.mapRequired("HeapProfileRecords", Data.HeapProfileRecords);
}
};
+
+template <> struct SequenceTraits<SmallVector<memprof::GUIDHex64>> {
+ static size_t size(IO &io, SmallVector<memprof::GUIDHex64> &Seq) {
+ return Seq.size();
+ }
+ static memprof::GUIDHex64 &
+ element(IO &io, SmallVector<memprof::GUIDHex64> &Seq, size_t Index) {
+ if (Index >= Seq.size())
+ Seq.resize(Index + 1);
+ return Seq[Index];
+ }
+ static const bool flow = true;
+};
+
} // namespace yaml
} // namespace llvm
@@ -184,5 +233,6 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(std::vector<memprof::Frame>)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::AllocationInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::CallSiteInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDMemProfRecordPair)
+LLVM_YAML_IS_SEQUENCE_VECTOR(memprof::GUIDHex64) // Used for CalleeGuids
#endif // LLVM_PROFILEDATA_MEMPROFYAML_H_
diff --git a/llvm/lib/ProfileData/IndexedMemProfData.cpp b/llvm/lib/ProfileData/IndexedMemProfData.cpp
index 5e78ffdb86d67..6026dee077fa9 100644
--- a/llvm/lib/ProfileData/IndexedMemProfData.cpp
+++ b/llvm/lib/ProfileData/IndexedMemProfData.cpp
@@ -214,23 +214,13 @@ static Error writeMemProfV2(ProfOStream &OS,
return Error::success();
}
-// Write out MemProf Version3 as follows:
-// uint64_t Version
-// uint64_t CallStackPayloadOffset = Offset for the call stack payload
-// uint64_t RecordPayloadOffset = Offset for the record payload
-// uint64_t RecordTableOffset = RecordTableGenerator.Emit
-// uint64_t Num schema entries
-// uint64_t Schema entry 0
-// uint64_t Schema entry 1
-// ....
-// uint64_t Schema entry N - 1
-// Frames serialized one after another
-// Call stacks encoded as a radix tree
-// OnDiskChainedHashTable MemProfRecordData
-static Error writeMemProfV3(ProfOStream &OS,
- memprof::IndexedMemProfData &MemProfData,
- bool MemProfFullSchema) {
- OS.write(memprof::Version3);
+static Error writeMemProfRadixTreeBased(
+ ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
+ memprof::IndexedVersion Version, bool MemProfFullSchema) {
+ assert((Version == memprof::Version3 || Version == memprof::Version4) &&
+ "Unsupported version for radix tree format");
+
+ OS.write(Version); // Write the specific version (V3 or V4)
uint64_t HeaderUpdatePos = OS.tell();
OS.write(0ULL); // Reserve space for the memprof call stack payload offset.
OS.write(0ULL); // Reserve space for the memprof record payload offset.
@@ -258,13 +248,12 @@ static Error writeMemProfV3(ProfOStream &OS,
NumElements);
uint64_t RecordPayloadOffset = OS.tell();
- uint64_t RecordTableOffset =
- writeMemProfRecords(OS, MemProfData.Records, &Schema, memprof::Version3,
- &MemProfCallStackIndexes);
+ uint64_t RecordTableOffset = writeMemProfRecords(
+ OS, MemProfData.Records, &Schema, Version, // Pass Version
+ &MemProfCallStackIndexes);
- // IndexedMemProfReader::deserializeV3 computes the number of elements in the
- // call stack array from the difference between CallStackPayloadOffset and
- // RecordPayloadOffset. Verify that the computation works.
+ // Verify that the computation for the number of elements in the call stack
+ // array works.
assert(CallStackPayloadOffset +
NumElements * sizeof(memprof::LinearFrameId) ==
RecordPayloadOffset);
@@ -279,15 +268,34 @@ static Error writeMemProfV3(ProfOStream &OS,
return Error::success();
}
+// Write out MemProf Version3
+static Error writeMemProfV3(ProfOStream &OS,
+ memprof::IndexedMemProfData &MemProfData,
+ bool MemProfFullSchema) {
+ return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version3,
+ MemProfFullSchema);
+}
+
+// Write out MemProf Version4
+static Error writeMemProfV4(ProfOStream &OS,
+ memprof::IndexedMemProfData &MemProfData,
+ bool MemProfFullSchema) {
+ return writeMemProfRadixTreeBased(OS, MemProfData, memprof::Version4,
+ MemProfFullSchema);
+}
+
// Write out the MemProf data in a requested version.
-Error writeMemProf(ProfOStream &OS, memprof::IndexedMemProfData &MemProfData,
- memprof::IndexedVersion MemProfVersionRequested,
- bool MemProfFullSchema) {
+Error writeMemProf(ProfOStream &OS,
+ memprof::IndexedMemProfData &MemProfData,
+ memprof::IndexedVersion MemProfVersionRequested,
+ bool MemProfFullSchema) {
switch (MemProfVersionRequested) {
case memprof::Version2:
return writeMemProfV2(OS, MemProfData, MemProfFullSchema);
case memprof::Version3:
return writeMemProfV3(OS, MemProfData, MemProfFullSchema);
+ case memprof::Version4:
+ return writeMemProfV4(OS, MemProfData, MemProfFullSchema);
}
return make_error<InstrProfError>(
@@ -350,8 +358,8 @@ Error IndexedMemProfReader::deserializeV2(const unsigned char *Start,
return Error::success();
}
-Error IndexedMemProfReader::deserializeV3(const unsigned char *Start,
- const unsigned char *Ptr) {
+Error IndexedMemProfReader::deserializeRadixTreeBased(
+ const unsigned char *Start, const unsigned char *Ptr) {
// The offset in the stream right before invoking
// CallStackTableGenerator.Emit.
const uint64_t CallStackPayloadOffset =
@@ -382,7 +390,7 @@ Error IndexedMemProfReader::deserializeV3(const unsigned char *Start,
MemProfRecordTable.reset(MemProfRecordHashTable::Create(
/*Buckets=*/Start + RecordTableOffset,
/*Payload=*/Start + RecordPayloadOffset,
- /*Base=*/Start, memprof::RecordLookupTrait(memprof::Version3, Schema)));
+ /*Base=*/Start, memprof::RecordLookupTrait(Version, Schema)));
return Error::success();
}
@@ -395,8 +403,10 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start,
const uint64_t FirstWord =
support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
- if (FirstWord == memprof::Version2 || FirstWord == memprof::Version3) {
- // Everything is good. We can proceed to deserialize the rest.
+ // Check if the version is supported
+ if (FirstWord >= memprof::MinimumSupportedVersion &&
+ FirstWord <= memprof::MaximumSupportedVersion) {
+ // Everything is good. We can proceed to deserialize the rest.
Version = static_cast<memprof::IndexedVersion>(FirstWord);
} else {
return make_error<InstrProfError>(
@@ -413,12 +423,13 @@ Error IndexedMemProfReader::deserialize(const unsigned char *Start,
return E;
break;
case memprof::Version3:
- if (Error E = deserializeV3(Start, Ptr))
+ case memprof::Version4:
+ // V3 and V4 share the same high-level structure (radix tree, linear IDs).
+ if (Error E = deserializeRadixTreeBased(Start, Ptr))
return E;
break;
}
return Error::success();
}
-
} // namespace llvm
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 295f2a633e6c7..e6c83430cd8e9 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1456,16 +1456,6 @@ getMemProfRecordV2(const memprof::IndexedMemProfRecord &IndexedRecord,
return Record;
}
-static Expected<memprof::MemProfRecord>
-getMemProfRecordV3(const memprof::IndexedMemProfRecord &IndexedRecord,
- const unsigned char *FrameBase,
- const unsigned char *CallStackBase) {
- memprof::LinearFrameIdConverter FrameIdConv(FrameBase);
- memprof::LinearCallStackIdConverter CSIdConv(CallStackBase, FrameIdConv);
- memprof::MemProfRecord Record = IndexedRecord.toMemProfRecord(CSIdConv);
- return Record;
-}
-
Expected<memprof::MemProfRecord>
IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
// TODO: Add memprof specific errors.
@@ -1485,13 +1475,20 @@ IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
assert(MemProfCallStackTable && "MemProfCallStackTable must be available");
return getMemProfRecordV2(IndexedRecord, *MemProfFrameTable,
*MemProfCallStackTable);
+ // Combine V3 and V4 cases as the record conversion logic is the same.
case memprof::Version3:
+ case memprof::Version4:
assert(!MemProfFrameTable && "MemProfFrameTable must not be available");
assert(!MemProfCallStackTable &&
"MemProfCallStackTable must not be available");
assert(FrameBase && "FrameBase must be available");
assert(CallStackBase && "CallStackBase must be available");
- return getMemProfRecordV3(IndexedRecord, FrameBase, CallStackBase);
+ {
+ memprof::LinearFrameIdConverter FrameIdConv(FrameBase);
+ memprof::LinearCallStackIdConverter CSIdConv(CallStackBase, FrameIdConv);
+ memprof::MemProfRecord Record = IndexedRecord.toMemProfRecord(CSIdConv);
+ return Record;
+ }
}
return make_error<InstrProfError>(
@@ -1505,7 +1502,7 @@ IndexedMemProfReader::getMemProfRecord(const uint64_t FuncNameHash) const {
DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>>
IndexedMemProfReader::getMemProfCallerCalleePairs() const {
assert(MemProfRecordTable);
- assert(Version == memprof::Version3);
+ assert(Version == memprof::Version3 || Version == memprof::Version4);
memprof::LinearFrameIdConverter FrameIdConv(FrameBase);
memprof::CallerCalleePairExtractor Extractor(CallStackBase, FrameIdConv,
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 0af08ca51481f..fc8ea6848595c 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -48,7 +48,9 @@ size_t IndexedAllocationInfo::serializedSize(const MemProfSchema &Schema,
switch (Version) {
case Version2:
return serializedSizeV2(*this, Schema);
+ // Combine V3 and V4 as the size calculation is the same
case Version3:
+ case Version4:
return serializedSizeV3(*this, Schema);
}
llvm_unreachable("unsupported MemProf version");
@@ -78,10 +80,26 @@ static size_t serializedSizeV3(const IndexedMemProfRecord &Record,
// The number of callsites we have information for.
Result += sizeof(uint64_t);
// The linear call stack ID.
+ // Note: V3 only stored the LinearCallStackId per call site.
Result += Record.CallSites.size() * sizeof(LinearCallStackId);
return Result;
}
+static size_t serializedSizeV4(const IndexedMemProfRecord &Record,
+ const MemProfSchema &Schema) {
+ // The number of alloc sites to serialize.
+ size_t Result = sizeof(uint64_t);
+ for (const IndexedAllocationInfo &N : Record.AllocSites)
+ Result += N.serializedSize(Schema, Version4);
+
+ // The number of callsites we have information for.
+ Result += sizeof(uint64_t);
+ for (const auto &CS : Record.CallSites)
+ Result += sizeof(LinearCallStackId) + sizeof(uint64_t) +
+ CS.CalleeGuids.size() * sizeof(GlobalValue::GUID);
+ return Result;
+}
+
size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
IndexedVersion Version) const {
switch (Version) {
@@ -89,6 +107,8 @@ size_t IndexedMemProfRecord::serializedSize(const MemProfSchema &Schema,
return serializedSizeV2(*this, Schema);
case Version3:
return serializedSizeV3(*this, Schema);
+ case Version4:
+ return serializedSizeV4(*this, Schema);
}
llvm_unreachable("unsupported MemProf version");
}
@@ -134,6 +154,32 @@ static void serializeV3(
}
}
+static void serializeV4(
+ const IndexedMemProfRecord &Record, const MemProfSchema &Schema,
+ raw_ostream &OS,
+ llvm::DenseMap<CallStackId, LinearCallStackId> &MemProfCallStackIndexes) {
+ using namespace support;
+
+ endian::Writer LE(OS, llvm::endianness::little);
+
+ LE.write<uint64_t>(Record.AllocSites.size());
+ for (const IndexedAllocationInfo &N : Record.AllocSites) {
+ assert(MemProfCallStackIndexes.contains(N.CSId));
+ LE.write<LinearCallStackId>(MemProfCallStackIndexes[N.CSId]);
+ N.Info.serialize(Schema, OS);
+ }
+
+ // Related contexts.
+ LE.write<uint64_t>(Record.CallSites.size());
+ for (const auto &CS : Record.CallSites) {
+ assert(MemProfCallStackIndexes.contains(CS.CSId));
+ LE.write<LinearCallStackId>(MemProfCallStackIndexes[CS.CSId]);
+ LE.write<uint64_t>(CS.CalleeGuids.size());
+ for (const auto &Guid : CS.CalleeGuids)
+ LE.write<GlobalValue::GUID>(Guid);
+ }
+}
+
void IndexedMemProfRecord::serialize(
const MemProfSchema &Schema, raw_ostream &OS, IndexedVersion Version,
llvm::DenseMap<CallStackId, LinearCallStackId> *MemProfCallStackIndexes)
@@ -145,6 +191,9 @@ void IndexedMemProfRecord::serialize(
case Version3:
serializeV3(*this, Schema, OS, *MemProfCallStackIndexes);
return;
+ case Version4:
+ serializeV4(*this, Schema, OS, *MemProfCallStackIndexes);
+ return;
}
llvm_unreachable("unsupported MemProf version");
}
@@ -216,6 +265,46 @@ static IndexedMemProfRecord deserializeV3(const MemProfSchema &Schema,
return Record;
}
+static IndexedMemProfRecord deserializeV4(const MemProfSchema &Schema,
+ const unsigned char *Ptr) {
+ using namespace support;
+
+ IndexedMemProfRecord Record;
+
+ // Read the meminfo nodes.
+ const uint64_t NumNodes =
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+ Record.AllocSites.reserve(NumNodes);
+ for (uint64_t I = 0; I < NumNodes; I++) {
+ IndexedAllocationInfo Node;
+ Node.CSId =
+ endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
+ Node.Info.deserialize(Schema, Ptr);
+ Ptr += PortableMemInfoBlock::serializedSize(Schema);
+ Record.AllocSites.push_back(Node);
+ }
+
+ // Read the callsite information.
+ const uint64_t NumCtxs =
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+ Record.CallSites.reserve(NumCtxs);
+ for (uint64_t J = 0; J < NumCtxs; J++) {
+ static_assert(sizeof(LinearCallStackId) <= sizeof(CallStackId));
+ LinearCallStackId CSId =
+ endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
+ const uint64_t NumGuids =
+ endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
+ SmallVector<GlobalValue::GUID, 1> Guids;
+ Guids.reserve(NumGuids);
+ for (uint64_t K = 0; K < NumGuids; ++K)
+ Guids.push_back(
+ endian::readNext<GlobalValue::GUID, llvm::endianness::little>(Ptr));
+ Record.CallSites.emplace_back(CSId, std::move(Guids));
+ }
+
+ return Record;
+}
+
IndexedMemProfRecord
IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
const unsigned char *Ptr,
@@ -225,6 +314,8 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
return deserializeV2(Schema, Ptr);
case Version3:
return deserializeV3(Schema, Ptr);
+ case Version4:
+ return deserializeV4(Schema, Ptr);
}
llvm_unreachable("unsupported MemProf version");
}
diff --git a/llvm/test/tools/llvm-profdata/memprof-yaml.test b/llvm/test/tools/llvm-profdata/memprof-yaml.test
index a72ef5925a844..1a9875d08444a 100644
--- a/llvm/test/tools/llvm-profdata/memprof-yaml.test
+++ b/llvm/test/tools/llvm-profdata/memprof-yaml.test
@@ -1,10 +1,8 @@
; RUN: split-file %s %t
-; RUN: llvm-profdata merge %t/memprof-in.yaml -o %t/memprof-out.indexed
+; COM: The text format only supports the latest version.
+; RUN: llvm-profdata merge --memprof-version=4 %t/memprof-in.yaml -o %t/memprof-out.indexed
; RUN: llvm-profdata show...
[truncated]
@snehasish I see that you are not adding fields to various structs in
No, the fields that I need were added in #130441. The fact that the roundtrip yaml test works indicates that all structs have been updated. Is there something I missed?
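To make the round-trip concrete, here is a minimal hand-written sketch of a v4 record with the new CalleeGuids field in the YAML format; all GUIDs, offsets, and MemInfoBlock values below are made-up illustrative numbers, and the field names follow the existing memprof YAML mappings plus the CalleeGuids key this patch adds:

HeapProfileRecords:
  - GUID: 0xdeadbeef12345678
    AllocSites:
      - Callstack:
          - { Function: 0x1000, LineOffset: 11, Column: 10, IsInlineFrame: true }
          - { Function: 0x2000, LineOffset: 22, Column: 20, IsInlineFrame: false }
        MemInfoBlock:
          AllocCount: 111
          TotalSize: 222
          TotalLifetime: 333
          TotalLifetimeAccessDensity: 444
    CallSites:
      - Frames:
          - { Function: 0x3000, LineOffset: 33, Column: 30, IsInlineFrame: false }
        CalleeGuids: [ 0x100, 0x200 ]

A file like this can be round-tripped through the indexed format with llvm-profdata merge --memprof-version=4 followed by llvm-profdata show --memory, which is what the updated memprof-yaml.test below exercises.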
llvm/lib/ProfileData/MemProf.cpp (Outdated)

    Node.CSId =
        endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
    Node.Info.deserialize(Schema, Ptr);
    Ptr += PortableMemInfoBlock::serializedSize(Schema);
nit: if I read correctly, PortableMemInfoBlock::serializedSize(Schema)
could be computed once outside of the loop starting at L278.
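A rough sketch of what that hoist could look like (illustrative only; it mirrors the deserializeV4 loop from the diff above):

    // Hoist the schema-dependent MIB size out of the loop; it is the same on
    // every iteration.
    const size_t MIBSize = PortableMemInfoBlock::serializedSize(Schema);
    for (uint64_t I = 0; I < NumNodes; I++) {
      IndexedAllocationInfo Node;
      Node.CSId =
          endian::readNext<LinearCallStackId, llvm::endianness::little>(Ptr);
      Node.Info.deserialize(Schema, Ptr);
      Ptr += MIBSize; // computed once above rather than per node
      Record.AllocSites.push_back(Node);
    }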
Thanks for the suggestion. Also made the change to v3 and submitted it early.
Oh, I totally forgot about that. Thank you for reminding me!
Similar to the suggestion in #137394. In this case apply it to the current binary format (v3).
Thanks for the reviews, I'll verify the profile size changes with some large workloads and report back.
For a set of representative targets this results in a 17% increase in profile size (compared to no CalleeGuids in v3). If I reduce the number of frames from the leaf to consider when attaching this information, for a limit of 10 frames (inline and non-inline) I see an increase of 14.7%. Note that the overall v4 sizes for each of these profiles were between 1.2M and 18M. So I'm going to leave the encoding as is for now and we can revisit if it significantly impacts profile sizes for specific targets.
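For a rough sense of where the overhead comes from, working from serializedSizeV4 above (and assuming LinearCallStackId is a 32-bit type and GlobalValue::GUID is 64-bit): a v3 call site serializes to 4 bytes, while a v4 call site serializes to 4 + 8 bytes for the GUID count plus 8 bytes per callee GUID, so a call site with two callees grows from 4 to 28 bytes. How much of that shows up in the overall ~15-17% depends on the mix of alloc sites versus call sites in a given profile.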
The Windows CI seems to be unhappy because the diff executable doesn't understand the
This patch adds CalleeGuids to the serialized format and increments the version number to 4.
Force-pushed from 507c2ee to 4794261.
…7479) Similar to the suggestion in llvm#137394. In this case apply it to the current binary format (v3).
…7394) This patch adds CalleeGuids to the serialized format and increments the version number to 4. The unit tests are updated to include a new test for v4 and the YAML format is also updated to be able to roundtrip the v4 format.