Thanks for visiting codestin.com
Credit goes to llvm.org

LLVM 22.0.0git
IR2Vec.cpp
Go to the documentation of this file.
1//===- IR2Vec.cpp - Implementation of IR2Vec -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM
4// Exceptions. See the LICENSE file for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the IR2Vec algorithm.
11///
12//===----------------------------------------------------------------------===//
13
15
17#include "llvm/ADT/Sequence.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/IR/CFG.h"
21#include "llvm/IR/Module.h"
22#include "llvm/IR/PassManager.h"
23#include "llvm/Support/Debug.h"
24#include "llvm/Support/Errc.h"
25#include "llvm/Support/Error.h"
27#include "llvm/Support/Format.h"
29
30using namespace llvm;
31using namespace ir2vec;
32
33#define DEBUG_TYPE "ir2vec"
34
35STATISTIC(VocabMissCounter,
36 "Number of lookups to entities not present in the vocabulary");
37
38namespace llvm {
39namespace ir2vec {
41
42// FIXME: Use a default vocab when not specified
44 VocabFile("ir2vec-vocab-path", cl::Optional,
45 cl::desc("Path to the vocabulary file for IR2Vec"), cl::init(""),
47cl::opt<float> OpcWeight("ir2vec-opc-weight", cl::Optional, cl::init(1.0),
48 cl::desc("Weight for opcode embeddings"),
50cl::opt<float> TypeWeight("ir2vec-type-weight", cl::Optional, cl::init(0.5),
51 cl::desc("Weight for type embeddings"),
53cl::opt<float> ArgWeight("ir2vec-arg-weight", cl::Optional, cl::init(0.2),
54 cl::desc("Weight for argument embeddings"),
57 "ir2vec-kind", cl::Optional,
59 "Generate symbolic embeddings"),
61 "Generate flow-aware embeddings")),
62 cl::init(IR2VecKind::Symbolic), cl::desc("IR2Vec embedding kind"),
64
65} // namespace ir2vec
66} // namespace llvm
67
69
//===----------------------------------------------------------------------===//
// Local helper functions
//===----------------------------------------------------------------------===//
73namespace llvm::json {
74inline bool fromJSON(const llvm::json::Value &E, Embedding &Out,
76 std::vector<double> TempOut;
77 if (!llvm::json::fromJSON(E, TempOut, P))
78 return false;
79 Out = Embedding(std::move(TempOut));
80 return true;
81}
82} // namespace llvm::json
83
//===----------------------------------------------------------------------===//
// Embedding
//===----------------------------------------------------------------------===//
88 assert(this->size() == RHS.size() && "Vectors must have the same dimension");
89 std::transform(this->begin(), this->end(), RHS.begin(), this->begin(),
90 std::plus<double>());
91 return *this;
92}
93
95 Embedding Result(*this);
96 Result += RHS;
97 return Result;
98}
99
101 assert(this->size() == RHS.size() && "Vectors must have the same dimension");
102 std::transform(this->begin(), this->end(), RHS.begin(), this->begin(),
103 std::minus<double>());
104 return *this;
105}
106
108 Embedding Result(*this);
109 Result -= RHS;
110 return Result;
111}
112
114 std::transform(this->begin(), this->end(), this->begin(),
115 [Factor](double Elem) { return Elem * Factor; });
116 return *this;
117}
118
119Embedding Embedding::operator*(double Factor) const {
120 Embedding Result(*this);
121 Result *= Factor;
122 return Result;
123}
124
125Embedding &Embedding::scaleAndAdd(const Embedding &Src, float Factor) {
126 assert(this->size() == Src.size() && "Vectors must have the same dimension");
127 for (size_t Itr = 0; Itr < this->size(); ++Itr)
128 (*this)[Itr] += Src[Itr] * Factor;
129 return *this;
130}
131
133 double Tolerance) const {
134 assert(this->size() == RHS.size() && "Vectors must have the same dimension");
135 for (size_t Itr = 0; Itr < this->size(); ++Itr)
136 if (std::abs((*this)[Itr] - RHS[Itr]) > Tolerance) {
137 LLVM_DEBUG(errs() << "Embedding mismatch at index " << Itr << ": "
138 << (*this)[Itr] << " vs " << RHS[Itr]
139 << "; Tolerance: " << Tolerance << "\n");
140 return false;
141 }
142 return true;
143}
144
146 OS << " [";
147 for (const auto &Elem : Data)
148 OS << " " << format("%.2f", Elem) << " ";
149 OS << "]\n";
150}
151
//===----------------------------------------------------------------------===//
// Embedder and its subclasses
//===----------------------------------------------------------------------===//
155
160
161std::unique_ptr<Embedder> Embedder::create(IR2VecKind Mode, const Function &F,
162 const Vocabulary &Vocab) {
163 switch (Mode) {
165 return std::make_unique<SymbolicEmbedder>(F, Vocab);
167 return std::make_unique<FlowAwareEmbedder>(F, Vocab);
168 }
169 return nullptr;
170}
171
173 if (InstVecMap.empty())
175 return InstVecMap;
176}
177
179 if (BBVecMap.empty())
181 return BBVecMap;
182}
183
185 auto It = BBVecMap.find(&BB);
186 if (It != BBVecMap.end())
187 return It->second;
189 return BBVecMap[&BB];
190}
191
193 // Currently, we always (re)compute the embeddings for the function.
194 // This is cheaper than caching the vector.
196 return FuncVector;
197}
198
200 if (F.isDeclaration())
201 return;
202
203 // Consider only the basic blocks that are reachable from entry
204 for (const BasicBlock *BB : depth_first(&F)) {
206 FuncVector += BBVecMap[BB];
207 }
208}
209
211 Embedding BBVector(Dimension, 0);
212
213 // We consider only the non-debug and non-pseudo instructions
214 for (const auto &I : BB.instructionsWithoutDebug()) {
215 Embedding ArgEmb(Dimension, 0);
216 for (const auto &Op : I.operands())
217 ArgEmb += Vocab[*Op];
218 auto InstVector =
219 Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb;
220 if (const auto *IC = dyn_cast<CmpInst>(&I))
221 InstVector += Vocab[IC->getPredicate()];
222 InstVecMap[&I] = InstVector;
223 BBVector += InstVector;
224 }
225 BBVecMap[&BB] = BBVector;
226}
227
229 Embedding BBVector(Dimension, 0);
230
231 // We consider only the non-debug and non-pseudo instructions
232 for (const auto &I : BB.instructionsWithoutDebug()) {
233 // TODO: Handle call instructions differently.
234 // For now, we treat them like other instructions
235 Embedding ArgEmb(Dimension, 0);
236 for (const auto &Op : I.operands()) {
237 // If the operand is defined elsewhere, we use its embedding
238 if (const auto *DefInst = dyn_cast<Instruction>(Op)) {
239 auto DefIt = InstVecMap.find(DefInst);
240 assert(DefIt != InstVecMap.end() &&
241 "Instruction should have been processed before its operands");
242 ArgEmb += DefIt->second;
243 continue;
244 }
245 // If the operand is not defined by an instruction, we use the vocabulary
246 else {
247 LLVM_DEBUG(errs() << "Using embedding from vocabulary for operand: "
248 << *Op << "=" << Vocab[*Op][0] << "\n");
249 ArgEmb += Vocab[*Op];
250 }
251 }
252 // Create the instruction vector by combining opcode, type, and arguments
253 // embeddings
254 auto InstVector =
255 Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb;
256 // Add compare predicate embedding as an additional operand if applicable
257 if (const auto *IC = dyn_cast<CmpInst>(&I))
258 InstVector += Vocab[IC->getPredicate()];
259 InstVecMap[&I] = InstVector;
260 BBVector += InstVector;
261 }
262 BBVecMap[&BB] = BBVector;
263}
264
//===----------------------------------------------------------------------===//
// VocabStorage
//===----------------------------------------------------------------------===//
268
269VocabStorage::VocabStorage(std::vector<std::vector<Embedding>> &&SectionData)
270 : Sections(std::move(SectionData)), TotalSize([&] {
271 assert(!Sections.empty() && "Vocabulary has no sections");
272 // Compute total size across all sections
273 size_t Size = 0;
274 for (const auto &Section : Sections) {
275 assert(!Section.empty() && "Vocabulary section is empty");
276 Size += Section.size();
277 }
278 return Size;
279 }()),
280 Dimension([&] {
281 // Get dimension from the first embedding in the first section - all
282 // embeddings must have the same dimension
283 assert(!Sections.empty() && "Vocabulary has no sections");
284 assert(!Sections[0].empty() && "First section of vocabulary is empty");
285 unsigned ExpectedDim = static_cast<unsigned>(Sections[0][0].size());
286
287 // Verify that all embeddings across all sections have the same
288 // dimension
289 [[maybe_unused]] auto allSameDim =
290 [ExpectedDim](const std::vector<Embedding> &Section) {
291 return std::all_of(Section.begin(), Section.end(),
292 [ExpectedDim](const Embedding &Emb) {
293 return Emb.size() == ExpectedDim;
294 });
295 };
296 assert(std::all_of(Sections.begin(), Sections.end(), allSameDim) &&
297 "All embeddings must have the same dimension");
298
299 return ExpectedDim;
300 }()) {}
301
303 assert(SectionId < Storage->Sections.size() && "Invalid section ID");
304 assert(LocalIndex < Storage->Sections[SectionId].size() &&
305 "Local index out of range");
306 return Storage->Sections[SectionId][LocalIndex];
307}
308
310 ++LocalIndex;
311 // Check if we need to move to the next section
312 if (SectionId < Storage->getNumSections() &&
313 LocalIndex >= Storage->Sections[SectionId].size()) {
314 assert(LocalIndex == Storage->Sections[SectionId].size() &&
315 "Local index should be at the end of the current section");
316 LocalIndex = 0;
317 ++SectionId;
318 }
319 return *this;
320}
321
323 const const_iterator &Other) const {
324 return Storage == Other.Storage && SectionId == Other.SectionId &&
325 LocalIndex == Other.LocalIndex;
326}
327
329 const const_iterator &Other) const {
330 return !(*this == Other);
331}
332
//===----------------------------------------------------------------------===//
// Vocabulary
//===----------------------------------------------------------------------===//
336
338 assert(Opcode >= 1 && Opcode <= MaxOpcodes && "Invalid opcode");
339#define HANDLE_INST(NUM, OPCODE, CLASS) \
340 if (Opcode == NUM) { \
341 return #OPCODE; \
342 }
343#include "llvm/IR/Instruction.def"
344#undef HANDLE_INST
345 return "UnknownOpcode";
346}
347
348// Helper function to classify an operand into OperandKind
358
359unsigned Vocabulary::getPredicateLocalIndex(CmpInst::Predicate P) {
362 else
365}
366
367CmpInst::Predicate Vocabulary::getPredicateFromLocalIndex(unsigned LocalIndex) {
368 unsigned fcmpRange =
370 if (LocalIndex < fcmpRange)
372 LocalIndex);
373 else
375 LocalIndex - fcmpRange);
376}
377
379 static SmallString<16> PredNameBuffer;
381 PredNameBuffer = "FCMP_";
382 else
383 PredNameBuffer = "ICMP_";
384 PredNameBuffer += CmpInst::getPredicateName(Pred);
385 return PredNameBuffer;
386}
387
389 assert(Pos < NumCanonicalEntries && "Position out of bounds in vocabulary");
390 // Opcode
391 if (Pos < MaxOpcodes)
392 return getVocabKeyForOpcode(Pos + 1);
393 // Type
394 if (Pos < OperandBaseOffset)
395 return getVocabKeyForCanonicalTypeID(
396 static_cast<CanonicalTypeID>(Pos - MaxOpcodes));
397 // Operand
398 if (Pos < PredicateBaseOffset)
400 static_cast<OperandKind>(Pos - OperandBaseOffset));
401 // Predicates
402 return getVocabKeyForPredicate(getPredicate(Pos - PredicateBaseOffset));
403}
404
405// For now, assume vocabulary is stable unless explicitly invalidated.
407 ModuleAnalysisManager::Invalidator &Inv) const {
408 auto PAC = PA.getChecker<IR2VecVocabAnalysis>();
409 return !(PAC.preservedWhenStateless());
410}
411
413 float DummyVal = 0.1f;
414
415 // Create sections for opcodes, types, operands, and predicates
416 // Order must match Vocabulary::Section enum
417 std::vector<std::vector<Embedding>> Sections;
418 Sections.reserve(4);
419
420 // Opcodes section
421 std::vector<Embedding> OpcodeSec;
422 OpcodeSec.reserve(MaxOpcodes);
423 for (unsigned I = 0; I < MaxOpcodes; ++I) {
424 OpcodeSec.emplace_back(Dim, DummyVal);
425 DummyVal += 0.1f;
426 }
427 Sections.push_back(std::move(OpcodeSec));
428
429 // Types section
430 std::vector<Embedding> TypeSec;
431 TypeSec.reserve(MaxCanonicalTypeIDs);
432 for (unsigned I = 0; I < MaxCanonicalTypeIDs; ++I) {
433 TypeSec.emplace_back(Dim, DummyVal);
434 DummyVal += 0.1f;
435 }
436 Sections.push_back(std::move(TypeSec));
437
438 // Operands section
439 std::vector<Embedding> OperandSec;
440 OperandSec.reserve(MaxOperandKinds);
441 for (unsigned I = 0; I < MaxOperandKinds; ++I) {
442 OperandSec.emplace_back(Dim, DummyVal);
443 DummyVal += 0.1f;
444 }
445 Sections.push_back(std::move(OperandSec));
446
447 // Predicates section
448 std::vector<Embedding> PredicateSec;
449 PredicateSec.reserve(MaxPredicateKinds);
450 for (unsigned I = 0; I < MaxPredicateKinds; ++I) {
451 PredicateSec.emplace_back(Dim, DummyVal);
452 DummyVal += 0.1f;
453 }
454 Sections.push_back(std::move(PredicateSec));
455
456 return VocabStorage(std::move(Sections));
457}
458
//===----------------------------------------------------------------------===//
// IR2VecVocabAnalysis
//===----------------------------------------------------------------------===//
462
463Error IR2VecVocabAnalysis::parseVocabSection(
464 StringRef Key, const json::Value &ParsedVocabValue, VocabMap &TargetVocab,
465 unsigned &Dim) {
466 json::Path::Root Path("");
467 const json::Object *RootObj = ParsedVocabValue.getAsObject();
468 if (!RootObj)
470 "JSON root is not an object");
471
472 const json::Value *SectionValue = RootObj->get(Key);
473 if (!SectionValue)
475 "Missing '" + std::string(Key) +
476 "' section in vocabulary file");
477 if (!json::fromJSON(*SectionValue, TargetVocab, Path))
479 "Unable to parse '" + std::string(Key) +
480 "' section from vocabulary");
481
482 Dim = TargetVocab.begin()->second.size();
483 if (Dim == 0)
485 "Dimension of '" + std::string(Key) +
486 "' section of the vocabulary is zero");
487
488 if (!std::all_of(TargetVocab.begin(), TargetVocab.end(),
489 [Dim](const std::pair<StringRef, Embedding> &Entry) {
490 return Entry.second.size() == Dim;
491 }))
492 return createStringError(
494 "All vectors in the '" + std::string(Key) +
495 "' section of the vocabulary are not of the same dimension");
496
497 return Error::success();
498}
499
// FIXME: Make this optional. We can avoid reading a file at run time
// by auto-generating a default vocabulary at build time.
502Error IR2VecVocabAnalysis::readVocabulary(VocabMap &OpcVocab,
503 VocabMap &TypeVocab,
504 VocabMap &ArgVocab) {
505 auto BufOrError = MemoryBuffer::getFileOrSTDIN(VocabFile, /*IsText=*/true);
506 if (!BufOrError)
507 return createFileError(VocabFile, BufOrError.getError());
508
509 auto Content = BufOrError.get()->getBuffer();
510
511 Expected<json::Value> ParsedVocabValue = json::parse(Content);
512 if (!ParsedVocabValue)
513 return ParsedVocabValue.takeError();
514
515 unsigned OpcodeDim = 0, TypeDim = 0, ArgDim = 0;
516 if (auto Err =
517 parseVocabSection("Opcodes", *ParsedVocabValue, OpcVocab, OpcodeDim))
518 return Err;
519
520 if (auto Err =
521 parseVocabSection("Types", *ParsedVocabValue, TypeVocab, TypeDim))
522 return Err;
523
524 if (auto Err =
525 parseVocabSection("Arguments", *ParsedVocabValue, ArgVocab, ArgDim))
526 return Err;
527
528 if (!(OpcodeDim == TypeDim && TypeDim == ArgDim))
530 "Vocabulary sections have different dimensions");
531
532 return Error::success();
533}
534
535void IR2VecVocabAnalysis::generateVocabStorage(VocabMap &OpcVocab,
536 VocabMap &TypeVocab,
537 VocabMap &ArgVocab) {
538
539 // Helper for handling missing entities in the vocabulary.
540 // Currently, we use a zero vector. In the future, we will throw an error to
541 // ensure that *all* known entities are present in the vocabulary.
542 auto handleMissingEntity = [](const std::string &Val) {
543 LLVM_DEBUG(errs() << Val
544 << " is not in vocabulary, using zero vector; This "
545 "would result in an error in future.\n");
546 ++VocabMissCounter;
547 };
548
549 unsigned Dim = OpcVocab.begin()->second.size();
550 assert(Dim > 0 && "Vocabulary dimension must be greater than zero");
551
552 // Handle Opcodes
553 std::vector<Embedding> NumericOpcodeEmbeddings(Vocabulary::MaxOpcodes,
554 Embedding(Dim));
555 for (unsigned Opcode : seq(0u, Vocabulary::MaxOpcodes)) {
556 StringRef VocabKey = Vocabulary::getVocabKeyForOpcode(Opcode + 1);
557 auto It = OpcVocab.find(VocabKey.str());
558 if (It != OpcVocab.end())
559 NumericOpcodeEmbeddings[Opcode] = It->second;
560 else
561 handleMissingEntity(VocabKey.str());
562 }
563
564 // Handle Types - only canonical types are present in vocabulary
565 std::vector<Embedding> NumericTypeEmbeddings(Vocabulary::MaxCanonicalTypeIDs,
566 Embedding(Dim));
567 for (unsigned CTypeID : seq(0u, Vocabulary::MaxCanonicalTypeIDs)) {
568 StringRef VocabKey = Vocabulary::getVocabKeyForCanonicalTypeID(
569 static_cast<Vocabulary::CanonicalTypeID>(CTypeID));
570 if (auto It = TypeVocab.find(VocabKey.str()); It != TypeVocab.end()) {
571 NumericTypeEmbeddings[CTypeID] = It->second;
572 continue;
573 }
574 handleMissingEntity(VocabKey.str());
575 }
576
577 // Handle Arguments/Operands
578 std::vector<Embedding> NumericArgEmbeddings(Vocabulary::MaxOperandKinds,
579 Embedding(Dim));
580 for (unsigned OpKind : seq(0u, Vocabulary::MaxOperandKinds)) {
582 StringRef VocabKey = Vocabulary::getVocabKeyForOperandKind(Kind);
583 auto It = ArgVocab.find(VocabKey.str());
584 if (It != ArgVocab.end()) {
585 NumericArgEmbeddings[OpKind] = It->second;
586 continue;
587 }
588 handleMissingEntity(VocabKey.str());
589 }
590
591 // Handle Predicates: part of Operands section. We look up predicate keys
592 // in ArgVocab.
593 std::vector<Embedding> NumericPredEmbeddings(Vocabulary::MaxPredicateKinds,
594 Embedding(Dim, 0));
595 for (unsigned PK : seq(0u, Vocabulary::MaxPredicateKinds)) {
596 StringRef VocabKey =
597 Vocabulary::getVocabKeyForPredicate(Vocabulary::getPredicate(PK));
598 auto It = ArgVocab.find(VocabKey.str());
599 if (It != ArgVocab.end()) {
600 NumericPredEmbeddings[PK] = It->second;
601 continue;
602 }
603 handleMissingEntity(VocabKey.str());
604 }
605
606 // Create section-based storage instead of flat vocabulary
607 // Order must match Vocabulary::Section enum
608 std::vector<std::vector<Embedding>> Sections(4);
609 Sections[static_cast<unsigned>(Vocabulary::Section::Opcodes)] =
610 std::move(NumericOpcodeEmbeddings); // Section::Opcodes
611 Sections[static_cast<unsigned>(Vocabulary::Section::CanonicalTypes)] =
612 std::move(NumericTypeEmbeddings); // Section::CanonicalTypes
613 Sections[static_cast<unsigned>(Vocabulary::Section::Operands)] =
614 std::move(NumericArgEmbeddings); // Section::Operands
615 Sections[static_cast<unsigned>(Vocabulary::Section::Predicates)] =
616 std::move(NumericPredEmbeddings); // Section::Predicates
617
618 // Create VocabStorage from organized sections
619 Vocab.emplace(std::move(Sections));
620}
621
622void IR2VecVocabAnalysis::emitError(Error Err, LLVMContext &Ctx) {
623 handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EI) {
624 Ctx.emitError("Error reading vocabulary: " + EI.message());
625 });
626}
627
630 auto Ctx = &M.getContext();
631 // If vocabulary is already populated by the constructor, use it.
632 if (Vocab.has_value())
633 return Vocabulary(std::move(Vocab.value()));
634
635 // Otherwise, try to read from the vocabulary file.
636 if (VocabFile.empty()) {
637 // FIXME: Use default vocabulary
638 Ctx->emitError("IR2Vec vocabulary file path not specified; You may need to "
639 "set it using --ir2vec-vocab-path");
640 return Vocabulary(); // Return invalid result
641 }
642
643 VocabMap OpcVocab, TypeVocab, ArgVocab;
644 if (auto Err = readVocabulary(OpcVocab, TypeVocab, ArgVocab)) {
645 emitError(std::move(Err), *Ctx);
646 return Vocabulary();
647 }
648
649 // Scale the vocabulary sections based on the provided weights
650 auto scaleVocabSection = [](VocabMap &Vocab, double Weight) {
651 for (auto &Entry : Vocab)
652 Entry.second *= Weight;
653 };
654 scaleVocabSection(OpcVocab, OpcWeight);
655 scaleVocabSection(TypeVocab, TypeWeight);
656 scaleVocabSection(ArgVocab, ArgWeight);
657
658 // Generate the numeric lookup vocabulary
659 generateVocabStorage(OpcVocab, TypeVocab, ArgVocab);
660
661 return Vocabulary(std::move(Vocab.value()));
662}
663
//===----------------------------------------------------------------------===//
// Printer Passes
//===----------------------------------------------------------------------===//
667
670 auto &Vocabulary = MAM.getResult<IR2VecVocabAnalysis>(M);
671 assert(Vocabulary.isValid() && "IR2Vec Vocabulary is invalid");
672
673 for (Function &F : M) {
675 if (!Emb) {
676 OS << "Error creating IR2Vec embeddings \n";
677 continue;
678 }
679
680 OS << "IR2Vec embeddings for function " << F.getName() << ":\n";
681 OS << "Function vector: ";
682 Emb->getFunctionVector().print(OS);
683
684 OS << "Basic block vectors:\n";
685 const auto &BBMap = Emb->getBBVecMap();
686 for (const BasicBlock &BB : F) {
687 auto It = BBMap.find(&BB);
688 if (It != BBMap.end()) {
689 OS << "Basic block: " << BB.getName() << ":\n";
690 It->second.print(OS);
691 }
692 }
693
694 OS << "Instruction vectors:\n";
695 const auto &InstMap = Emb->getInstVecMap();
696 for (const BasicBlock &BB : F) {
697 for (const Instruction &I : BB) {
698 auto It = InstMap.find(&I);
699 if (It != InstMap.end()) {
700 OS << "Instruction: ";
701 I.print(OS);
702 It->second.print(OS);
703 }
704 }
705 }
706 }
707 return PreservedAnalyses::all();
708}
709
712 auto &IR2VecVocabulary = MAM.getResult<IR2VecVocabAnalysis>(M);
713 assert(IR2VecVocabulary.isValid() && "IR2Vec Vocabulary is invalid");
714
715 // Print each entry
716 unsigned Pos = 0;
717 for (const auto &Entry : IR2VecVocabulary) {
718 OS << "Key: " << IR2VecVocabulary.getStringKey(Pos++) << ": ";
719 Entry.print(OS);
720 }
721 return PreservedAnalyses::all();
722}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
This file defines the IR2Vec vocabulary analysis(IR2VecVocabAnalysis), the core ir2vec::Embedder inte...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define P(N)
ModuleAnalysisManager MAM
Provides some synthesis utilities to produce sequences of values.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
static LLVM_ABI StringRef getPredicateName(Predicate P)
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:167
iterator end()
Definition DenseMap.h:81
virtual std::string message() const
Return the error message as a string.
Definition Error.h:52
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
Tagged union holding either a T or a Error.
Definition Error.h:485
Error takeError()
Take ownership of the stored error.
Definition Error.h:612
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Definition IR2Vec.cpp:668
This analysis provides the vocabulary for IR2Vec.
Definition IR2Vec.h:597
ir2vec::Vocabulary Result
Definition IR2Vec.h:614
LLVM_ABI Result run(Module &M, ModuleAnalysisManager &MAM)
Definition IR2Vec.cpp:629
static LLVM_ABI AnalysisKey Key
Definition IR2Vec.h:610
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM)
Definition IR2Vec.cpp:710
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFileOrSTDIN(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, or open stdin if the Filename is "-".
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Definition Analysis.h:275
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:225
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI const Embedding & getBBVector(const BasicBlock &BB) const
Returns the embedding for a given basic block in the function F if it has been computed.
Definition IR2Vec.cpp:184
static LLVM_ABI std::unique_ptr< Embedder > create(IR2VecKind Mode, const Function &F, const Vocabulary &Vocab)
Factory method to create an Embedder object.
Definition IR2Vec.cpp:161
BBEmbeddingsMap BBVecMap
Definition IR2Vec.h:529
LLVM_ABI const BBEmbeddingsMap & getBBVecMap() const
Returns a map containing basic block and the corresponding embeddings for the function F if it has be...
Definition IR2Vec.cpp:178
const Vocabulary & Vocab
Definition IR2Vec.h:517
void computeEmbeddings() const
Function to compute embeddings.
Definition IR2Vec.cpp:199
const float TypeWeight
Definition IR2Vec.h:524
LLVM_ABI const InstEmbeddingsMap & getInstVecMap() const
Returns a map containing instructions and the corresponding embeddings for the function F if it has b...
Definition IR2Vec.cpp:172
const float OpcWeight
Weights for different entities (like opcode, arguments, types) in the IR instructions to generate the...
Definition IR2Vec.h:524
const unsigned Dimension
Dimension of the vector representation; captured from the input vocabulary.
Definition IR2Vec.h:520
LLVM_ABI Embedder(const Function &F, const Vocabulary &Vocab)
Definition IR2Vec.cpp:156
const float ArgWeight
Definition IR2Vec.h:524
Embedding FuncVector
Definition IR2Vec.h:528
LLVM_ABI const Embedding & getFunctionVector() const
Computes and returns the embedding for the current function.
Definition IR2Vec.cpp:192
InstEmbeddingsMap InstVecMap
Definition IR2Vec.h:530
const Function & F
Definition IR2Vec.h:516
Iterator support for section-based access.
Definition IR2Vec.h:193
const_iterator(const VocabStorage *Storage, unsigned SectionId, size_t LocalIndex)
Definition IR2Vec.h:199
LLVM_ABI bool operator!=(const const_iterator &Other) const
Definition IR2Vec.cpp:328
LLVM_ABI const_iterator & operator++()
Definition IR2Vec.cpp:309
LLVM_ABI const Embedding & operator*() const
Definition IR2Vec.cpp:302
LLVM_ABI bool operator==(const const_iterator &Other) const
Definition IR2Vec.cpp:322
Generic storage class for section-based vocabularies.
Definition IR2Vec.h:151
unsigned getNumSections() const
Get number of sections.
Definition IR2Vec.h:176
VocabStorage()
Default constructor creates empty storage (invalid state)
Definition IR2Vec.h:161
size_t size() const
Get total number of entries across all sections.
Definition IR2Vec.h:173
Class for storing and accessing the IR2Vec vocabulary.
Definition IR2Vec.h:232
static LLVM_ABI StringRef getVocabKeyForOperandKind(OperandKind Kind)
Function to get vocabulary key for a given OperandKind.
Definition IR2Vec.h:342
LLVM_ABI bool invalidate(Module &M, const PreservedAnalyses &PA, ModuleAnalysisManager::Invalidator &Inv) const
Definition IR2Vec.cpp:406
static LLVM_ABI OperandKind getOperandKind(const Value *Op)
Function to classify an operand into OperandKind.
Definition IR2Vec.cpp:349
friend class llvm::IR2VecVocabAnalysis
Definition IR2Vec.h:233
static LLVM_ABI StringRef getStringKey(unsigned Pos)
Returns the string key for a given index position in the vocabulary.
Definition IR2Vec.cpp:388
static constexpr unsigned MaxCanonicalTypeIDs
Definition IR2Vec.h:302
static constexpr unsigned MaxOperandKinds
Definition IR2Vec.h:304
OperandKind
Operand kinds supported by IR2Vec Vocabulary.
Definition IR2Vec.h:288
static LLVM_ABI StringRef getVocabKeyForPredicate(CmpInst::Predicate P)
Function to get vocabulary key for a given predicate.
Definition IR2Vec.cpp:378
static LLVM_ABI StringRef getVocabKeyForOpcode(unsigned Opcode)
Function to get vocabulary key for a given Opcode.
Definition IR2Vec.cpp:337
LLVM_ABI bool isValid() const
Definition IR2Vec.h:320
static LLVM_ABI VocabStorage createDummyVocabForTest(unsigned Dim=1)
Create a dummy vocabulary for testing purposes.
Definition IR2Vec.cpp:412
static constexpr unsigned MaxPredicateKinds
Definition IR2Vec.h:308
CanonicalTypeID
Canonical type IDs supported by IR2Vec Vocabulary.
Definition IR2Vec.h:271
An Object is a JSON object, which maps strings to heterogenous JSON values.
Definition JSON.h:98
LLVM_ABI Value * get(StringRef K)
Definition JSON.cpp:30
The root is the trivial Path to the root value.
Definition JSON.h:713
A "cursor" marking a position within a Value.
Definition JSON.h:666
A Value is an JSON value of unknown type.
Definition JSON.h:290
const json::Object * getAsObject() const
Definition JSON.h:464
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
DenseMap< const Instruction *, Embedding > InstEmbeddingsMap
Definition IR2Vec.h:145
static cl::opt< std::string > VocabFile("ir2vec-vocab-path", cl::Optional, cl::desc("Path to the vocabulary file for IR2Vec"), cl::init(""), cl::cat(IR2VecCategory))
LLVM_ABI cl::opt< float > ArgWeight
DenseMap< const BasicBlock *, Embedding > BBEmbeddingsMap
Definition IR2Vec.h:146
LLVM_ABI cl::opt< float > OpcWeight
LLVM_ABI cl::opt< float > TypeWeight
LLVM_ABI cl::opt< IR2VecKind > IR2VecEmbeddingKind
llvm::cl::OptionCategory IR2VecCategory
LLVM_ABI llvm::Expected< Value > parse(llvm::StringRef JSON)
Parses the provided JSON source, or returns a ParseError.
Definition JSON.cpp:675
bool fromJSON(const Value &E, std::string &Out, Path P)
Definition JSON.h:742
This is an optimization pass for GlobalISel generic memory operations.
Error createFileError(const Twine &F, Error E)
Concatenate a source file path and/or name with an Error.
Definition Error.h:1399
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
void handleAllErrors(Error E, HandlerTs &&... Handlers)
Behaves the same as handleErrors, except that by contract all errors must be handled by the given han...
Definition Error.h:990
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1305
@ illegal_byte_sequence
Definition Errc.h:52
@ invalid_argument
Definition Errc.h:56
IR2VecKind
IR2Vec computes two kinds of embeddings: Symbolic and Flow-aware.
Definition IR2Vec.h:71
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:548
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:118
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Other
Any other memory.
Definition ModRef.h:68
DWARFExpression::Operation Op
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1847
iterator_range< df_iterator< T > > depth_first(const T &G)
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
Embedding is a datatype that wraps std::vector<double>.
Definition IR2Vec.h:87
LLVM_ABI bool approximatelyEquals(const Embedding &RHS, double Tolerance=1e-4) const
Returns true if the embedding is approximately equal to the RHS embedding within the specified tolera...
Definition IR2Vec.cpp:132
LLVM_ABI Embedding & operator+=(const Embedding &RHS)
Arithmetic operators.
Definition IR2Vec.cpp:87
LLVM_ABI Embedding operator-(const Embedding &RHS) const
Definition IR2Vec.cpp:107
LLVM_ABI Embedding & operator-=(const Embedding &RHS)
Definition IR2Vec.cpp:100
LLVM_ABI Embedding operator*(double Factor) const
Definition IR2Vec.cpp:119
size_t size() const
Definition IR2Vec.h:100
LLVM_ABI Embedding & operator*=(double Factor)
Definition IR2Vec.cpp:113
LLVM_ABI Embedding operator+(const Embedding &RHS) const
Definition IR2Vec.cpp:94
LLVM_ABI Embedding & scaleAndAdd(const Embedding &Src, float Factor)
Adds Src Embedding scaled by Factor with the called Embedding.
Definition IR2Vec.cpp:125
LLVM_ABI void print(raw_ostream &OS) const
Definition IR2Vec.cpp:145