1//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements an analysis that determines, for a given memory
10// operation, what preceding memory operations it depends on. It builds on
11// alias analysis information, and tries to provide a lazy, caching interface to
12// a common kind of alias information query.
13//
14//===----------------------------------------------------------------------===//
15
16#include "llvm/Analysis/MemoryDependenceAnalysis.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/Statistic.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Dominators.h"
31#include "llvm/IR/Function.h"
32#include "llvm/IR/InstrTypes.h"
33#include "llvm/IR/Instruction.h"
36#include "llvm/IR/LLVMContext.h"
37#include "llvm/IR/Metadata.h"
38#include "llvm/IR/Module.h"
40#include "llvm/IR/Type.h"
41#include "llvm/IR/Use.h"
42#include "llvm/IR/Value.h"
44#include "llvm/Pass.h"
47#include "llvm/Support/CommandLine.h"
49#include "llvm/Support/Debug.h"
50#include <algorithm>
51#include <cassert>
52#include <iterator>
53#include <utility>
54
55using namespace llvm;
56
57#define DEBUG_TYPE "memdep"
58
59STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
60STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
61STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
62
63STATISTIC(NumCacheNonLocalPtr,
64 "Number of fully cached non-local ptr responses");
65STATISTIC(NumCacheDirtyNonLocalPtr,
66 "Number of cached, but dirty, non-local ptr responses");
67STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
68STATISTIC(NumCacheCompleteNonLocalPtr,
69 "Number of block queries that were completely cached");
70
71// Limit for the number of instructions to scan in a block.
72
73 static cl::opt<unsigned> BlockScanLimit(
74 "memdep-block-scan-limit", cl::Hidden, cl::init(100),
75 cl::desc("The number of instructions to scan in a block in memory "
76 "dependency analysis (default = 100)"));
77
78 static cl::opt<unsigned>
79 BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(200),
80 cl::desc("The number of blocks to scan during memory "
81 "dependency analysis (default = 200)"));
82
83 static cl::opt<unsigned> CacheGlobalLimit(
84 "memdep-cache-global-limit", cl::Hidden, cl::init(10000),
85 cl::desc("The max number of entries allowed in a cache (default = 10000)"));
86
87// Limit on the number of memdep results to process.
88static const unsigned int NumResultsLimit = 100;
89
90/// This is a helper function that removes Val from 'Inst's set in ReverseMap.
91///
92/// If the set becomes empty, remove Inst's entry.
93template <typename KeyTy>
94static void
95 RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
96 Instruction *Inst, KeyTy Val) {
97 typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
98 ReverseMap.find(Inst);
99 assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
100 bool Found = InstIt->second.erase(Val);
101 assert(Found && "Invalid reverse map!");
102 (void)Found;
103 if (InstIt->second.empty())
104 ReverseMap.erase(InstIt);
105}
106
107/// If the given instruction references a specific memory location, fill in Loc
108/// with the details, otherwise set Loc.Ptr to null.
109///
110/// Returns a ModRefInfo value describing the general behavior of the
111/// instruction.
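/// For illustration (hypothetical IR): an ordinary `store i32 1, ptr %p`
/// fills Loc with the 4-byte location at %p and returns ModRefInfo::Mod,
/// while an ordinary `load i32, ptr %p` yields the same location and
/// ModRefInfo::Ref.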
112 static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
113 const TargetLibraryInfo &TLI) {
114 if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
115 if (LI->isUnordered()) {
116 Loc = MemoryLocation::get(LI);
117 return ModRefInfo::Ref;
118 }
119 if (LI->getOrdering() == AtomicOrdering::Monotonic) {
120 Loc = MemoryLocation::get(LI);
121 return ModRefInfo::ModRef;
122 }
123 Loc = MemoryLocation();
124 return ModRefInfo::ModRef;
125 }
126
127 if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
128 if (SI->isUnordered()) {
129 Loc = MemoryLocation::get(SI);
130 return ModRefInfo::Mod;
131 }
132 if (SI->getOrdering() == AtomicOrdering::Monotonic) {
133 Loc = MemoryLocation::get(SI);
134 return ModRefInfo::ModRef;
135 }
136 Loc = MemoryLocation();
137 return ModRefInfo::ModRef;
138 }
139
140 if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
141 Loc = MemoryLocation::get(V);
142 return ModRefInfo::ModRef;
143 }
144
145 if (const CallBase *CB = dyn_cast<CallBase>(Inst)) {
146 if (Value *FreedOp = getFreedOperand(CB, &TLI)) {
147 // calls to free() deallocate the entire structure
148 Loc = MemoryLocation::getAfter(FreedOp);
149 return ModRefInfo::Mod;
150 }
151 }
152
153 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
154 switch (II->getIntrinsicID()) {
155 case Intrinsic::lifetime_start:
156 case Intrinsic::lifetime_end:
157 Loc = MemoryLocation::getAfter(II->getArgOperand(0));
158 // These intrinsics don't really modify the memory, but returning Mod
159 // will allow them to be handled conservatively.
160 return ModRefInfo::Mod;
161 case Intrinsic::invariant_start:
162 Loc = MemoryLocation::getForArgument(II, 1, TLI);
163 // These intrinsics don't really modify the memory, but returning Mod
164 // will allow them to be handled conservatively.
165 return ModRefInfo::Mod;
166 case Intrinsic::invariant_end:
167 Loc = MemoryLocation::getForArgument(II, 2, TLI);
168 // These intrinsics don't really modify the memory, but returning Mod
169 // will allow them to be handled conservatively.
170 return ModRefInfo::Mod;
171 case Intrinsic::masked_load:
172 Loc = MemoryLocation::getForArgument(II, 0, TLI);
173 return ModRefInfo::Ref;
174 case Intrinsic::masked_store:
175 Loc = MemoryLocation::getForArgument(II, 1, TLI);
176 return ModRefInfo::Mod;
177 default:
178 break;
179 }
180 }
181
182 // Otherwise, just do the coarse-grained thing that always works.
183 if (Inst->mayWriteToMemory())
184 return ModRefInfo::ModRef;
185 if (Inst->mayReadFromMemory())
186 return ModRefInfo::Ref;
187 return ModRefInfo::NoModRef;
188}
189
190/// Private helper for finding the local dependencies of a call site.
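/// For illustration (hypothetical IR): when scanning backwards from a
/// read-only call that may read *%p, a preceding `store i32 0, ptr %p` is
/// reported as a clobber, while an identical earlier read-only call is
/// reported as a Def so the later call can be treated as redundant.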
191MemDepResult MemoryDependenceResults::getCallDependencyFrom(
192 CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
193 BasicBlock *BB) {
194 unsigned Limit = getDefaultBlockScanLimit();
195
196 // Walk backwards through the block, looking for dependencies.
197 while (ScanIt != BB->begin()) {
198 Instruction *Inst = &*--ScanIt;
199
200 // Limit the amount of scanning we do so we don't end up with quadratic
201 // running time on extreme testcases.
202 --Limit;
203 if (!Limit)
204 return MemDepResult::getUnknown();
205
206 // If this inst is a memory op, get the pointer it accessed
207 MemoryLocation Loc;
208 ModRefInfo MR = GetLocation(Inst, Loc, TLI);
209 if (Loc.Ptr) {
210 // A simple instruction.
211 if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
212 return MemDepResult::getClobber(Inst);
213 continue;
214 }
215
216 if (auto *CallB = dyn_cast<CallBase>(Inst)) {
217 // If these two calls do not interfere, look past it.
218 if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
219 // If the two calls are the same, return Inst as a Def, so that
220 // Call can be found redundant and eliminated.
221 if (isReadOnlyCall && !isModSet(MR) &&
222 Call->isIdenticalToWhenDefined(CallB))
223 return MemDepResult::getDef(Inst);
224
225 // Otherwise if the two calls don't interact (e.g. CallB is readnone)
226 // keep scanning.
227 continue;
228 } else
229 return MemDepResult::getClobber(Inst);
230 }
231
232 // If we could not obtain a pointer for the instruction and the instruction
233 // touches memory then assume that this is a dependency.
234 if (isModOrRefSet(MR))
235 return MemDepResult::getClobber(Inst);
236 }
237
238 // No dependence found. If this is the entry block of the function, it is
239 // unknown, otherwise it is non-local.
240 if (BB != &BB->getParent()->getEntryBlock())
241 return MemDepResult::getNonLocal();
242 return MemDepResult::getNonFuncLocal();
243}
244
245 MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
246 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
247 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
248 BatchAAResults &BatchAA) {
249 MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
250 if (QueryInst != nullptr) {
251 if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
252 InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
253
254 if (InvariantGroupDependency.isDef())
255 return InvariantGroupDependency;
256 }
257 }
258 MemDepResult SimpleDep = getSimplePointerDependencyFrom(
259 MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, BatchAA);
260 if (SimpleDep.isDef())
261 return SimpleDep;
262 // Non-local invariant group dependency indicates there is non local Def
263 // (it only returns nonLocal if it finds nonLocal def), which is better than
264 // local clobber and everything else.
265 if (InvariantGroupDependency.isNonLocal())
266 return InvariantGroupDependency;
267
268 assert(InvariantGroupDependency.isUnknown() &&
269 "InvariantGroupDependency should be only unknown at this point");
270 return SimpleDep;
271}
272
273 MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
274 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
275 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
276 BatchAAResults BatchAA(AA, &EEA);
277 return getPointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, Limit,
278 BatchAA);
279}
280
281 MemDepResult
282 MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
283 BasicBlock *BB) {
284
285 if (!LI->hasMetadata(LLVMContext::MD_invariant_group))
286 return MemDepResult::getUnknown();
287
288 // Take the ptr operand after all casts and zero-offset GEPs. This way we can
289 // search the cast graph downwards only.
290 Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
291
292 // It is not safe to walk the use list of a global value, because function
293 // passes aren't allowed to look outside their functions.
294 // FIXME: this could be fixed by filtering instructions from outside
295 // of current function.
296 if (isa<GlobalValue>(LoadOperand))
297 return MemDepResult::getUnknown();
298
299 Instruction *ClosestDependency = nullptr;
300 // Order of instructions in the uses list is unpredictable. In order to always
301 // get the same result, we will look for the closest dominance.
302 auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
303 assert(Other && "Must call it with not null instruction");
304 if (Best == nullptr || DT.dominates(Best, Other))
305 return Other;
306 return Best;
307 };
308
309 for (const Use &Us : LoadOperand->uses()) {
310 auto *U = dyn_cast<Instruction>(Us.getUser());
311 if (!U || U == LI || !DT.dominates(U, LI))
312 continue;
313
314 // If we hit load/store with the same invariant.group metadata (and the
315 // same pointer operand) we can assume that value pointed by pointer
316 // operand didn't change.
317 if ((isa<LoadInst>(U) ||
318 (isa<StoreInst>(U) &&
319 cast<StoreInst>(U)->getPointerOperand() == LoadOperand)) &&
320 U->hasMetadata(LLVMContext::MD_invariant_group))
321 ClosestDependency = GetClosestDependency(ClosestDependency, U);
322 }
323
324 if (!ClosestDependency)
325 return MemDepResult::getUnknown();
326 if (ClosestDependency->getParent() == BB)
327 return MemDepResult::getDef(ClosestDependency);
328 // Def(U) can't be returned here because it is non-local. If no local
329 // dependency is found, return nonLocal, counting on the fact that the
330 // user will call getNonLocalPointerDependency, which will return the cached
331 // result.
332 NonLocalDefsCache.try_emplace(
333 LI, NonLocalDepResult(ClosestDependency->getParent(),
334 MemDepResult::getDef(ClosestDependency), nullptr));
335 ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
336 return MemDepResult::getNonLocal();
337}
338
339// Check if SI that may alias with MemLoc can be safely skipped. This is
340// possible in case if SI can only must alias or no alias with MemLoc (no
341// partial overlapping possible) and it writes the same value that MemLoc
342// contains now (it was loaded before this store and was not modified in
343// between).
344 static bool canSkipClobberingStore(const StoreInst *SI,
345 const MemoryLocation &MemLoc,
346 Align MemLocAlign, BatchAAResults &BatchAA,
347 unsigned ScanLimit) {
348 if (!MemLoc.Size.hasValue())
349 return false;
350 if (MemoryLocation::get(SI).Size != MemLoc.Size)
351 return false;
352 if (MemLoc.Size.isScalable())
353 return false;
354 if (std::min(MemLocAlign, SI->getAlign()).value() <
355 MemLoc.Size.getValue().getKnownMinValue())
356 return false;
357
358 auto *LI = dyn_cast<LoadInst>(SI->getValueOperand());
359 if (!LI || LI->getParent() != SI->getParent())
360 return false;
361 if (BatchAA.alias(MemoryLocation::get(LI), MemLoc) != AliasResult::MustAlias)
362 return false;
363 unsigned NumVisitedInsts = 0;
364 for (const Instruction *I = LI; I != SI; I = I->getNextNode())
365 if (++NumVisitedInsts > ScanLimit ||
366 isModSet(BatchAA.getModRefInfo(I, MemLoc)))
367 return false;
368
369 return true;
370}
371
372 MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
373 const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
374 BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
375 BatchAAResults &BatchAA) {
376 bool isInvariantLoad = false;
377 Align MemLocAlign =
378 MemLoc.Ptr->getPointerAlignment(BB->getDataLayout());
379
380 unsigned DefaultLimit = getDefaultBlockScanLimit();
381 if (!Limit)
382 Limit = &DefaultLimit;
383
384 // We must be careful with atomic accesses, as they may allow another thread
385 // to touch this location, clobbering it. We are conservative: if the
386 // QueryInst is not a simple (non-atomic) memory access, we automatically
387 // return getClobber.
388 // If it is simple, we know based on the results of
389 // "Compiler testing via a theory of sound optimisations in the C11/C++11
390 // memory model" in PLDI 2013, that a non-atomic location can only be
391 // clobbered between a pair of a release and an acquire action, with no
392 // access to the location in between.
393 // Here is an example for giving the general intuition behind this rule.
394 // In the following code:
395 // store x 0;
396 // release action; [1]
397 // acquire action; [4]
398 // %val = load x;
399 // It is unsafe to replace %val by 0 because another thread may be running:
400 // acquire action; [2]
401 // store x 42;
402 // release action; [3]
403 // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
404 // being 42. A key property of this program however is that if either
405 // 1 or 4 were missing, there would be a race between the store of 42 and
406 // either the store of 0 or the load (making the whole program racy).
407 // The paper mentioned above shows that the same property is respected
408 // by every program that can detect any optimization of that kind: either
409 // it is racy (undefined) or there is a release followed by an acquire
410 // between the pair of accesses under consideration.
411
412 // If the load is invariant, we "know" that it doesn't alias *any* write. We
413 // do want to respect mustalias results since defs are useful for value
414 // forwarding, but any mayalias write can be assumed to be noalias.
415 // Arguably, this logic should be pushed inside AliasAnalysis itself.
416 if (isLoad && QueryInst)
417 if (LoadInst *LI = dyn_cast<LoadInst>(QueryInst)) {
418 if (LI->hasMetadata(LLVMContext::MD_invariant_load))
419 isInvariantLoad = true;
420 MemLocAlign = LI->getAlign();
421 }
422
423 // True for a volatile instruction.
424 // For a load/store, return true if its atomic ordering is stronger than AO;
425 // for any other instruction, return true if it can read or write memory.
426 auto isComplexForReordering = [](Instruction * I, AtomicOrdering AO)->bool {
427 if (I->isVolatile())
428 return true;
429 if (auto *LI = dyn_cast<LoadInst>(I))
430 return isStrongerThan(LI->getOrdering(), AO);
431 if (auto *SI = dyn_cast<StoreInst>(I))
432 return isStrongerThan(SI->getOrdering(), AO);
433 return I->mayReadOrWriteMemory();
434 };
435
436 // Walk backwards through the basic block, looking for dependencies.
437 while (ScanIt != BB->begin()) {
438 Instruction *Inst = &*--ScanIt;
439
440 // Limit the amount of scanning we do so we don't end up with quadratic
441 // running time on extreme testcases.
442 --*Limit;
443 if (!*Limit)
444 return MemDepResult::getUnknown();
445
446 if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
447 // If we reach a lifetime begin or end marker, then the query ends here
448 // because the value is undefined.
449 Intrinsic::ID ID = II->getIntrinsicID();
450 switch (ID) {
451 case Intrinsic::lifetime_start: {
452 MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(0));
453 if (BatchAA.isMustAlias(ArgLoc, MemLoc))
454 return MemDepResult::getDef(II);
455 continue;
456 }
457 case Intrinsic::masked_load:
458 case Intrinsic::masked_store: {
459 MemoryLocation Loc;
460 /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
461 AliasResult R = BatchAA.alias(Loc, MemLoc);
462 if (R == AliasResult::NoAlias)
463 continue;
464 if (R == AliasResult::MustAlias)
465 return MemDepResult::getDef(II);
466 if (ID == Intrinsic::masked_load)
467 continue;
468 return MemDepResult::getClobber(II);
469 }
470 }
471 }
472
473 // Values depend on loads if the pointers are must aliased. This means
474 // that a load depends on another must aliased load from the same value.
475 // One exception is atomic loads: a value can depend on an atomic load that
476 // it does not alias with when this atomic load indicates that another
477 // thread may be accessing the location.
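    // For illustration (hypothetical IR):
    //   %a = load i32, ptr %p
    //   %b = load i32, ptr %p   ; the query
    // Because the pointers must alias, %b gets a Def dependency on %a and a
    // client may reuse %a's value for %b.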
478 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
479 // While volatile accesses cannot be eliminated, they do not have to clobber
480 // non-aliasing locations, as normal accesses, for example, can be safely
481 // reordered with volatile accesses.
482 if (LI->isVolatile()) {
483 if (!QueryInst)
484 // Original QueryInst *may* be volatile
485 return MemDepResult::getClobber(LI);
486 if (QueryInst->isVolatile())
487 // Ordering required if QueryInst is itself volatile
488 return MemDepResult::getClobber(LI);
489 // Otherwise, volatile doesn't imply any special ordering
490 }
491
492 // Atomic loads have complications involved.
493 // A Monotonic (or higher) load is OK if the query inst is itself not
494 // atomic.
495 // FIXME: This is overly conservative.
496 if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
497 if (!QueryInst ||
498 isComplexForReordering(QueryInst, AtomicOrdering::NotAtomic))
499 return MemDepResult::getClobber(LI);
500 if (LI->getOrdering() != AtomicOrdering::Monotonic)
501 return MemDepResult::getClobber(LI);
502 }
503
503
504 MemoryLocation LoadLoc = MemoryLocation::get(LI);
505
506 // If we found a pointer, check if it could be the same as our pointer.
507 AliasResult R = BatchAA.alias(LoadLoc, MemLoc);
508
509 if (R == AliasResult::NoAlias)
510 continue;
511
512 if (isLoad) {
513 // Must aliased loads are defs of each other.
514 if (R == AliasResult::MustAlias)
515 return MemDepResult::getDef(Inst);
516
517 // If we have a partial alias, then return this as a clobber for the
518 // client to handle.
519 if (R == AliasResult::PartialAlias && R.hasOffset()) {
520 ClobberOffsets[LI] = R.getOffset();
521 return MemDepResult::getClobber(Inst);
522 }
523
524 // Other may-alias loads don't create a dependence on each other, so keep
525 // scanning.
526 continue;
527 }
528
529 // Stores don't alias loads from read-only memory.
530 if (!isModSet(BatchAA.getModRefInfoMask(LoadLoc)))
531 continue;
532
533 // Stores depend on may/must aliased loads.
534 return MemDepResult::getDef(Inst);
535 }
536
537 if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
538 // Atomic stores have complications involved.
539 // A Monotonic store is OK if the query inst is itself not atomic.
540 // FIXME: This is overly conservative.
541 if (!SI->isUnordered() && SI->isAtomic()) {
542 if (!QueryInst ||
543 isComplexForReordering(QueryInst, AtomicOrdering::Unordered))
544 return MemDepResult::getClobber(SI);
545 // Ok, if we are here the guard above guarantees us that
546 // QueryInst is a non-atomic or unordered load/store.
547 // SI is atomic with monotonic or release semantics (seq_cst for a store
548 // is actually release semantics plus a total order over other seq_cst
549 // instructions; as soon as QueryInst is not seq_cst we can consider it
550 // as simple release semantics).
551 // Monotonic and release semantics allow re-ordering before the store,
552 // so we are safe to go further and check the aliasing. Aliasing will
553 // prohibit re-ordering in case the locations may or must alias.
554 }
555
556 // While volatile accesses cannot be eliminated, they do not have to clobber
557 // non-aliasing locations, as normal accesses can for example be reordered
558 // with volatile accesses.
559 if (SI->isVolatile())
560 if (!QueryInst || QueryInst->isVolatile())
561 return MemDepResult::getClobber(SI);
562
563 // If alias analysis can tell that this store is guaranteed to not modify
564 // the query pointer, ignore it. Use getModRefInfo to handle cases where
565 // the query pointer points to constant memory etc.
566 if (!isModOrRefSet(BatchAA.getModRefInfo(SI, MemLoc)))
567 continue;
568
569 // Ok, this store might clobber the query pointer. Check to see if it is
570 // a must alias: in this case, we want to return this as a def.
571 // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
572 MemoryLocation StoreLoc = MemoryLocation::get(SI);
573
574 // If we found a pointer, check if it could be the same as our pointer.
575 AliasResult R = BatchAA.alias(StoreLoc, MemLoc);
576
577 if (R == AliasResult::NoAlias)
578 continue;
579 if (R == AliasResult::MustAlias)
580 return MemDepResult::getDef(Inst);
581 if (isInvariantLoad)
582 continue;
583 if (canSkipClobberingStore(SI, MemLoc, MemLocAlign, BatchAA, *Limit))
584 continue;
585 return MemDepResult::getClobber(Inst);
586 }
587
588 // If this is an allocation, and if we know that the accessed pointer is to
589 // the allocation, return Def. This means that there is no dependence and
590 // the access can be optimized based on that. For example, a load could
591 // turn into undef. Note that we can bypass the allocation itself when
592 // looking for a clobber in many cases; that's an alias property and is
593 // handled by BasicAA.
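    // For illustration (hypothetical IR):
    //   %buf = alloca i32
    //   %v = load i32, ptr %buf   ; nothing has stored to %buf yet
    // The load gets a Def dependency on the alloca, and a client may fold %v
    // to undef.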
594 if (isa<AllocaInst>(Inst) || isNoAliasCall(Inst)) {
595 const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr);
596 if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr))
597 return MemDepResult::getDef(Inst);
598 }
599
600 // If we found a select instruction for MemLoc pointer, return it as Def
601 // dependency.
602 if (isa<SelectInst>(Inst) && MemLoc.Ptr == Inst)
603 return MemDepResult::getDef(Inst);
604
605 if (isInvariantLoad)
606 continue;
607
608 // A release fence requires that all stores complete before it, but does
609 // not prevent the reordering of following loads or stores 'before' the
610 // fence. As a result, we look past it when finding a dependency for
611 // loads. DSE uses this to find preceding stores to delete and thus we
612 // can't bypass the fence if the query instruction is a store.
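    // For illustration (hypothetical IR):
    //   store i32 1, ptr %p
    //   fence release
    //   %v = load i32, ptr %p   ; the query
    // A load query scans past the release fence and still finds the store; a
    // store query would treat the fence as a potential dependency instead.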
613 if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
614 if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
615 continue;
616
617 // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
618 switch (BatchAA.getModRefInfo(Inst, MemLoc)) {
619 case ModRefInfo::NoModRef:
620 // If the call has no effect on the queried pointer, just ignore it.
621 continue;
622 case ModRefInfo::Mod:
623 return MemDepResult::getClobber(Inst);
624 case ModRefInfo::Ref:
625 // If the call is known to never store to the pointer, and if this is a
626 // load query, we can safely ignore it (scan past it).
627 if (isLoad)
628 continue;
629 [[fallthrough]];
630 default:
631 // Otherwise, there is a potential dependence. Return a clobber.
632 return MemDepResult::getClobber(Inst);
633 }
634 }
635
636 // No dependence found. If this is the entry block of the function, it is
637 // unknown, otherwise it is non-local.
638 if (BB != &BB->getParent()->getEntryBlock())
639 return MemDepResult::getNonLocal();
640 return MemDepResult::getNonFuncLocal();
641}
642
643 MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
644 ClobberOffsets.clear();
645 Instruction *ScanPos = QueryInst;
646
647 // Check for a cached result
648 MemDepResult &LocalCache = LocalDeps[QueryInst];
649
650 // If the cached entry is non-dirty, just return it. Note that this depends
651 // on MemDepResult's default constructing to 'dirty'.
652 if (!LocalCache.isDirty())
653 return LocalCache;
654
655 // Otherwise, if we have a dirty entry, we know we can start the scan at that
656 // instruction, which may save us some work.
657 if (Instruction *Inst = LocalCache.getInst()) {
658 ScanPos = Inst;
659
660 RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
661 }
662
663 BasicBlock *QueryParent = QueryInst->getParent();
664
665 // Do the scan.
666 if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
667 // No dependence found. If this is the entry block of the function, it is
668 // unknown, otherwise it is non-local.
669 if (QueryParent != &QueryParent->getParent()->getEntryBlock())
670 LocalCache = MemDepResult::getNonLocal();
671 else
672 LocalCache = MemDepResult::getNonFuncLocal();
673 } else {
674 MemoryLocation MemLoc;
675 ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
676 if (MemLoc.Ptr) {
677 // If we can do a pointer scan, make it happen.
678 bool isLoad = !isModSet(MR);
679 if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
680 isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
681
682 LocalCache =
683 getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
684 QueryParent, QueryInst, nullptr);
685 } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
686 bool isReadOnly = AA.onlyReadsMemory(QueryCall);
687 LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
688 ScanPos->getIterator(), QueryParent);
689 } else
690 // Non-memory instruction.
691 LocalCache = MemDepResult::getUnknown();
692 }
693
694 // Remember the result!
695 if (Instruction *I = LocalCache.getInst())
696 ReverseLocalDeps[I].insert(QueryInst);
697
698 return LocalCache;
699}
700
701#ifndef NDEBUG
702/// This method is used when -debug is specified to verify that cache arrays
703/// are properly kept sorted.
704 static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
705 int Count = -1) {
706 if (Count == -1)
707 Count = Cache.size();
708 assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
709 "Cache isn't sorted!");
710}
711#endif
712
713 const MemoryDependenceResults::NonLocalDepInfo &
714 MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
715 assert(getDependency(QueryCall).isNonLocal() &&
716 "getNonLocalCallDependency should only be used on calls with "
717 "non-local deps!");
718 PerInstNLInfo &CacheP = NonLocalDepsMap[QueryCall];
719 NonLocalDepInfo &Cache = CacheP.first;
720
721 // This is the set of blocks that need to be recomputed. In the cached case,
722 // this can happen due to instructions being deleted etc. In the uncached
723 // case, this starts out as the set of predecessors we care about.
724 SmallVector<BasicBlock *, 32> DirtyBlocks;
725
726 if (!Cache.empty()) {
727 // Okay, we have a cache entry. If we know it is not dirty, just return it
728 // with no computation.
729 if (!CacheP.second) {
730 ++NumCacheNonLocal;
731 return Cache;
732 }
733
734 // If we already have a partially computed set of results, scan them to
735 // determine what is dirty, seeding our initial DirtyBlocks worklist.
736 for (auto &Entry : Cache)
737 if (Entry.getResult().isDirty())
738 DirtyBlocks.push_back(Entry.getBB());
739
740 // Sort the cache so that we can do fast binary search lookups below.
741 llvm::sort(Cache);
742
743 ++NumCacheDirtyNonLocal;
744 } else {
745 // Seed DirtyBlocks with each of the preds of QueryInst's block.
746 BasicBlock *QueryBB = QueryCall->getParent();
747 append_range(DirtyBlocks, PredCache.get(QueryBB));
748 ++NumUncacheNonLocal;
749 }
750
751 // isReadonlyCall - If this is a read-only call, we can be more aggressive.
752 bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);
753
753
754 SmallPtrSet<BasicBlock *, 32> Visited;
755
756 unsigned NumSortedEntries = Cache.size();
757 LLVM_DEBUG(AssertSorted(Cache));
758
759 // Iterate while we still have blocks to update.
760 while (!DirtyBlocks.empty()) {
761 BasicBlock *DirtyBB = DirtyBlocks.pop_back_val();
762
763 // Already processed this block?
764 if (!Visited.insert(DirtyBB).second)
765 continue;
766
767 // Do a binary search to see if we already have an entry for this block in
768 // the cache set. If so, find it.
769 LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
770 NonLocalDepInfo::iterator Entry =
771 std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
772 NonLocalDepEntry(DirtyBB));
773 if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
774 --Entry;
775
776 NonLocalDepEntry *ExistingResult = nullptr;
777 if (Entry != Cache.begin() + NumSortedEntries &&
778 Entry->getBB() == DirtyBB) {
779 // If we already have an entry, and if it isn't already dirty, the block
780 // is done.
781 if (!Entry->getResult().isDirty())
782 continue;
783
784 // Otherwise, remember this slot so we can update the value.
785 ExistingResult = &*Entry;
786 }
787
788 // If the dirty entry has a pointer, start scanning from it so we don't have
789 // to rescan the entire block.
790 BasicBlock::iterator ScanPos = DirtyBB->end();
791 if (ExistingResult) {
792 if (Instruction *Inst = ExistingResult->getResult().getInst()) {
793 ScanPos = Inst->getIterator();
794 // We're removing QueryInst's use of Inst.
795 RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
796 QueryCall);
797 }
798 }
799
800 // Find out if this block has a local dependency for QueryInst.
801 MemDepResult Dep;
802
803 if (ScanPos != DirtyBB->begin()) {
804 Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
805 } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
806 // No dependence found. If this is the entry block of the function, it is
807 // non-func-local, otherwise it is non-local.
808 Dep = MemDepResult::getNonLocal();
809 } else {
810 Dep = MemDepResult::getNonFuncLocal();
811 }
812
813 // If we had a dirty entry for the block, update it. Otherwise, just add
814 // a new entry.
815 if (ExistingResult)
816 ExistingResult->setResult(Dep);
817 else
818 Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
819
820 // If the block has a dependency (i.e. it isn't completely transparent to
821 // the value), remember the association!
822 if (!Dep.isNonLocal()) {
823 // Keep the ReverseNonLocalDeps map up to date so we can efficiently
824 // update this when we remove instructions.
825 if (Instruction *Inst = Dep.getInst())
826 ReverseNonLocalDeps[Inst].insert(QueryCall);
827 } else {
828
829 // If the block *is* completely transparent to the load, we need to check
830 // the predecessors of this block. Add them to our worklist.
831 append_range(DirtyBlocks, PredCache.get(DirtyBB));
832 }
833 }
834
835 return Cache;
836}
837
838 void MemoryDependenceResults::getNonLocalPointerDependency(
839 Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
840 const MemoryLocation Loc = MemoryLocation::get(QueryInst);
841 bool isLoad = isa<LoadInst>(QueryInst);
842 BasicBlock *FromBB = QueryInst->getParent();
843 assert(FromBB);
844
845 assert(Loc.Ptr->getType()->isPointerTy() &&
846 "Can't get pointer deps of a non-pointer!");
847 Result.clear();
848 {
849 // Check if there is cached Def with invariant.group.
850 auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
851 if (NonLocalDefIt != NonLocalDefsCache.end()) {
852 Result.push_back(NonLocalDefIt->second);
853 ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
854 .erase(QueryInst);
855 NonLocalDefsCache.erase(NonLocalDefIt);
856 return;
857 }
858 }
859 // This routine does not expect to deal with volatile instructions.
860 // Doing so would require piping through the QueryInst all the way through.
861 // TODO: volatiles can't be elided, but they can be reordered with other
862 // non-volatile accesses.
863
864 // We currently give up on any instruction which is ordered, but we do handle
865 // atomic instructions which are unordered.
866 // TODO: Handle ordered instructions
867 auto isOrdered = [](Instruction *Inst) {
868 if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
869 return !LI->isUnordered();
870 } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
871 return !SI->isUnordered();
872 }
873 return false;
874 };
875 if (QueryInst->isVolatile() || isOrdered(QueryInst)) {
876 Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
877 const_cast<Value *>(Loc.Ptr)));
878 return;
879 }
880 const DataLayout &DL = FromBB->getDataLayout();
881 PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
882
883 // This is the set of blocks we've inspected, and the pointer we consider in
884 // each block. Because of critical edges, we currently bail out if querying
885 // a block with multiple different pointers. This can happen during PHI
886 // translation.
887 SmallDenseMap<BasicBlock *, Value *, 16> Visited;
888 if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
889 Result, Visited, true))
890 return;
891 Result.clear();
892 Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
893 const_cast<Value *>(Loc.Ptr)));
894}
895
896/// Compute the memdep value for BB with Pointer/PointeeSize using either
897/// cached information in Cache or by doing a lookup (which may use dirty cache
898/// info if available).
899///
900/// If we do a lookup, add the result to the cache.
901MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
902 Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
903 BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries,
904 BatchAAResults &BatchAA) {
905
906 bool isInvariantLoad = false;
907
908 if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
909 isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
910
911 // Do a binary search to see if we already have an entry for this block in
912 // the cache set. If so, find it.
913 NonLocalDepInfo::iterator Entry = std::upper_bound(
914 Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
915 if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
916 --Entry;
917
918 NonLocalDepEntry *ExistingResult = nullptr;
919 if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
920 ExistingResult = &*Entry;
921
922 // Use the cached result for an invariant load only if there is no dependency
923 // for the non-invariant load. In this case the invariant load cannot have any
924 // dependency either.
925 if (ExistingResult && isInvariantLoad &&
926 !ExistingResult->getResult().isNonFuncLocal())
927 ExistingResult = nullptr;
928
929 // If we have a cached entry, and it is non-dirty, use it as the value for
930 // this dependency.
931 if (ExistingResult && !ExistingResult->getResult().isDirty()) {
932 ++NumCacheNonLocalPtr;
933 return ExistingResult->getResult();
934 }
935
936 // Otherwise, we have to scan for the value. If we have a dirty cache
937 // entry, start scanning from its position, otherwise we scan from the end
938 // of the block.
939 BasicBlock::iterator ScanPos = BB->end();
940 if (ExistingResult && ExistingResult->getResult().getInst()) {
941 assert(ExistingResult->getResult().getInst()->getParent() == BB &&
942 "Instruction invalidated?");
943 ++NumCacheDirtyNonLocalPtr;
944 ScanPos = ExistingResult->getResult().getInst()->getIterator();
945
946 // Eliminating the dirty entry from 'Cache', so update the reverse info.
947 ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
948 RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
949 } else {
950 ++NumUncacheNonLocalPtr;
951 }
952
953 // Scan the block for the dependency.
954 MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
955 QueryInst, nullptr, BatchAA);
956
957 // Don't cache results for invariant load.
958 if (isInvariantLoad)
959 return Dep;
960
961 // If we had a dirty entry for the block, update it. Otherwise, just add
962 // a new entry.
963 if (ExistingResult)
964 ExistingResult->setResult(Dep);
965 else
966 Cache->push_back(NonLocalDepEntry(BB, Dep));
967
968 // If the block has a dependency (i.e. it isn't completely transparent to
969 // the value), remember the reverse association because we just added it
970 // to Cache!
971 if (!Dep.isLocal())
972 return Dep;
973
974 // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
975 // update MemDep when we remove instructions.
976 Instruction *Inst = Dep.getInst();
977 assert(Inst && "Didn't depend on anything?");
978 ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
979 ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
980 return Dep;
981}
982
983/// Sort the NonLocalDepInfo cache, given a certain number of elements in the
984/// array that are already properly ordered.
985///
986/// This is optimized for the case when only a few entries are added.
987static void
988 SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
989 unsigned NumSortedEntries) {
990
991 // If only one entry, don't sort.
992 if (Cache.size() < 2)
993 return;
994
995 unsigned s = Cache.size() - NumSortedEntries;
996
997 // If the cache is already sorted, don't sort it again.
998 if (s == 0)
999 return;
1000
1001 // If no entry is sorted, sort the whole cache.
1002 if (NumSortedEntries == 0) {
1003 llvm::sort(Cache);
1004 return;
1005 }
1006
1007 // If the number of unsorted entries is small and the cache size is big, using
1008 // insertion sort is faster. Here we use Log2_32 to quickly choose the sort
1009 // method.
1010 if (s < Log2_32(Cache.size())) {
1011 while (s > 0) {
1012 NonLocalDepEntry Val = Cache.back();
1013 Cache.pop_back();
1014 MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
1015 std::upper_bound(Cache.begin(), Cache.end() - s + 1, Val);
1016 Cache.insert(Entry, Val);
1017 s--;
1018 }
1019 } else {
1020 llvm::sort(Cache);
1021 }
1022}
1023
1024/// Perform a dependency query based on pointer/pointeesize starting at the end
1025/// of StartBB.
1026///
1027/// Add any clobber/def results to the results vector and keep track of which
1028/// blocks are visited in 'Visited'.
1029///
1030/// This has special behavior for the first block queries (when SkipFirstBlock
1031/// is true). In this special case, it ignores the contents of the specified
1032/// block and starts returning dependence info for its predecessors.
1033///
1034/// This function returns true on success, or false to indicate that it could
1035/// not compute dependence information for some reason. This should be treated
1036/// as a clobber dependence on the first instruction in the predecessor block.
1037bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
1038 Instruction *QueryInst, const PHITransAddr &Pointer,
1039 const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
1040 SmallVectorImpl<NonLocalDepResult> &Result,
1041 SmallDenseMap<BasicBlock *, Value *, 16> &Visited, bool SkipFirstBlock,
1042 bool IsIncomplete) {
1043 // Look up the cached info for Pointer.
1044 ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
1045
1046 // Set up a temporary NLPI value. If the map doesn't yet have an entry for
1047 // CacheKey, this value will be inserted as the associated value. Otherwise,
1048 // it'll be ignored, and we'll have to check to see if the cached size and
1049 // aa tags are consistent with the current query.
1050 NonLocalPointerInfo InitialNLPI;
1051 InitialNLPI.Size = Loc.Size;
1052 InitialNLPI.AATags = Loc.AATags;
1053
1054 bool isInvariantLoad = false;
1055 if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
1056 isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
1057
1058 // Get the NLPI for CacheKey, inserting one into the map if it doesn't
1059 // already have one.
1060 std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
1061 NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
1062 NonLocalPointerInfo *CacheInfo = &Pair.first->second;
1063
1064 // If we already have a cache entry for this CacheKey, we may need to do some
1065 // work to reconcile the cache entry and the current query.
1066 // Invariant loads don't participate in caching. Thus no need to reconcile.
1067 if (!isInvariantLoad && !Pair.second) {
1068 if (CacheInfo->Size != Loc.Size) {
1069 // The query's Size is not equal to the cached one. Throw out the cached
1070 // data and proceed with the query with the new size.
1071 CacheInfo->Pair = BBSkipFirstBlockPair();
1072 CacheInfo->Size = Loc.Size;
1073 for (auto &Entry : CacheInfo->NonLocalDeps)
1074 if (Instruction *Inst = Entry.getResult().getInst())
1075 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1076 CacheInfo->NonLocalDeps.clear();
1077 // The cache is cleared (in the above line) so we will have lost
1078 // information about blocks we have already visited. We therefore must
1079 // assume that the cache information is incomplete.
1080 IsIncomplete = true;
1081 }
1082
1083 // If the query's AATags are inconsistent with the cached one,
1084 // conservatively throw out the cached data and restart the query with
1085 // no tag if needed.
1086 if (CacheInfo->AATags != Loc.AATags) {
1087 if (CacheInfo->AATags) {
1088 CacheInfo->Pair = BBSkipFirstBlockPair();
1089 CacheInfo->AATags = AAMDNodes();
1090 for (auto &Entry : CacheInfo->NonLocalDeps)
1091 if (Instruction *Inst = Entry.getResult().getInst())
1092 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
1093 CacheInfo->NonLocalDeps.clear();
1094 // The cache is cleared (in the above line) so we will have lost
1095 // information about blocks we have already visited. We therefore must
1096 // assume that the cache information is incomplete.
1097 IsIncomplete = true;
1098 }
1099 if (Loc.AATags)
1100 return getNonLocalPointerDepFromBB(
1101 QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
1102 Visited, SkipFirstBlock, IsIncomplete);
1103 }
1104 }
1105
1106 NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
1107
1108 // If we have valid cached information for exactly the block we are
1109 // investigating, just return it with no recomputation.
1110 // Don't use cached information for invariant loads since it is valid for
1111 // non-invariant loads only.
1112 if (!IsIncomplete && !isInvariantLoad &&
1113 CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
1114 // We have a fully cached result for this query then we can just return the
1115 // cached results and populate the visited set. However, we have to verify
1116 // that we don't already have conflicting results for these blocks. Check
1117 // to ensure that if a block in the results set is in the visited set that
1118 // it was for the same pointer query.
1119 if (!Visited.empty()) {
1120 for (auto &Entry : *Cache) {
1121 SmallDenseMap<BasicBlock *, Value *, 16>::iterator VI =
1122 Visited.find(Entry.getBB());
1123 if (VI == Visited.end() || VI->second == Pointer.getAddr())
1124 continue;
1125
1126 // We have a pointer mismatch in a block. Just return false, saying
1127 // that something was clobbered in this result. We could also do a
1128 // non-fully cached query, but there is little point in doing this.
1129 return false;
1130 }
1131 }
1132
1133 Value *Addr = Pointer.getAddr();
1134 for (auto &Entry : *Cache) {
1135 Visited.insert(std::make_pair(Entry.getBB(), Addr));
1136 if (Entry.getResult().isNonLocal()) {
1137 continue;
1138 }
1139
1140 if (DT.isReachableFromEntry(Entry.getBB())) {
1141 Result.push_back(
1142 NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
1143 }
1144 }
1145 ++NumCacheCompleteNonLocalPtr;
1146 return true;
1147 }
1148
1149 // If the size of this cache has surpassed the global limit, stop here.
1150 if (Cache->size() > CacheGlobalLimit)
1151 return false;
1152
1153 // Otherwise, either this is a new block, a block with an invalid cache
1154 // pointer or one that we're about to invalidate by putting more info into
1155 // it than its valid cache info. If empty and not explicitly indicated as
1156 // incomplete, the result will be valid cache info, otherwise it isn't.
1157 //
1158 // Invariant loads don't affect cache in any way thus no need to update
1159 // CacheInfo as well.
1160 if (!isInvariantLoad) {
1161 if (!IsIncomplete && Cache->empty())
1162 CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
1163 else
1164 CacheInfo->Pair = BBSkipFirstBlockPair();
1165 }
1166
1167 SmallVector<BasicBlock *, 32> Worklist;
1168 Worklist.push_back(StartBB);
1169
1170 // PredList used inside loop.
1171 SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;
1172
1173 // Keep track of the entries that we know are sorted. Previously cached
1174 // entries will all be sorted. The entries we add we only sort on demand (we
1175 // don't insert every element into its sorted position). We know that we
1176 // won't get any reuse from currently inserted values, because we don't
1177 // revisit blocks after we insert info for them.
1178 unsigned NumSortedEntries = Cache->size();
1179 unsigned WorklistEntries = BlockNumberLimit;
1180 bool GotWorklistLimit = false;
1181 LLVM_DEBUG(AssertSorted(*Cache));
1182
1183 BatchAAResults BatchAA(AA, &EEA);
1184 while (!Worklist.empty()) {
1185 BasicBlock *BB = Worklist.pop_back_val();
1186
1187 // If we do process a large number of blocks it becomes very expensive and
1188 // likely it isn't worth worrying about.
1189 if (Result.size() > NumResultsLimit) {
1190 // Sort it now (if needed) so that recursive invocations of
1191 // getNonLocalPointerDepFromBB and other routines that could reuse the
1192 // cache value will only see properly sorted cache arrays.
1193 if (Cache && NumSortedEntries != Cache->size()) {
1194 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1195 }
1196 // Since we bail out, the "Cache" set won't contain all of the
1197 // results for the query. This is ok (we can still use it to accelerate
1198 // specific block queries) but we can't do the fastpath "return all
1199 // results from the set". Clear out the indicator for this.
1200 CacheInfo->Pair = BBSkipFirstBlockPair();
1201 return false;
1202 }
1203
1204 // Skip the first block if we have it.
1205 if (!SkipFirstBlock) {
1206 // Analyze the dependency of *Pointer in FromBB. See if we already have
1207 // been here.
1208 assert(Visited.count(BB) && "Should check 'visited' before adding to WL");
1209
1210 // Get the dependency info for Pointer in BB. If we have cached
1211 // information, we will use it, otherwise we compute it.
1212 LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
1213 MemDepResult Dep = getNonLocalInfoForBlock(
1214 QueryInst, Loc, isLoad, BB, Cache, NumSortedEntries, BatchAA);
1215
1216 // If we got a Def or Clobber, add this to the list of results.
1217 if (!Dep.isNonLocal()) {
1218 if (DT.isReachableFromEntry(BB)) {
1219 Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
1220 continue;
1221 }
1222 }
1223 }
1224
1225 // If 'Pointer' is an instruction defined in this block, then we need to do
1226 // phi translation to change it into a value live in the predecessor block.
1227 // If not, we just add the predecessors to the worklist and scan them with
1228 // the same Pointer.
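    // For illustration (hypothetical IR): if the query pointer in BB is
    //   %p = phi ptr [ %a, %pred1 ], [ %b, %pred2 ]
    // then scanning %pred1 must use %a and scanning %pred2 must use %b; that
    // substitution is the PHI translation performed below.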
1229 if (!Pointer.needsPHITranslationFromBlock(BB)) {
1230 SkipFirstBlock = false;
1231 SmallVector<BasicBlock *, 16> NewBlocks;
1232 for (BasicBlock *Pred : PredCache.get(BB)) {
1233 // Verify that we haven't looked at this block yet.
1234 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1235 Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
1236 if (InsertRes.second) {
1237 // First time we've looked at this predecessor.
1238 NewBlocks.push_back(Pred);
1239 continue;
1240 }
1241
1242 // If we have seen this block before, but it was with a different
1243 // pointer then we have a phi translation failure and we have to treat
1244 // this as a clobber.
1245 if (InsertRes.first->second != Pointer.getAddr()) {
1246 // Make sure to clean up the Visited map before continuing on to
1247 // PredTranslationFailure.
1248 for (auto *NewBlock : NewBlocks)
1249 Visited.erase(NewBlock);
1250 goto PredTranslationFailure;
1251 }
1252 }
1253 if (NewBlocks.size() > WorklistEntries) {
1254 // Make sure to clean up the Visited map before continuing on to
1255 // PredTranslationFailure.
1256 for (auto *NewBlock : NewBlocks)
1257 Visited.erase(NewBlock);
1258 GotWorklistLimit = true;
1259 goto PredTranslationFailure;
1260 }
1261 WorklistEntries -= NewBlocks.size();
1262 Worklist.append(NewBlocks.begin(), NewBlocks.end());
1263 continue;
1264 }
1265
1266 // We do need to do phi translation. If we know ahead of time that we can't
1267 // phi translate this value, don't even try.
1268 if (!Pointer.isPotentiallyPHITranslatable())
1269 goto PredTranslationFailure;
1270
1271 // We may have added values to the cache list before this PHI translation.
1272 // If so, we haven't done anything to ensure that the cache remains sorted.
1273 // Sort it now (if needed) so that recursive invocations of
1274 // getNonLocalPointerDepFromBB and other routines that could reuse the cache
1275 // value will only see properly sorted cache arrays.
1276 if (Cache && NumSortedEntries != Cache->size()) {
1277 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1278 NumSortedEntries = Cache->size();
1279 }
1280 Cache = nullptr;
1281
1282 PredList.clear();
1283 for (BasicBlock *Pred : PredCache.get(BB)) {
1284 PredList.push_back(std::make_pair(Pred, Pointer));
1285
1286 // Get the PHI translated pointer in this predecessor. This can fail if
1287 // not translatable, in which case the getAddr() returns null.
1288 PHITransAddr &PredPointer = PredList.back().second;
1289 Value *PredPtrVal =
1290 PredPointer.translateValue(BB, Pred, &DT, /*MustDominate=*/false);
1291
1292 // Check to see if we have already visited this pred block with another
1293 // pointer. If so, we can't do this lookup. This failure can occur
1294 // with PHI translation when a critical edge exists and the PHI node in
1295 // the successor translates to a pointer value different than the
1296 // pointer the block was first analyzed with.
1297 std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
1298 Visited.insert(std::make_pair(Pred, PredPtrVal));
1299
1300 if (!InsertRes.second) {
1301 // We found the pred; take it off the list of preds to visit.
1302 PredList.pop_back();
1303
1304 // If the predecessor was visited with PredPtr, then we already did
1305 // the analysis and can ignore it.
1306 if (InsertRes.first->second == PredPtrVal)
1307 continue;
1308
1309 // Otherwise, the block was previously analyzed with a different
1310 // pointer. We can't represent the result of this case, so we just
1311 // treat this as a phi translation failure.
1312
1313 // Make sure to clean up the Visited map before continuing on to
1314 // PredTranslationFailure.
1315 for (const auto &Pred : PredList)
1316 Visited.erase(Pred.first);
1317
1318 goto PredTranslationFailure;
1319 }
1320 }
1321
1322 // Actually process results here; this needs to be a separate loop to avoid
1323 // calling getNonLocalPointerDepFromBB for blocks we don't want to return
1324 // any results for. (getNonLocalPointerDepFromBB will modify our
1325 // datastructures in ways the code after the PredTranslationFailure label
1326 // doesn't expect.)
1327 for (auto &I : PredList) {
1328 BasicBlock *Pred = I.first;
1329 PHITransAddr &PredPointer = I.second;
1330 Value *PredPtrVal = PredPointer.getAddr();
1331
1332 bool CanTranslate = true;
1333 // If PHI translation was unable to find an available pointer in this
1334 // predecessor, then we have to assume that the pointer is clobbered in
1335 // that predecessor. We can still do PRE of the load, which would insert
1336 // a computation of the pointer in this predecessor.
1337 if (!PredPtrVal)
1338 CanTranslate = false;
1339
1340 // FIXME: it is entirely possible that PHI translating will end up with
1341 // the same value. Consider PHI translating something like:
1342 // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
1343 // to recurse here, pedantically speaking.
1344
1345 // If getNonLocalPointerDepFromBB fails here, that means the cached
1346 // result conflicted with the Visited list; we have to conservatively
1347 // assume it is unknown, but this also does not block PRE of the load.
1348 if (!CanTranslate ||
1349 !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
1350 Loc.getWithNewPtr(PredPtrVal), isLoad,
1351 Pred, Result, Visited)) {
1352 // Add the entry to the Result list.
1353 NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
1354 Result.push_back(Entry);
1355
1356 // Since we had a phi translation failure, the cache for CacheKey won't
1357 // include all of the entries that we need to immediately satisfy future
1358 // queries. Mark this in NonLocalPointerDeps by setting the
1359 // BBSkipFirstBlockPair pointer to null. This causes later reuse of the
1360 // cached value to do more work, but it will not miss the phi trans failure.
1361 NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
1362 NLPI.Pair = BBSkipFirstBlockPair();
1363 continue;
1364 }
1365 }
1366
1367 // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
1368 CacheInfo = &NonLocalPointerDeps[CacheKey];
1369 Cache = &CacheInfo->NonLocalDeps;
1370 NumSortedEntries = Cache->size();
1371
1372 // Since we did phi translation, the "Cache" set won't contain all of the
1373 // results for the query. This is ok (we can still use it to accelerate
1374 // specific block queries) but we can't do the fastpath "return all
1375 // results from the set". Clear out the indicator for this.
1376 CacheInfo->Pair = BBSkipFirstBlockPair();
1377 SkipFirstBlock = false;
1378 continue;
1379
1380 PredTranslationFailure:
1381 // The following code is "failure"; we can't produce a sane translation
1382 // for the given block. It assumes that we haven't modified any of
1383 // our datastructures while processing the current block.
1384
1385 if (!Cache) {
1386 // Refresh the CacheInfo/Cache pointer if it got invalidated.
1387 CacheInfo = &NonLocalPointerDeps[CacheKey];
1388 Cache = &CacheInfo->NonLocalDeps;
1389 NumSortedEntries = Cache->size();
1390 }
1391
1392 // Since we failed phi translation, the "Cache" set won't contain all of the
1393 // results for the query. This is ok (we can still use it to accelerate
1394 // specific block queries) but we can't do the fastpath "return all
1395 // results from the set". Clear out the indicator for this.
1396 CacheInfo->Pair = BBSkipFirstBlockPair();
1397
1398 // If *nothing* works, mark the pointer as unknown.
1399 //
1400 // If this is the magic first block, return this as a clobber of the whole
1401 // incoming value. Since we can't phi translate to one of the predecessors,
1402 // we have to bail out.
1403 if (SkipFirstBlock)
1404 return false;
1405
1406 // Results of invariant loads are not cached thus no need to update cached
1407 // information.
1408 if (!isInvariantLoad) {
1409 for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
1410 if (I.getBB() != BB)
1411 continue;
1412
1413 assert((GotWorklistLimit || I.getResult().isNonLocal() ||
1414 !DT.isReachableFromEntry(BB)) &&
1415 "Should only be here with transparent block");
1416
1417 I.setResult(MemDepResult::getUnknown());
1418
1419
1420 break;
1421 }
1422 }
1423 (void)GotWorklistLimit;
1424 // Go ahead and report unknown dependence.
1425 Result.push_back(
1426 NonLocalDepResult(BB, MemDepResult::getUnknown(), Pointer.getAddr()));
1427 }
1428
1429 // Okay, we're done now. If we added new values to the cache, re-sort it.
1430 SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
1431 LLVM_DEBUG(AssertSorted(*Cache));
1432 return true;
1433}
1434
1435/// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
1436void MemoryDependenceResults::removeCachedNonLocalPointerDependencies(
1437 ValueIsLoadPair P) {
1438
1439 // Most of the time this cache is empty.
1440 if (!NonLocalDefsCache.empty()) {
1441 auto it = NonLocalDefsCache.find(P.getPointer());
1442 if (it != NonLocalDefsCache.end()) {
1443 RemoveFromReverseMap(ReverseNonLocalDefsCache,
1444 it->second.getResult().getInst(), P.getPointer());
1445 NonLocalDefsCache.erase(it);
1446 }
1447
1448 if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
1449 auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
1450 if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
1451 for (const auto *entry : toRemoveIt->second)
1452 NonLocalDefsCache.erase(entry);
1453 ReverseNonLocalDefsCache.erase(toRemoveIt);
1454 }
1455 }
1456 }
1457
1458 CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
1459 if (It == NonLocalPointerDeps.end())
1460 return;
1461
1462 // Remove all of the entries in the BB->val map. This involves removing
1463 // instructions from the reverse map.
1464 NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
1465
1466 for (const NonLocalDepEntry &DE : PInfo) {
1467 Instruction *Target = DE.getResult().getInst();
1468 if (!Target)
1469 continue; // Ignore non-local dep results.
1470 assert(Target->getParent() == DE.getBB());
1471
1472 // Eliminating the dirty entry from 'Cache', so update the reverse info.
1473 RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
1474 }
1475
1476 // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
1477 NonLocalPointerDeps.erase(It);
1478}
1479
1480 void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
1481 // If Ptr isn't really a pointer, just ignore it.
1482 if (!Ptr->getType()->isPointerTy())
1483 return;
1484 // Flush store info for the pointer.
1485 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
1486 // Flush load info for the pointer.
1487 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
1488}
1489
1490 void MemoryDependenceResults::invalidateCachedPredecessors() {
1491 PredCache.clear();
1492}
1493
1494 void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
1495 EEA.removeInstruction(RemInst);
1496
1497 // Walk through the Non-local dependencies, removing this one as the value
1498 // for any cached queries.
1499 NonLocalDepMapType::iterator NLDI = NonLocalDepsMap.find(RemInst);
1500 if (NLDI != NonLocalDepsMap.end()) {
1501 NonLocalDepInfo &BlockMap = NLDI->second.first;
1502 for (auto &Entry : BlockMap)
1503 if (Instruction *Inst = Entry.getResult().getInst())
1504 RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
1505 NonLocalDepsMap.erase(NLDI);
1506 }
1507
1508 // If we have a cached local dependence query for this instruction, remove it.
1509 LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
1510 if (LocalDepEntry != LocalDeps.end()) {
1511 // Remove us from DepInst's reverse set now that the local dep info is gone.
1512 if (Instruction *Inst = LocalDepEntry->second.getInst())
1513 RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);
1514
1515 // Remove this local dependency info.
1516 LocalDeps.erase(LocalDepEntry);
1517 }
1518
1519 // If we have any cached dependencies on this instruction, remove
1520 // them.
1521
1522 // If the instruction is a pointer, remove it from both the load info and the
1523 // store info.
1524 if (RemInst->getType()->isPointerTy()) {
1525 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
1526 removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
1527 } else {
1528 // Otherwise, if the instruction is in the map directly, it must be a load.
1529 // Remove it.
1530 auto toRemoveIt = NonLocalDefsCache.find(RemInst);
1531 if (toRemoveIt != NonLocalDefsCache.end()) {
1532 assert(isa<LoadInst>(RemInst) &&
1533 "only load instructions should be added directly");
1534 const Instruction *DepV = toRemoveIt->second.getResult().getInst();
1535 ReverseNonLocalDefsCache.find(DepV)->second.erase(RemInst);
1536 NonLocalDefsCache.erase(toRemoveIt);
1537 }
1538 }
1539
1540 // Loop over all of the things that depend on the instruction we're removing.
1541 SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;
1542
1543 // If we find RemInst as a clobber or Def in any of the maps for other values,
1544 // we need to replace its entry with a dirty version of the instruction after
1545 // it. If RemInst is a terminator, we use a null dirty value.
1546 //
1547 // Using a dirty version of the instruction after RemInst saves having to scan
1548 // the entire block to get to this point.
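  // Illustration (hypothetical IR, not taken from this file): if RemInst is
  //   store i32 0, ptr %p        ; instruction being removed
  // and the next instruction in the block is
  //   %v = load i32, ptr %q
  // then every cached entry naming the store becomes getDirty(%v), so a later
  // recomputation rescans only from the load upward rather than from the end
  // of the block.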
1549 MemDepResult NewDirtyVal;
1550 if (!RemInst->isTerminator())
1551 NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());
1552
1553 ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
1554 if (ReverseDepIt != ReverseLocalDeps.end()) {
1555 // RemInst can't be the terminator if it has local stuff depending on it.
1556 assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
1557 "Nothing can locally depend on a terminator");
1558
1559 for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
1560 assert(InstDependingOnRemInst != RemInst &&
1561 "Already removed our local dep info");
1562
1563 LocalDeps[InstDependingOnRemInst] = NewDirtyVal;
1564
1565 // Make sure to remember that new things depend on NewDirtyVal's instruction.
1566 assert(NewDirtyVal.getInst() &&
1567 "There is no way something else can have "
1568 "a local dep on this if it is a terminator!");
1569 ReverseDepsToAdd.push_back(
1570 std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
1571 }
1572
1573 ReverseLocalDeps.erase(ReverseDepIt);
1574
1575 // Add new reverse deps after scanning the set, to avoid invalidating the
1576 // 'ReverseDeps' reference.
1577 while (!ReverseDepsToAdd.empty()) {
1578 ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
1579 ReverseDepsToAdd.back().second);
1580 ReverseDepsToAdd.pop_back();
1581 }
1582 }
1583
1584 ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
1585 if (ReverseDepIt != ReverseNonLocalDeps.end()) {
1586 for (Instruction *I : ReverseDepIt->second) {
1587 assert(I != RemInst && "Already removed NonLocalDep info for RemInst");
1588
1589 PerInstNLInfo &INLD = NonLocalDepsMap[I];
1590 // The information is now dirty!
1591 INLD.second = true;
1592
1593 for (auto &Entry : INLD.first) {
1594 if (Entry.getResult().getInst() != RemInst)
1595 continue;
1596
1597 // Convert to a dirty entry for the subsequent instruction.
1598 Entry.setResult(NewDirtyVal);
1599
1600 if (Instruction *NextI = NewDirtyVal.getInst())
1601 ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
1602 }
1603 }
1604
1605 ReverseNonLocalDeps.erase(ReverseDepIt);
1606
1607 // Add new reverse deps after scanning the set, to avoid invalidating it
1608 // while iterating.
1608 while (!ReverseDepsToAdd.empty()) {
1609 ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
1610 ReverseDepsToAdd.back().second);
1611 ReverseDepsToAdd.pop_back();
1612 }
1613 }
1614
1615 // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
1616 // value in the NonLocalPointerDeps info.
1617 ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
1618 ReverseNonLocalPtrDeps.find(RemInst);
1619 if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
1620 SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
1621 ReversePtrDepsToAdd;
1622
1623 for (ValueIsLoadPair P : ReversePtrDepIt->second) {
1624 assert(P.getPointer() != RemInst &&
1625 "Already removed NonLocalPointerDeps info for RemInst");
1626
1627 auto &NLPD = NonLocalPointerDeps[P];
1628
1629 NonLocalDepInfo &NLPDI = NLPD.NonLocalDeps;
1630
1631 // The cache is not valid for any specific block anymore.
1632 NLPD.Pair = BBSkipFirstBlockPair();
1633
1634 // Update any entries for RemInst to use the instruction after it.
1635 for (auto &Entry : NLPDI) {
1636 if (Entry.getResult().getInst() != RemInst)
1637 continue;
1638
1639 // Convert to a dirty entry for the subsequent instruction.
1640 Entry.setResult(NewDirtyVal);
1641
1642 if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
1643 ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
1644 }
1645
1646 // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
1647 // subsequent value may invalidate the sortedness.
1648 llvm::sort(NLPDI);
1649 }
1650
1651 ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);
1652
1653 while (!ReversePtrDepsToAdd.empty()) {
1654 ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
1655 ReversePtrDepsToAdd.back().second);
1656 ReversePtrDepsToAdd.pop_back();
1657 }
1658 }
1659
1660 assert(!NonLocalDepsMap.count(RemInst) && "RemInst got reinserted?");
1661 LLVM_DEBUG(verifyRemoved(RemInst));
1662}
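// Usage sketch (illustrative only; 'MD' and 'DeadInst' are hypothetical): a
// transform that deletes an instruction is expected to evict it from the cache
// first and only then erase the IR, mirroring the update logic above.
//   MD.removeInstruction(DeadInst); // rewrite cached deps that pointed at it
//   DeadInst->eraseFromParent();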
1663
1664/// Verify that the specified instruction does not occur in our internal data
1665/// structures.
1666///
1667/// This function verifies by asserting in debug builds.
1668void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
1669#ifndef NDEBUG
1670 for (const auto &DepKV : LocalDeps) {
1671 assert(DepKV.first != D && "Inst occurs in data structures");
1672 assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
1673 }
1674
1675 for (const auto &DepKV : NonLocalPointerDeps) {
1676 assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
1677 for (const auto &Entry : DepKV.second.NonLocalDeps)
1678 assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
1679 }
1680
1681 for (const auto &DepKV : NonLocalDepsMap) {
1682 assert(DepKV.first != D && "Inst occurs in data structures");
1683 const PerInstNLInfo &INLD = DepKV.second;
1684 for (const auto &Entry : INLD.first)
1685 assert(Entry.getResult().getInst() != D &&
1686 "Inst occurs in data structures");
1687 }
1688
1689 for (const auto &DepKV : ReverseLocalDeps) {
1690 assert(DepKV.first != D && "Inst occurs in data structures");
1691 for (Instruction *Inst : DepKV.second)
1692 assert(Inst != D && "Inst occurs in data structures");
1693 }
1694
1695 for (const auto &DepKV : ReverseNonLocalDeps) {
1696 assert(DepKV.first != D && "Inst occurs in data structures");
1697 for (Instruction *Inst : DepKV.second)
1698 assert(Inst != D && "Inst occurs in data structures");
1699 }
1700
1701 for (const auto &DepKV : ReverseNonLocalPtrDeps) {
1702 assert(DepKV.first != D && "Inst occurs in rev NLPD map");
1703
1704 for (ValueIsLoadPair P : DepKV.second)
1705 assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
1706 "Inst occurs in ReverseNonLocalPtrDeps map");
1707 }
1708#endif
1709}
1710
1711AnalysisKey MemoryDependenceAnalysis::Key;
1712
1713MemoryDependenceAnalysis::MemoryDependenceAnalysis()
1714 : DefaultBlockScanLimit(BlockScanLimit) {}
1715
1716MemoryDependenceResults
1717MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
1718 auto &AA = AM.getResult<AAManager>(F);
1719 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1720 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1721 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1722 return MemoryDependenceResults(AA, AC, TLI, DT, DefaultBlockScanLimit);
1723}
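// Usage sketch (illustrative only; 'MyPass' is hypothetical): under the new
// pass manager another function pass obtains this result through the analysis
// manager.
//   PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &AM) {
//     MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
//     // ... query MD.getDependency(...) etc. as needed ...
//     return PreservedAnalyses::all();
//   }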
1724
1725char MemoryDependenceWrapperPass::ID = 0;
1726
1727INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
1728 "Memory Dependence Analysis", false, true)
1729INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1730INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
1731INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1732INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1733INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
1734 "Memory Dependence Analysis", false, true)
1735
1736MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {}
1737
1738MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;
1739
1740void MemoryDependenceWrapperPass::releaseMemory() {
1741 MemDep.reset();
1742}
1743
1744void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
1745 AU.setPreservesAll();
1746 AU.addRequired<AssumptionCacheTracker>();
1747 AU.addRequiredTransitive<AAResultsWrapperPass>();
1748 AU.addRequiredTransitive<DominatorTreeWrapperPass>();
1749 AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
1750}
1751
1752bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
1753 FunctionAnalysisManager::Invalidator &Inv) {
1754 // Check whether our analysis is preserved.
1755 auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
1756 if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
1757 // If not, give up now.
1758 return true;
1759
1760 // Check whether the analyses we depend on became invalid for any reason.
1761 if (Inv.invalidate<AAManager>(F, PA) ||
1762 Inv.invalidate<AssumptionAnalysis>(F, PA) ||
1763 Inv.invalidate<DominatorTreeAnalysis>(F, PA))
1764 return true;
1765
1766 // Otherwise this analysis result remains valid.
1767 return false;
1768}
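// Illustrative note (not part of this file): given the checks above, a
// transform that wants this result to survive must mark it preserved and must
// also keep the alias, assumption, and dominator analyses valid. A hypothetical
// pass would report that as:
//   PreservedAnalyses PA;
//   PA.preserve<MemoryDependenceAnalysis>();
//   PA.preserve<DominatorTreeAnalysis>();
//   return PA;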
1769
1770unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
1771 return DefaultBlockScanLimit;
1772}
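// Illustrative note (not part of this file): the value returned here is seeded
// from the -memdep-block-scan-limit option defined near the top of this file,
// so the per-block scan budget can be adjusted from the command line, e.g.
// (hypothetical invocation):
//   opt -passes=gvn -memdep-block-scan-limit=500 -S input.ll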
1773
1774bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
1775 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1776 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1777 auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1778 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1779 MemDep.emplace(AA, AC, TLI, DT, BlockScanLimit);
1780 return false;
1781}
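// Usage sketch (illustrative only; 'MyLegacyPass' is hypothetical, and the
// getMemDep() accessor is assumed from the public header): a legacy pass that
// declared MemoryDependenceWrapperPass in its getAnalysisUsage can reach the
// results like so.
//   bool MyLegacyPass::runOnFunction(Function &F) {
//     MemoryDependenceResults &MD =
//         getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
//     // ... query MD ...
//     return false;
//   }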