33#define DEBUG_TYPE "loop-data-prefetch"
41 cl::desc(
"Prefetch write addresses"));
45 cl::desc(
"Number of instructions to prefetch ahead"),
53 "max-prefetch-iters-ahead",
56STATISTIC(NumPrefetches,
"Number of prefetches inserted");
61class LoopDataPrefetch {
66 : AC(AC), DT(DT), LI(LI), SE(SE),
TTI(
TTI), ORE(ORE) {}
71 bool runOnLoop(Loop *L);
75 bool isStrideLargeEnough(
const SCEVAddRecExpr *AR,
unsigned TargetMinStride);
77 unsigned getMinPrefetchStride(
unsigned NumMemAccesses,
78 unsigned NumStridedMemAccesses,
79 unsigned NumPrefetches,
83 return TTI->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
84 NumPrefetches, HasCall);
87 unsigned getPrefetchDistance() {
90 return TTI->getPrefetchDistance();
93 unsigned getMaxPrefetchIterationsAhead() {
96 return TTI->getMaxPrefetchIterationsAhead();
99 bool doPrefetchWrites() {
102 return TTI->enableWritePrefetching();
109 const TargetTransformInfo *TTI;
110 OptimizationRemarkEmitter *ORE;
117 LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
121 void getAnalysisUsage(AnalysisUsage &AU)
const override {
129 AU.
addRequired<OptimizationRemarkEmitterWrapperPass>();
139char LoopDataPrefetchLegacyPass::ID = 0;
141 "Loop Data Prefetch",
false,
false)
152 return new LoopDataPrefetchLegacyPass();
155bool LoopDataPrefetch::isStrideLargeEnough(
const SCEVAddRecExpr *AR,
156 unsigned TargetMinStride) {
158 if (TargetMinStride <= 1)
167 unsigned AbsStride = std::abs(ConstStride->getAPInt().getSExtValue());
168 return TargetMinStride <= AbsStride;
181 LoopDataPrefetch LDP(AC, DT, LI, SE,
TTI, ORE);
194bool LoopDataPrefetchLegacyPass::runOnFunction(
Function &
F) {
198 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
199 LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
200 ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
202 &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
204 &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
206 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
208 LoopDataPrefetch LDP(AC, DT, LI, SE,
TTI, ORE);
212bool LoopDataPrefetch::run() {
216 if (getPrefetchDistance() == 0 ||
TTI->getCacheLineSize() == 0) {
217 LLVM_DEBUG(
dbgs() <<
"Please set both PrefetchDistance and CacheLineSize "
218 "for loop data prefetch.\n");
222 bool MadeChange =
false;
226 MadeChange |= runOnLoop(L);
253 int64_t PtrDiff = 0) {
261 if (PrefBB != InsBB) {
273bool LoopDataPrefetch::runOnLoop(Loop *L) {
274 bool MadeChange =
false;
277 if (!
L->isInnermost())
280 SmallPtrSet<const Value *, 32> EphValues;
285 bool HasCall =
false;
286 for (
const auto BB :
L->blocks()) {
289 for (
auto &
I : *BB) {
292 if (
F->getIntrinsicID() == Intrinsic::prefetch)
301 Metrics.analyzeBasicBlock(BB, *
TTI, EphValues);
304 if (!
Metrics.NumInsts.isValid())
307 unsigned LoopSize =
Metrics.NumInsts.getValue();
311 unsigned ItersAhead = getPrefetchDistance() / LoopSize;
315 if (ItersAhead > getMaxPrefetchIterationsAhead())
319 if (ConstantMaxTripCount && ConstantMaxTripCount < ItersAhead + 1)
322 unsigned NumMemAccesses = 0;
323 unsigned NumStridedMemAccesses = 0;
325 for (
const auto BB :
L->blocks())
326 for (
auto &
I : *BB) {
332 PtrValue = LMemI->getPointerOperand();
334 if (!doPrefetchWrites())
continue;
336 PtrValue = SMemI->getPointerOperand();
343 if (
L->isLoopInvariant(PtrValue))
346 const SCEV *LSCEV = SE->
getSCEV(PtrValue);
350 NumStridedMemAccesses++;
355 bool DupPref =
false;
356 for (
auto &Pref : Prefetches) {
357 const SCEV *PtrDiff = SE->
getMinusSCEV(LSCEVAddRec, Pref.LSCEVAddRec);
358 if (
const SCEVConstant *ConstPtrDiff =
360 int64_t
PD = std::abs(ConstPtrDiff->getValue()->getSExtValue());
362 Pref.addInstruction(MemI, DT, PD);
369 Prefetches.push_back(
Prefetch(LSCEVAddRec, MemI));
372 unsigned TargetMinStride =
373 getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
374 Prefetches.size(), HasCall);
377 <<
" iterations ahead (loop size: " << LoopSize <<
") in "
378 <<
L->getHeader()->getParent()->getName() <<
": " << *L);
380 << NumMemAccesses <<
" memory accesses, "
381 << NumStridedMemAccesses <<
" strided memory accesses, "
382 << Prefetches.size() <<
" potential prefetch(es), "
383 <<
"a minimum stride of " << TargetMinStride <<
", "
384 << (HasCall ?
"calls" :
"no calls") <<
".\n");
386 for (
auto &
P : Prefetches) {
389 if (!isStrideLargeEnough(
P.LSCEVAddRec, TargetMinStride))
396 P.LSCEVAddRec->getStepRecurrence(*SE)));
397 if (!SCEVE.isSafeToExpand(NextLSCEV))
402 Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr,
P.InsertPt);
406 Builder.CreateIntrinsic(Intrinsic::prefetch, PrefPtrValue->
getType(),
407 {PrefPtrValue, ConstantInt::get(I32, P.Writes),
408 ConstantInt::get(I32, 3),
409 ConstantInt::get(I32, 1)});
413 <<
", SCEV: " << *
P.LSCEVAddRec <<
"\n");
415 return OptimizationRemark(
DEBUG_TYPE,
"Prefetched",
P.MemI)
416 <<
"prefetched memory access";
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
static bool runOnFunction(Function &F, bool PostInlining)
static cl::opt< bool > PrefetchWrites("loop-prefetch-writes", cl::Hidden, cl::init(false), cl::desc("Prefetch write addresses"))
static cl::opt< unsigned > MinPrefetchStride("min-prefetch-stride", cl::desc("Min stride to add prefetches"), cl::Hidden)
loop data Loop Data Prefetch
static cl::opt< unsigned > PrefetchDistance("prefetch-distance", cl::desc("Number of instructions to prefetch ahead"), cl::Hidden)
static cl::opt< unsigned > MaxPrefetchIterationsAhead("max-prefetch-iters-ahead", cl::desc("Max number of iterations to prefetch ahead"), cl::Hidden)
This file provides the interface for LLVM's Loop Data Prefetching Pass.
static const Function * getCalledFunction(const Value *V)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
LLVM_ABI AnalysisUsage & addRequiredID(const void *ID)
AnalysisUsage & addPreservedID(const void *ID)
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
FunctionPass class - This class is used to implement most global optimizations.
Analysis pass that exposes the LoopInfo for a function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
The legacy pass manager's analysis pass to compute loop information.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getType() const
All values are typed, get the type of this value.
int getNumOccurrences() const
@ BasicBlock
Various leaf nodes.
@ PD
PD - Prefix code for packed double precision vector floating point operations performed in the SSE re...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI char & LoopSimplifyID
LLVM_ABI FunctionPass * createLoopDataPrefetchPass()
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &)
iterator_range< df_iterator< T > > depth_first(const T &G)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
Instruction * InsertPt
The point of insertion for the prefetch instruction.
void addInstruction(Instruction *I, DominatorTree *DT=nullptr, int64_t PtrDiff=0)
Add the instruction.
bool Writes
True if targeting a write memory access.
Instruction * MemI
The (first seen) prefetched instruction.
const SCEVAddRecExpr * LSCEVAddRec
The address formula for this prefetch as returned by ScalarEvolution.
Prefetch(const SCEVAddRecExpr *L, Instruction *I)
Constructor to create a new Prefetch for I.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).