7
7
#include " BitSets.hpp"
8
8
#include " Containers/BumpMapSet.hpp"
9
9
#include " Utilities/Allocators.hpp"
10
+ #include < Math/BumpVector.hpp>
10
11
#include < cassert>
11
12
#include < cstddef>
12
13
#include < cstdint>
13
14
#include < llvm/ADT/ArrayRef.h>
14
-
15
15
#include < llvm/ADT/SmallPtrSet.h>
16
16
#include < llvm/ADT/SmallVector.h>
17
17
#include < llvm/IR/BasicBlock.h>
21
21
#include < set>
22
22
#include < sys/select.h>
23
23
24
- void buildInstructionGraph (BumpAlloc<> &alloc, Instruction::Cache &cache,
25
- LinearProgramLoopBlock &LB) {
24
+ inline void buildInstructionGraph (BumpAlloc<> &alloc, Instruction::Cache &cache,
25
+ LinearProgramLoopBlock &LB) {
26
26
for (auto &node : LB.getNodes ()) {
27
27
auto access = node.getMemAccesses (alloc, LB.getMemoryAccesses ());
28
28
for (auto *mem : access) {
@@ -45,11 +45,11 @@ inline void merge(aset<Instruction *> &merged, aset<Instruction *> &toMerge) {
45
45
}
46
46
struct ReMapper {
47
47
map<Instruction *, Instruction *> reMap;
48
- auto operator [](Instruction *I ) -> Instruction * {
49
- if (auto f = reMap.find (I ); f != reMap.end ()) return f->second ;
50
- return I ;
48
+ auto operator [](Instruction *J ) -> Instruction * {
49
+ if (auto f = reMap.find (J ); f != reMap.end ()) return f->second ;
50
+ return J ;
51
51
}
52
- void remapFromTo (Instruction *I , Instruction *J) { reMap[I ] = J; }
52
+ void remapFromTo (Instruction *K , Instruction *J) { reMap[K ] = J; }
53
53
};
54
54
55
55
// represents the cost of merging key=>values; cost is hopefully negative.
@@ -119,10 +119,10 @@ struct MergingCost {
119
119
// however, isMerged(I, J) == isMerged(J, I)
120
120
// so we ignore easily swappable parameters
121
121
// NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
122
- auto isMerged (Instruction *I , Instruction *J) const -> bool {
122
+ auto isMerged (Instruction *L , Instruction *J) const -> bool {
123
123
Instruction *K = J;
124
124
do {
125
- if (I == K) return true ;
125
+ if (L == K) return true ;
126
126
K = findMerge (K);
127
127
} while (K && K != J);
128
128
return false ;
@@ -160,10 +160,8 @@ struct MergingCost {
160
160
BumpAlloc<> &alloc;
161
161
Instruction::Cache &cache;
162
162
ReMapper &reMap;
163
- llvm::MutableArrayRef<Instruction *> operands;
164
- constexpr operator llvm::MutableArrayRef<Instruction *>() const {
165
- return operands;
166
- }
163
+ MutPtrVector<Instruction *> operands;
164
+ constexpr operator MutPtrVector<Instruction *>() const { return operands; }
167
165
void merge (size_t i, Instruction *A, Instruction *B) {
168
166
operands[i] = reMap[A]->replaceAllUsesOf (reMap[B]);
169
167
}
@@ -178,7 +176,7 @@ struct MergingCost {
178
176
static auto init (Allocate a, Instruction *A) -> SelectAllocator {
179
177
size_t numOps = A->getNumOperands ();
180
178
auto **operandsPtr = a.alloc .allocate <Instruction *>(numOps);
181
- llvm::MutableArrayRef <Instruction *> operands ( operandsPtr, numOps) ;
179
+ MutPtrVector <Instruction *> operands{ operandsPtr, numOps} ;
182
180
return SelectAllocator{a.alloc , a.cache , a.reMap , operands};
183
181
}
184
182
static auto init (Count, Instruction *) -> SelectCounter {
@@ -212,8 +210,8 @@ struct MergingCost {
212
210
// so we need to check if any operand pairs are merged with each other.
213
211
// note `isMerged(a,a) == true`, so that's the one query we need to use.
214
212
auto selector = init (selects, A);
215
- llvm::MutableArrayRef <Instruction *> operandsA = A->getOperands ();
216
- llvm::MutableArrayRef <Instruction *> operandsB = B->getOperands ();
213
+ MutPtrVector <Instruction *> operandsA = A->getOperands ();
214
+ MutPtrVector <Instruction *> operandsB = B->getOperands ();
217
215
size_t numOperands = operandsA.size ();
218
216
assert (numOperands == operandsB.size ());
219
217
uint8_t associativeOpsFlag = B->associativeOperandsFlag ();
@@ -316,20 +314,20 @@ struct MergingCost {
316
314
}
317
315
};
318
316
319
- void mergeInstructions (
317
+ inline void mergeInstructions (
320
318
BumpAlloc<> &alloc, Instruction::Cache &cache, Predicate::Map &predMap,
321
319
llvm::TargetTransformInfo &TTI, unsigned int vectorBits,
322
320
amap<std::pair<Instruction::Intrinsic, llvm::Type *>,
323
- llvm::SmallVector <std::pair<Instruction *, Predicate::Set>>> &opMap,
324
- llvm::SmallVectorImpl<MergingCost *> &mergingCosts, Instruction *I ,
321
+ BumpPtrVector <std::pair<Instruction *, Predicate::Set>>> &opMap,
322
+ llvm::SmallVectorImpl<MergingCost *> &mergingCosts, Instruction *J ,
325
323
llvm::BasicBlock *BB, Predicate::Set &preds) {
326
324
// have we already visited?
327
- if (mergingCosts.front ()->visited (I )) return ;
325
+ if (mergingCosts.front ()->visited (J )) return ;
328
326
for (auto C : mergingCosts) {
329
- if (C->visited (I )) return ;
330
- C->initAncestors (alloc, I );
327
+ if (C->visited (J )) return ;
328
+ C->initAncestors (alloc, J );
331
329
}
332
- auto op = I ->getOpType ();
330
+ auto op = J ->getOpType ();
333
331
// TODO: confirm that `vec` doesn't get moved if `opMap` is resized
334
332
auto &vec = opMap[op];
335
333
// consider merging with every instruction sharing an opcode
@@ -351,19 +349,19 @@ void mergeInstructions(
351
349
// invalidation, we use an indexed loop
352
350
for (size_t i = 0 ; i < numMerges; ++i) {
353
351
MergingCost *C = mergingCosts[i];
354
- if (C->getAncestors (I )->contains (other)) continue ;
352
+ if (C->getAncestors (J )->contains (other)) continue ;
355
353
// we shouldn't have to check the opposite condition
356
354
// if (C->getAncestors(other)->contains(I))
357
355
// because we are traversing in topological order
358
356
// that is, we haven't visited any descendants of `I`
359
357
// so only an ancestor had a chance
360
358
auto *MC = alloc.construct <MergingCost>(*C);
361
359
// MC is a copy of C, except we're now merging
362
- MC->merge (alloc, TTI, vectorBits, other, I );
360
+ MC->merge (alloc, TTI, vectorBits, other, J );
363
361
}
364
362
}
365
363
// descendants aren't legal merge candidates, so check before merging
366
- for (Instruction *U : I ->getUsers ()) {
364
+ for (Instruction *U : J ->getUsers ()) {
367
365
if (llvm::BasicBlock *BBU = U->getBasicBlock ()) {
368
366
if (BBU == BB) {
369
367
// fast path, skip lookup
@@ -376,7 +374,7 @@ void mergeInstructions(
376
374
}
377
375
}
378
376
// descendants aren't legal merge candidates, so push after merging
379
- vec.push_back ({I , preds});
377
+ vec.push_back ({J , preds});
380
378
// TODO: prune bad candidates from mergingCosts
381
379
}
382
380
@@ -391,21 +389,22 @@ void mergeInstructions(
391
389
// / merging as it allocates a lot of memory that it can free when it is done.
392
390
// / TODO: this algorithm is exponential in time and memory.
393
391
// / Odds are that there's way smarter things we can do.
394
- void mergeInstructions (BumpAlloc<> &alloc, Instruction::Cache &cache,
395
- Predicate::Map &predMap, llvm::TargetTransformInfo &TTI,
396
- BumpAlloc<> &tAlloc, unsigned int vectorBits) {
392
+ inline void mergeInstructions (BumpAlloc<> &alloc, Instruction::Cache &cache,
393
+ Predicate::Map &predMap,
394
+ llvm::TargetTransformInfo &TTI,
395
+ BumpAlloc<> &tAlloc, unsigned int vectorBits) {
397
396
if (!predMap.isDivergent ()) return ;
398
397
// there is a divergence in the control flow that we can ideally merge
399
398
amap<std::pair<Instruction::Intrinsic, llvm::Type *>,
400
- llvm::SmallVector <std::pair<Instruction *, Predicate::Set>>>
401
- opMap{};
399
+ BumpPtrVector <std::pair<Instruction *, Predicate::Set>>>
400
+ opMap{tAlloc };
402
401
llvm::SmallVector<MergingCost *> mergingCosts;
403
- mergingCosts.push_back (alloc. construct <MergingCost>() );
402
+ mergingCosts.emplace_back (alloc);
404
403
for (auto &pred : predMap) {
405
404
for (llvm::Instruction &lI : *pred.first ) {
406
- if (Instruction *I = cache[&lI]) {
405
+ if (Instruction *J = cache[&lI]) {
407
406
mergeInstructions (tAlloc, cache, predMap, TTI, vectorBits, opMap,
408
- mergingCosts, I , pred.first , pred.second );
407
+ mergingCosts, J , pred.first , pred.second );
409
408
}
410
409
}
411
410
}
@@ -421,19 +420,19 @@ void mergeInstructions(BumpAlloc<> &alloc, Instruction::Cache &cache,
421
420
auto [A, B] = pair;
422
421
A = reMap[A];
423
422
B = reMap[B];
424
- llvm::MutableArrayRef<Instruction *> operands =
425
- minCostStrategy->mergeOperands (
426
- A, B, MergingCost::Allocate{alloc, cache, reMap});
423
+ auto operands = minCostStrategy->mergeOperands (
424
+ A, B, MergingCost::Allocate{alloc, cache, reMap});
427
425
A->replaceAllUsesOf (B)->setOperands (operands);
428
426
reMap.remapFromTo (B, A);
429
427
}
430
428
// free memory
431
429
tAlloc.reset ();
432
430
}
433
431
434
- void mergeInstructions (BumpAlloc<> &alloc, Instruction::Cache &cache,
435
- LoopTree *loopForest, llvm::TargetTransformInfo &TTI,
436
- BumpAlloc<> &tAlloc, unsigned int vectorBits) {
432
+ inline void mergeInstructions (BumpAlloc<> &alloc, Instruction::Cache &cache,
433
+ LoopTree *loopForest,
434
+ llvm::TargetTransformInfo &TTI,
435
+ BumpAlloc<> &tAlloc, unsigned int vectorBits) {
437
436
for (auto &predMap : loopForest->getPaths ())
438
437
mergeInstructions (alloc, cache, predMap, TTI, tAlloc, vectorBits);
439
438
for (auto subLoop : loopForest->getSubLoops ())
0 commit comments