Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit cf33a1d

Browse files
committed
Reach test success parity with main
remarks test answers on main are junk.
1 parent e3532c1 commit cf33a1d

File tree

10 files changed

+79
-65
lines changed

10 files changed

+79
-65
lines changed

include/ControlFlowMerging.hpp

Lines changed: 40 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
#include "BitSets.hpp"
88
#include "Containers/BumpMapSet.hpp"
99
#include "Utilities/Allocators.hpp"
10+
#include <Math/BumpVector.hpp>
1011
#include <cassert>
1112
#include <cstddef>
1213
#include <cstdint>
1314
#include <llvm/ADT/ArrayRef.h>
14-
1515
#include <llvm/ADT/SmallPtrSet.h>
1616
#include <llvm/ADT/SmallVector.h>
1717
#include <llvm/IR/BasicBlock.h>
@@ -21,8 +21,8 @@
2121
#include <set>
2222
#include <sys/select.h>
2323

24-
void buildInstructionGraph(BumpAlloc<> &alloc, Instruction::Cache &cache,
25-
LinearProgramLoopBlock &LB) {
24+
inline void buildInstructionGraph(BumpAlloc<> &alloc, Instruction::Cache &cache,
25+
LinearProgramLoopBlock &LB) {
2626
for (auto &node : LB.getNodes()) {
2727
auto access = node.getMemAccesses(alloc, LB.getMemoryAccesses());
2828
for (auto *mem : access) {
@@ -45,11 +45,11 @@ inline void merge(aset<Instruction *> &merged, aset<Instruction *> &toMerge) {
4545
}
4646
struct ReMapper {
4747
map<Instruction *, Instruction *> reMap;
48-
auto operator[](Instruction *I) -> Instruction * {
49-
if (auto f = reMap.find(I); f != reMap.end()) return f->second;
50-
return I;
48+
auto operator[](Instruction *J) -> Instruction * {
49+
if (auto f = reMap.find(J); f != reMap.end()) return f->second;
50+
return J;
5151
}
52-
void remapFromTo(Instruction *I, Instruction *J) { reMap[I] = J; }
52+
void remapFromTo(Instruction *K, Instruction *J) { reMap[K] = J; }
5353
};
5454

5555
// represents the cost of merging key=>values; cost is hopefully negative.
@@ -119,10 +119,10 @@ struct MergingCost {
119119
// however, isMerged(I, J) == isMerged(J, I)
120120
// so we ignore easily swappable parameters
121121
// NOLINTNEXTLINE(bugprone-easily-swappable-parameters)
122-
auto isMerged(Instruction *I, Instruction *J) const -> bool {
122+
auto isMerged(Instruction *L, Instruction *J) const -> bool {
123123
Instruction *K = J;
124124
do {
125-
if (I == K) return true;
125+
if (L == K) return true;
126126
K = findMerge(K);
127127
} while (K && K != J);
128128
return false;
@@ -160,10 +160,8 @@ struct MergingCost {
160160
BumpAlloc<> &alloc;
161161
Instruction::Cache &cache;
162162
ReMapper &reMap;
163-
llvm::MutableArrayRef<Instruction *> operands;
164-
constexpr operator llvm::MutableArrayRef<Instruction *>() const {
165-
return operands;
166-
}
163+
MutPtrVector<Instruction *> operands;
164+
constexpr operator MutPtrVector<Instruction *>() const { return operands; }
167165
void merge(size_t i, Instruction *A, Instruction *B) {
168166
operands[i] = reMap[A]->replaceAllUsesOf(reMap[B]);
169167
}
@@ -178,7 +176,7 @@ struct MergingCost {
178176
static auto init(Allocate a, Instruction *A) -> SelectAllocator {
179177
size_t numOps = A->getNumOperands();
180178
auto **operandsPtr = a.alloc.allocate<Instruction *>(numOps);
181-
llvm::MutableArrayRef<Instruction *> operands(operandsPtr, numOps);
179+
MutPtrVector<Instruction *> operands{operandsPtr, numOps};
182180
return SelectAllocator{a.alloc, a.cache, a.reMap, operands};
183181
}
184182
static auto init(Count, Instruction *) -> SelectCounter {
@@ -212,8 +210,8 @@ struct MergingCost {
212210
// so we need to check if any operand pairs are merged with each other.
213211
// note `isMerged(a,a) == true`, so that's the one query we need to use.
214212
auto selector = init(selects, A);
215-
llvm::MutableArrayRef<Instruction *> operandsA = A->getOperands();
216-
llvm::MutableArrayRef<Instruction *> operandsB = B->getOperands();
213+
MutPtrVector<Instruction *> operandsA = A->getOperands();
214+
MutPtrVector<Instruction *> operandsB = B->getOperands();
217215
size_t numOperands = operandsA.size();
218216
assert(numOperands == operandsB.size());
219217
uint8_t associativeOpsFlag = B->associativeOperandsFlag();
@@ -316,20 +314,20 @@ struct MergingCost {
316314
}
317315
};
318316

319-
void mergeInstructions(
317+
inline void mergeInstructions(
320318
BumpAlloc<> &alloc, Instruction::Cache &cache, Predicate::Map &predMap,
321319
llvm::TargetTransformInfo &TTI, unsigned int vectorBits,
322320
amap<std::pair<Instruction::Intrinsic, llvm::Type *>,
323-
llvm::SmallVector<std::pair<Instruction *, Predicate::Set>>> &opMap,
324-
llvm::SmallVectorImpl<MergingCost *> &mergingCosts, Instruction *I,
321+
BumpPtrVector<std::pair<Instruction *, Predicate::Set>>> &opMap,
322+
llvm::SmallVectorImpl<MergingCost *> &mergingCosts, Instruction *J,
325323
llvm::BasicBlock *BB, Predicate::Set &preds) {
326324
// have we already visited?
327-
if (mergingCosts.front()->visited(I)) return;
325+
if (mergingCosts.front()->visited(J)) return;
328326
for (auto C : mergingCosts) {
329-
if (C->visited(I)) return;
330-
C->initAncestors(alloc, I);
327+
if (C->visited(J)) return;
328+
C->initAncestors(alloc, J);
331329
}
332-
auto op = I->getOpType();
330+
auto op = J->getOpType();
333331
// TODO: confirm that `vec` doesn't get moved if `opMap` is resized
334332
auto &vec = opMap[op];
335333
// consider merging with every instruction sharing an opcode
@@ -351,19 +349,19 @@ void mergeInstructions(
351349
// invalidation, we use an indexed loop
352350
for (size_t i = 0; i < numMerges; ++i) {
353351
MergingCost *C = mergingCosts[i];
354-
if (C->getAncestors(I)->contains(other)) continue;
352+
if (C->getAncestors(J)->contains(other)) continue;
355353
// we shouldn't have to check the opposite condition
356354
// if (C->getAncestors(other)->contains(I))
357355
// because we are traversing in topological order
358356
// that is, we haven't visited any descendants of `I`
359357
// so only an ancestor had a chance
360358
auto *MC = alloc.construct<MergingCost>(*C);
361359
// MC is a copy of C, except we're now merging
362-
MC->merge(alloc, TTI, vectorBits, other, I);
360+
MC->merge(alloc, TTI, vectorBits, other, J);
363361
}
364362
}
365363
// descendants aren't legal merge candidates, so check before merging
366-
for (Instruction *U : I->getUsers()) {
364+
for (Instruction *U : J->getUsers()) {
367365
if (llvm::BasicBlock *BBU = U->getBasicBlock()) {
368366
if (BBU == BB) {
369367
// fast path, skip lookup
@@ -376,7 +374,7 @@ void mergeInstructions(
376374
}
377375
}
378376
// descendants aren't legal merge candidates, so push after merging
379-
vec.push_back({I, preds});
377+
vec.push_back({J, preds});
380378
// TODO: prune bad candidates from mergingCosts
381379
}
382380

@@ -391,21 +389,22 @@ void mergeInstructions(
391389
/// merging as it allocates a lot of memory that it can free when it is done.
392390
/// TODO: this algorithm is exponential in time and memory.
393391
/// Odds are that there's way smarter things we can do.
394-
void mergeInstructions(BumpAlloc<> &alloc, Instruction::Cache &cache,
395-
Predicate::Map &predMap, llvm::TargetTransformInfo &TTI,
396-
BumpAlloc<> &tAlloc, unsigned int vectorBits) {
392+
inline void mergeInstructions(BumpAlloc<> &alloc, Instruction::Cache &cache,
393+
Predicate::Map &predMap,
394+
llvm::TargetTransformInfo &TTI,
395+
BumpAlloc<> &tAlloc, unsigned int vectorBits) {
397396
if (!predMap.isDivergent()) return;
398397
// there is a divergence in the control flow that we can ideally merge
399398
amap<std::pair<Instruction::Intrinsic, llvm::Type *>,
400-
llvm::SmallVector<std::pair<Instruction *, Predicate::Set>>>
401-
opMap{};
399+
BumpPtrVector<std::pair<Instruction *, Predicate::Set>>>
400+
opMap{tAlloc};
402401
llvm::SmallVector<MergingCost *> mergingCosts;
403-
mergingCosts.push_back(alloc.construct<MergingCost>());
402+
mergingCosts.emplace_back(alloc);
404403
for (auto &pred : predMap) {
405404
for (llvm::Instruction &lI : *pred.first) {
406-
if (Instruction *I = cache[&lI]) {
405+
if (Instruction *J = cache[&lI]) {
407406
mergeInstructions(tAlloc, cache, predMap, TTI, vectorBits, opMap,
408-
mergingCosts, I, pred.first, pred.second);
407+
mergingCosts, J, pred.first, pred.second);
409408
}
410409
}
411410
}
@@ -421,19 +420,19 @@ void mergeInstructions(BumpAlloc<> &alloc, Instruction::Cache &cache,
421420
auto [A, B] = pair;
422421
A = reMap[A];
423422
B = reMap[B];
424-
llvm::MutableArrayRef<Instruction *> operands =
425-
minCostStrategy->mergeOperands(
426-
A, B, MergingCost::Allocate{alloc, cache, reMap});
423+
auto operands = minCostStrategy->mergeOperands(
424+
A, B, MergingCost::Allocate{alloc, cache, reMap});
427425
A->replaceAllUsesOf(B)->setOperands(operands);
428426
reMap.remapFromTo(B, A);
429427
}
430428
// free memory
431429
tAlloc.reset();
432430
}
433431

434-
void mergeInstructions(BumpAlloc<> &alloc, Instruction::Cache &cache,
435-
LoopTree *loopForest, llvm::TargetTransformInfo &TTI,
436-
BumpAlloc<> &tAlloc, unsigned int vectorBits) {
432+
inline void mergeInstructions(BumpAlloc<> &alloc, Instruction::Cache &cache,
433+
LoopTree *loopForest,
434+
llvm::TargetTransformInfo &TTI,
435+
BumpAlloc<> &tAlloc, unsigned int vectorBits) {
437436
for (auto &predMap : loopForest->getPaths())
438437
mergeInstructions(alloc, cache, predMap, TTI, tAlloc, vectorBits);
439438
for (auto subLoop : loopForest->getSubLoops())

include/Instruction.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ struct Instruction {
173173
[[no_unique_address]] LinAlg::BumpPtrVector<RecipThroughputLatency> costs;
174174

175175
void setOperands(MutPtrVector<Instruction *> ops) {
176-
operands = ops;
176+
operands << ops;
177177
for (auto op : ops) op->users.insert(this);
178178
}
179179

@@ -1311,6 +1311,7 @@ struct Map {
13111311
}
13121312

13131313
}; // struct Map
1314+
13141315
} // namespace Predicate
13151316

13161317
inline auto Instruction::Cache::getInstruction(BumpAlloc<> &alloc,

include/LoopBlock.hpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,7 @@ struct ScheduledNode {
7171
addMemory(sId, store, nodeIndex);
7272
}
7373
[[nodiscard]] constexpr auto
74-
getMemAccesses(BumpAlloc<> &alloc,
75-
llvm::ArrayRef<MemoryAccess *> memAccess) const
74+
getMemAccesses(BumpAlloc<> &alloc, PtrVector<MemoryAccess *> memAccess) const
7675
-> Vector<Address *> {
7776
// First, we invert the schedule matrix.
7877
SquarePtrMatrix<int64_t> Phi = schedule.getPhi();
@@ -110,6 +109,7 @@ struct ScheduledNode {
110109
constexpr void addInNeighbor(unsigned int i) { inNeighbors.insert(i); }
111110
constexpr void init(BumpAlloc<> &alloc) {
112111
schedule = AffineSchedule(alloc, getNumLoops());
112+
schedule.getFusionOmega() << 0;
113113
}
114114
constexpr void addMemory(unsigned memId, MemoryAccess *mem,
115115
unsigned nodeIndex) {
@@ -1042,7 +1042,7 @@ class LinearProgramLoopBlock {
10421042
for (auto &&node : nodes) {
10431043
if (depth >= node.getNumLoops()) continue;
10441044
if (!hasActiveEdges(g, node)) {
1045-
node.getOffsetOmega()[depth] = std::numeric_limits<int64_t>::min();
1045+
node.getOffsetOmega(depth) = std::numeric_limits<int64_t>::min();
10461046
if (!node.phiIsScheduled(depth))
10471047
node.getSchedule(depth) << std::numeric_limits<int64_t>::min();
10481048
continue;
@@ -1219,7 +1219,7 @@ class LinearProgramLoopBlock {
12191219
Graph &gi = graphs[i];
12201220
if (!canFuse(*gp, gi, d)) {
12211221
// do not fuse
1222-
for (auto &&v : *gp) v.getFusionOmega()[d] = unfusedOffset;
1222+
for (auto &&v : *gp) v.getFusionOmega(d) = unfusedOffset;
12231223
++unfusedOffset;
12241224
// gi is the new base graph
12251225
gp = &gi;
@@ -1228,7 +1228,7 @@ class LinearProgramLoopBlock {
12281228
(*gp) |= gi;
12291229
}
12301230
// set omegas for gp
1231-
for (auto &&v : *gp) v.getFusionOmega()[d] = unfusedOffset;
1231+
for (auto &&v : *gp) v.getFusionOmega(d) = unfusedOffset;
12321232
++d;
12331233
// size_t numSat = satDeps.size();
12341234
for (auto i : baseGraphs)

include/LoopForest.hpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
#include <cstddef>
99
#include <iterator>
1010
#include <limits>
11-
1211
#include <llvm/ADT/SmallVector.h>
1312
#include <llvm/Analysis/LoopInfo.h>
1413
#include <llvm/Analysis/ScalarEvolution.h>
@@ -36,6 +35,10 @@ struct LoopTree {
3635
[[no_unique_address]] Optional<LoopTree *> parentLoop{nullptr};
3736
[[no_unique_address]] llvm::SmallVector<NotNull<MemoryAccess>> memAccesses{};
3837

38+
~LoopTree() {
39+
for (auto subLoop : subLoops) subLoop->~LoopTree();
40+
}
41+
3942
auto getPaths() -> llvm::MutableArrayRef<Predicate::Map> { return paths; }
4043
auto getPaths() const -> llvm::ArrayRef<Predicate::Map> { return paths; }
4144
auto getSubLoops() -> llvm::MutableArrayRef<NotNull<LoopTree>> {
@@ -56,8 +59,8 @@ struct LoopTree {
5659
}
5760
// LoopTree(const LoopTree &) = default;
5861
// LoopTree(LoopTree &&) = default;
59-
auto operator=(const LoopTree &) -> LoopTree & = default;
60-
auto operator=(LoopTree &&) -> LoopTree & = default;
62+
auto operator=(const LoopTree &) -> LoopTree & = delete;
63+
auto operator=(LoopTree &&) -> LoopTree & = delete;
6164
LoopTree(llvm::SmallVector<NotNull<LoopTree>> sL,
6265
llvm::SmallVector<Predicate::Map> pth)
6366
: loop(nullptr), subLoops(std::move(sL)), paths(std::move(pth)) {}
@@ -109,9 +112,9 @@ struct LoopTree {
109112
[[nodiscard]] auto size() const -> size_t { return subLoops.size(); }
110113

111114
static void split(BumpAlloc<> &alloc,
112-
llvm::SmallVectorImpl<NotNull<LoopTree>> &trees,
113-
llvm::SmallVectorImpl<Predicate::Map> &paths,
114-
llvm::SmallVectorImpl<NotNull<LoopTree>> &subTree) {
115+
llvm::SmallVector<NotNull<LoopTree>> &trees,
116+
llvm::SmallVector<Predicate::Map> &paths,
117+
llvm::SmallVector<NotNull<LoopTree>> &subTree) {
115118
if (subTree.size()) {
116119
assert(1 + subTree.size() == paths.size());
117120
auto *newTree =

include/Loops.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ struct AffineLoopNest
509509
for (size_t i = innermostLoopInd; i < numToRemove + innermostLoopInd;
510510
++i)
511511
A(m, i) = A(m, i + numRemainingLoops);
512-
A(m, _(numToRemove + innermostLoopInd, N)) = tmp;
512+
A(m, _(numToRemove + innermostLoopInd, N)) << tmp;
513513
}
514514
}
515515
} else

include/Math/Array.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,8 @@ struct MutArray : Array<T, S>, ArrayOps<T, S, MutArray<T, S>> {
283283

284284
constexpr MutArray(const MutArray &) = default;
285285
constexpr MutArray(MutArray &&) noexcept = default;
286-
constexpr auto operator=(const MutArray &) -> MutArray & = default;
286+
constexpr auto operator=(const MutArray &) -> MutArray & = delete;
287+
// constexpr auto operator=(const MutArray &) -> MutArray & = default;
287288
constexpr auto operator=(MutArray &&) noexcept -> MutArray & = default;
288289

289290
constexpr void truncate(S nz) {
@@ -851,7 +852,8 @@ struct ReallocView : ResizeableView<T, S, U> {
851852
#else
852853
T *newPtr = allocator.allocate(newCapacity);
853854
#endif
854-
if (U oldLen = U(this->sz)) std::copy_n(this->data(), oldLen, newPtr);
855+
if (U oldLen = U(this->sz))
856+
std::uninitialized_copy_n(this->data(), oldLen, newPtr);
855857
maybeDeallocate(newPtr, newCapacity);
856858
}
857859
[[nodiscard]] constexpr auto get_allocator() const noexcept -> A {

include/Math/BumpVector.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,3 +273,4 @@ template <typename T, unsigned InitialCapacity = 8> struct BumpPtrVector {
273273
static_assert(std::is_trivially_destructible_v<MutPtrVector<int64_t>>);
274274
static_assert(std::is_trivially_destructible_v<BumpPtrVector<int64_t>>);
275275
} // namespace LinAlg
276+
using LinAlg::BumpPtrVector;

include/Math/Constraints.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,7 @@ constexpr void slackEqualityConstraints(MutPtrMatrix<int64_t> C,
254254
// counts how many negative and positive elements there are in row `i`.
255255
// A row corresponds to a particular variable in `A'x <= b`.
256256
constexpr auto countNonZeroSign(DensePtrMatrix<int64_t> A, size_t i)
257-
-> std::pair<size_t, size_t> {
257+
-> std::array<size_t, 2> {
258258
size_t numNeg = 0;
259259
size_t numPos = 0;
260260
Row numRow = A.numRow();
@@ -263,7 +263,7 @@ constexpr auto countNonZeroSign(DensePtrMatrix<int64_t> A, size_t i)
263263
numNeg += (Aij < 0);
264264
numPos += (Aij > 0);
265265
}
266-
return std::make_pair(numNeg, numPos);
266+
return {numNeg, numPos};
267267
}
268268

269269
/// x == 0 -> 0, x < 0 -> 1, x > 0 -> 2

0 commit comments

Comments
 (0)