@@ -308,12 +308,13 @@ class LinearProgramLoopBlock {
308
308
// earlier level.
309
309
// Vector<bool, 256> doNotAddEdge;
310
310
// Vector<bool, 256> scheduled;
311
- [[no_unique_address]] size_t numPhiCoefs{0 };
312
- [[no_unique_address]] size_t numOmegaCoefs{0 };
313
- [[no_unique_address]] size_t numLambda{0 };
314
- [[no_unique_address]] size_t numBounding{0 };
315
- [[no_unique_address]] size_t numConstraints{0 };
316
- [[no_unique_address]] size_t numActiveEdges{0 };
311
+ [[no_unique_address]] unsigned numPhiCoefs{0 }; // running total returned by node.updatePhiOffset; number of Phi columns
312
+ [[no_unique_address]] unsigned numOmegaCoefs{0 }; // running total returned by node.updateOmegaOffset; number of omega columns
313
+ [[no_unique_address]] unsigned numSlack{0 }; // incremented once per counted node whose phi is not scheduled at the current depth
314
+ [[no_unique_address]] unsigned numLambda{0 }; // number of lambda (Farkas multiplier) columns
315
+ [[no_unique_address]] unsigned numBounding{0 }; // number of u columns
316
+ [[no_unique_address]] unsigned numConstraints{0 }; // simplex rows before the numSlack extra rows are added -- TODO confirm
317
+ [[no_unique_address]] unsigned numActiveEdges{0 }; // number of w columns
317
318
318
319
public:
319
320
using BitSet = ::MemoryAccess::BitSet;
@@ -812,11 +813,14 @@ class LinearProgramLoopBlock {
812
813
// C, lambdas, omegas, Phis
813
814
numOmegaCoefs = 0 ;
814
815
numPhiCoefs = 0 ;
816
+ numSlack = 0 ;
815
817
for (auto &&node : nodes) {
816
818
// note, we had d > node.getNumLoops() for omegas earlier; why?
817
819
if ((d >= node.getNumLoops ()) || (!hasActiveEdges (g, node, d))) continue ;
818
- if (!node.phiIsScheduled (d))
820
+ if (!node.phiIsScheduled (d)) {
819
821
numPhiCoefs = node.updatePhiOffset (numPhiCoefs);
822
+ ++numSlack;
823
+ }
820
824
numOmegaCoefs = node.updateOmegaOffset (numOmegaCoefs);
821
825
}
822
826
}
@@ -825,25 +829,38 @@ class LinearProgramLoopBlock {
825
829
for (auto edge : edges) edge.validate ();
826
830
}
827
831
#endif
828
- // the plan is to generally avoid instantiating the omni-simplex
829
- // first, we solve individual problems
832
+ /// For now, we instantiate a dense simplex specifying the full problem.
833
+ ///
834
+ /// Eventually, the plan is to generally avoid instantiating the omni-simplex;
835
+ /// first, we solve individual problems.
836
+ ///
837
+ /// The order of variables in the simplex is:
838
+ /// C, lambdas, slack, omegas, Phis, w, u
839
+ /// where
840
+ /// C: constraints, rest of matrix * variables == C
841
+ /// lambdas: Farkas multipliers
842
+ /// slack: slack variables from independent phi solution constraints
843
+ /// omegas: scheduling offsets
844
+ /// Phis: scheduling rotations
845
+ /// w: bounding offsets, independent of symbolic variables
846
+ /// u: bounding offsets, dependent on symbolic variables
830
847
auto instantiateOmniSimplex (const Graph &g, size_t d, bool satisfyDeps)
831
848
-> Optional<Simplex *> {
832
849
auto omniSimplex =
833
- Simplex::create (allocator, numConstraints + numOmegaCoefs ,
850
+ Simplex::create (allocator, numConstraints + numSlack ,
834
851
1 + numBounding + numActiveEdges + numPhiCoefs +
835
- 2 * numOmegaCoefs + numLambda);
852
+ numOmegaCoefs + numSlack + numLambda);
836
853
auto C{omniSimplex->getConstraints ()};
837
854
C << 0 ;
838
855
// layout of omniSimplex:
839
856
// Order: C, then rev-priority to minimize
840
- // C, lambdas, omegas, Phis, w, u
857
+ // C, lambdas, slack, omegas, Phis, w, u
841
858
// rows give constraints; each edge gets its own
842
859
// numBounding = num u
843
860
// numActiveEdges = num w
844
861
Row c = 0 ;
845
- Col l = 1 , o = 1 + numLambda, p = o + numOmegaCoefs, w = p + numPhiCoefs ,
846
- u = w + numActiveEdges;
862
+ Col l = 1 , o = 1 + numLambda + numSlack, p = o + numOmegaCoefs ,
863
+ w = p + numPhiCoefs, u = w + numActiveEdges;
847
864
for (size_t e = 0 ; e < edges.size (); ++e) {
848
865
Dependence &edge = edges[e];
849
866
if (g.isInactive (e, d)) continue ;
@@ -985,7 +1002,7 @@ class LinearProgramLoopBlock {
985
1002
-> std::optional<BitSet> {
986
1003
auto omniSimplex = instantiateOmniSimplex (g, depth, satisfyDeps);
987
1004
if (!omniSimplex) return {};
988
- auto sol = omniSimplex->rLexMinStop (numLambda);
1005
+ auto sol = omniSimplex->rLexMinStop (numLambda + numSlack );
989
1006
updateSchedules (g, depth, sol);
990
1007
return deactivateSatisfiedEdges (g, depth,
991
1008
sol[_ (numPhiCoefs + numOmegaCoefs, end)]);
@@ -1077,39 +1094,39 @@ class LinearProgramLoopBlock {
1077
1094
return 0 ;
1078
1095
}
1079
1096
/// Writes the "independent solution" constraint rows into the trailing rows
/// of the omni-simplex constraint matrix. The description below follows the
/// added (`+`) lines of this change.
///
/// One row is written per node that passes the filter: its phi is not yet
/// scheduled at depth `d`, it has active edges at `d`, and (in the `d > 0`
/// branch only) `d < node.getNumLoops()`. Each row gets
///   - `1` in column 0,
///   - coefficients in the node's phi columns (`getPhiOffsetRange() + o`):
///     all ones when `d == 0`, otherwise the sum of the rows of `N` after
///     each row is multiplied by its `lexSign`,
///   - `-1` in the next slack column, which turns the row into a `>=`.
///
/// @param omniSimplex simplex whose constraint matrix is filled in place
/// @param g           graph queried through `hasActiveEdges`
/// @param d           depth currently being scheduled
void addIndependentSolutionConstraints (NotNull<Simplex> omniSimplex,
1080
- const Graph &g, size_t depth ) {
1097
+ const Graph &g, size_t d ) {
1081
1098
// omniSimplex->setNumCons(omniSimplex->getNumCons() +
1082
1099
// memory.size());
1083
1100
// omniSimplex->reserveExtraRows(memory.size());
1084
1101
auto C{omniSimplex->getConstraints ()};
1085
- size_t i = size_t {C.numRow ()} - numOmegaCoefs;
1086
- size_t o = 1 + numLambda + numOmegaCoefs;
1087
- if (depth == 0 ) {
1102
// i: first of the last numSlack rows of C.
// s: starts at numLambda, so the first `++s` below yields column
//    numLambda + 1, i.e. the column right after the lambdas.
// o: offset added to a node's phi range; it skips column 0, the lambdas,
//    the slack columns and the omegas.
+ size_t i = size_t {C.numRow ()} - numSlack, s = numLambda,
1103
+ o = 1 + numSlack + numLambda + numOmegaCoefs;
1104
+ if (d == 0 ) {
1088
1105
// add ones >= 0
1089
1106
for (auto &&node : nodes) {
1090
- if (node.phiIsScheduled (depth ) || (!hasActiveEdges (g, node))) continue ;
1107
// NOTE(review): unlike the loop that counts numSlack, this branch does not
// test `d >= node.getNumLoops()`. The two agree only if every node has at
// least one loop -- confirm, otherwise `i` can run past the reserved rows.
+ if (node.phiIsScheduled (d ) || (!hasActiveEdges (g, node, d ))) continue ;
1091
1108
C (i, 0 ) = 1 ;
1092
1109
C (i, node.getPhiOffsetRange () + o) << 1 ;
1093
- C (i++, last ) = -1 ; // for >=
1110
+ C (i++, ++s ) = -1 ; // for >=
1094
1111
}
1095
1112
} else {
1096
1113
DenseMatrix<int64_t > A, N;
1097
1114
for (auto &&node : nodes) {
1098
- if (node.phiIsScheduled (depth ) || (depth >= node.getNumLoops ()) ||
1099
- (!hasActiveEdges (g, node)))
1115
+ if (node.phiIsScheduled (d ) || (d >= node.getNumLoops ()) ||
1116
+ (!hasActiveEdges (g, node, d )))
1100
1117
continue ;
1101
- A.resizeForOverwrite (Row{size_t (node.getPhi ().numCol ())}, Col{depth });
1102
- A << node.getPhi ()(_ (0 , depth ), _).transpose ();
1118
// A is the transpose of the first d rows of this node's phi; N is filled
// by NormalForm::nullSpace11 (presumably a null-space basis -- confirm).
+ A.resizeForOverwrite (Row{size_t (node.getPhi ().numCol ())}, Col{d });
1119
+ A << node.getPhi ()(_ (0 , d ), _).transpose ();
1103
1120
NormalForm::nullSpace11 (N, A);
1104
1121
C (i, 0 ) = 1 ;
1105
1122
MutPtrVector<int64_t > cc{C (i, node.getPhiOffsetRange () + o)};
1106
1123
// sum(N,dims=1) >= 1 after flipping row signs to be lex > 0
1107
1124
for (size_t m = 0 ; m < N.numRow (); ++m)
1108
1125
cc += N (m, _) * lexSign (N (m, _));
1109
- C (i++, last ) = -1 ; // for >=
1126
+ C (i++, ++s ) = -1 ; // for >=
1110
1127
}
1111
1128
}
1112
- omniSimplex->truncateConstraints ( i);
1129
// Every reserved row must have been written: the number of rows filled here
// has to equal the numSlack count used when the simplex was sized.
+ assert ( omniSimplex->getNumCons () == i);
1113
1130
assert (!allZero (omniSimplex->getConstraints ()(last, _)));
1114
1131
}
1115
1132
[[nodiscard]] static auto nonZeroMask (const AbstractVector auto &x)
0 commit comments