5353#include " llvm/Analysis/OptimizationRemarkEmitter.h"
5454#include " llvm/Analysis/PostDominators.h"
5555#include " llvm/Analysis/ScalarEvolution.h"
56- #include " llvm/Analysis/ScalarEvolutionExpressions.h"
5756#include " llvm/Analysis/TargetTransformInfo.h"
5857#include " llvm/IR/Function.h"
5958#include " llvm/IR/Verifier.h"
@@ -102,23 +101,6 @@ STATISTIC(NumHoistedInsts, "Number of hoisted preheader instructions.");
102101STATISTIC (NumSunkInsts, " Number of hoisted preheader instructions." );
103102STATISTIC (NumDA, " DA checks passed" );
104103
105- enum FusionDependenceAnalysisChoice {
106- FUSION_DEPENDENCE_ANALYSIS_SCEV,
107- FUSION_DEPENDENCE_ANALYSIS_DA,
108- FUSION_DEPENDENCE_ANALYSIS_ALL,
109- };
110-
111- static cl::opt<FusionDependenceAnalysisChoice> FusionDependenceAnalysis (
112- " loop-fusion-dependence-analysis" ,
113- cl::desc (" Which dependence analysis should loop fusion use?" ),
114- cl::values(clEnumValN(FUSION_DEPENDENCE_ANALYSIS_SCEV, " scev" ,
115- " Use the scalar evolution interface" ),
116- clEnumValN(FUSION_DEPENDENCE_ANALYSIS_DA, " da" ,
117- " Use the dependence analysis interface" ),
118- clEnumValN(FUSION_DEPENDENCE_ANALYSIS_ALL, " all" ,
119- " Use all available analyses" )),
120- cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_DA));
121-
122104static cl::opt<unsigned > FusionPeelMaxCount (
123105 " loop-fusion-peel-max-count" , cl::init(0 ), cl::Hidden,
124106 cl::desc(" Max number of iterations to be peeled from a loop, such that "
@@ -1123,190 +1105,82 @@ struct LoopFuser {
11231105 return true ;
11241106 }
11251107
1126- // / Rewrite all additive recurrences in a SCEV to use a new loop.
1127- class AddRecLoopReplacer : public SCEVRewriteVisitor <AddRecLoopReplacer> {
1128- public:
1129- AddRecLoopReplacer (ScalarEvolution &SE, const Loop &OldL, const Loop &NewL,
1130- bool UseMax = true )
1131- : SCEVRewriteVisitor(SE), Valid(true ), UseMax(UseMax), OldL(OldL),
1132- NewL (NewL) {}
1133-
1134- const SCEV *visitAddRecExpr (const SCEVAddRecExpr *Expr) {
1135- const Loop *ExprL = Expr->getLoop ();
1136- SmallVector<SCEVUse, 2 > Operands;
1137- if (ExprL == &OldL) {
1138- append_range (Operands, Expr->operands ());
1139- return SE.getAddRecExpr (Operands, &NewL, Expr->getNoWrapFlags ());
1140- }
1141-
1142- if (OldL.contains (ExprL)) {
1143- bool Pos = SE.isKnownPositive (Expr->getStepRecurrence (SE));
1144- if (!UseMax || !Pos || !Expr->isAffine ()) {
1145- Valid = false ;
1146- return Expr;
1147- }
1148- return visit (Expr->getStart ());
1149- }
1150-
1151- for (SCEVUse Op : Expr->operands ())
1152- Operands.push_back (visit (Op));
1153- return SE.getAddRecExpr (Operands, ExprL, Expr->getNoWrapFlags ());
1154- }
1155-
1156- bool wasValidSCEV () const { return Valid; }
1157-
1158- private:
1159- bool Valid, UseMax;
1160- const Loop &OldL, &NewL;
1161- };
1162-
1163- // / Return false if the access functions of \p I0 and \p I1 could cause
1164- // / a negative dependence.
1165- bool accessDiffIsPositive (const Loop &L0, const Loop &L1, Instruction &I0,
1166- Instruction &I1, bool EqualIsInvalid) {
1167- Value *Ptr0 = getLoadStorePointerOperand (&I0);
1168- Value *Ptr1 = getLoadStorePointerOperand (&I1);
1169- if (!Ptr0 || !Ptr1)
1170- return false ;
1171-
1172- const SCEV *SCEVPtr0 = SE.getSCEVAtScope (Ptr0, &L0);
1173- const SCEV *SCEVPtr1 = SE.getSCEVAtScope (Ptr1, &L1);
1174- #ifndef NDEBUG
1175- if (VerboseFusionDebugging)
1176- LLVM_DEBUG (dbgs () << " Access function check: " << *SCEVPtr0 << " vs "
1177- << *SCEVPtr1 << " \n " );
1178- #endif
1179- AddRecLoopReplacer Rewriter (SE, L0, L1);
1180- SCEVPtr0 = Rewriter.visit (SCEVPtr0);
1181- #ifndef NDEBUG
1182- if (VerboseFusionDebugging)
1183- LLVM_DEBUG (dbgs () << " Access function after rewrite: " << *SCEVPtr0
1184- << " [Valid: " << Rewriter.wasValidSCEV () << " ]\n " );
1185- #endif
1186- if (!Rewriter.wasValidSCEV ())
1187- return false ;
1188-
1189- // TODO: isKnownPredicate doesnt work well when one SCEV is loop carried (by
1190- // L0) and the other is not. We could check if it is monotone and test
1191- // the beginning and end value instead.
1192-
1193- BasicBlock *L0Header = L0.getHeader ();
1194- auto HasNonLinearDominanceRelation = [&](const SCEV *S) {
1195- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S);
1196- if (!AddRec)
1197- return false ;
1198- return !DT.dominates (L0Header, AddRec->getLoop ()->getHeader ()) &&
1199- !DT.dominates (AddRec->getLoop ()->getHeader (), L0Header);
1200- };
1201- if (SCEVExprContains (SCEVPtr1, HasNonLinearDominanceRelation))
1202- return false ;
1203-
1204- ICmpInst::Predicate Pred =
1205- EqualIsInvalid ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_SGE;
1206- bool IsAlwaysGE = SE.isKnownPredicate (Pred, SCEVPtr0, SCEVPtr1);
1207- #ifndef NDEBUG
1208- if (VerboseFusionDebugging)
1209- LLVM_DEBUG (dbgs () << " Relation: " << *SCEVPtr0
1210- << (IsAlwaysGE ? " >= " : " may < " ) << *SCEVPtr1
1211- << " \n " );
1212- #endif
1213- return IsAlwaysGE;
1214- }
1215-
12161108 // / Return true if the dependences between @p I0 (in @p L0) and @p I1 (in
1217- // / @p L1) allow loop fusion of @p L0 and @p L1. The dependence analyses
1218- // / specified by @p DepChoice are used to determine this.
1109+ // / @p L1) allow loop fusion of @p L0 and @p L1.
12191110 bool dependencesAllowFusion (const FusionCandidate &FC0,
12201111 const FusionCandidate &FC1, Instruction &I0,
1221- Instruction &I1, bool AnyDep,
1222- FusionDependenceAnalysisChoice DepChoice) {
1112+ Instruction &I1) {
12231113#ifndef NDEBUG
12241114 if (VerboseFusionDebugging) {
1225- LLVM_DEBUG (dbgs () << " Check dep: " << I0 << " vs " << I1 << " : "
1226- << DepChoice << " \n " );
1115+ LLVM_DEBUG (dbgs () << " Check dep: " << I0 << " vs " << I1 << " \n " );
12271116 }
12281117#endif
1229- switch (DepChoice) {
1230- case FUSION_DEPENDENCE_ANALYSIS_SCEV:
1231- return accessDiffIsPositive (*FC0.L , *FC1.L , I0, I1, AnyDep);
1232- case FUSION_DEPENDENCE_ANALYSIS_DA: {
1233- auto DepResult = DI.depends (&I0, &I1);
1234- if (!DepResult)
1235- return true ;
1118+ auto DepResult = DI.depends (&I0, &I1);
1119+ if (!DepResult)
1120+ return true ;
12361121#ifndef NDEBUG
1237- if (VerboseFusionDebugging) {
1238- LLVM_DEBUG (dbgs () << " DA res: " ; DepResult->dump (dbgs ());
1239- dbgs () << " [#l: " << DepResult->getLevels () << " ][Ordered: "
1240- << (DepResult->isOrdered () ? " true" : " false" )
1241- << " ]\n " );
1242- LLVM_DEBUG (dbgs () << " DepResult Levels: " << DepResult->getLevels ()
1243- << " \n " );
1244- }
1122+ if (VerboseFusionDebugging) {
1123+ LLVM_DEBUG (dbgs () << " DA res: " ; DepResult->dump (dbgs ());
1124+ dbgs () << " [#l: " << DepResult->getLevels () << " ][Ordered: "
1125+ << (DepResult->isOrdered () ? " true" : " false" )
1126+ << " ]\n " );
1127+ LLVM_DEBUG (dbgs () << " DepResult Levels: " << DepResult->getLevels ()
1128+ << " \n " );
1129+ }
12451130#endif
1246- unsigned Levels = DepResult->getLevels ();
1247- unsigned SameSDLevels = DepResult->getSameSDLevels ();
1248- unsigned CurLoopLevel = FC0.L ->getLoopDepth ();
1249-
1250- // Check if DA is missing info regarding the current loop level
1251- if (CurLoopLevel > Levels + SameSDLevels)
1252- return false ;
1253-
1254- // Iterating over the outer levels.
1255- for (unsigned Level = 1 ; Level <= std::min (CurLoopLevel - 1 , Levels);
1256- ++Level) {
1257- unsigned Direction = DepResult->getDirection (Level, false );
1258-
1259- // Check if the direction vector does not include equality. If an outer
1260- // loop has a non-equal direction, outer indicies are different and it
1261- // is safe to fuse.
1262- if (!(Direction & Dependence::DVEntry::EQ)) {
1263- LLVM_DEBUG (dbgs () << " Safe to fuse due to non-equal acceses in the "
1264- " outer loops\n " );
1265- NumDA++;
1266- return true ;
1267- }
1268- }
1131+ unsigned Levels = DepResult->getLevels ();
1132+ unsigned SameSDLevels = DepResult->getSameSDLevels ();
1133+ unsigned CurLoopLevel = FC0.L ->getLoopDepth ();
12691134
1270- assert (CurLoopLevel > Levels && " Fusion candidates are not separated" );
1135+ // Check if DA is missing info regarding the current loop level
1136+ if (CurLoopLevel > Levels + SameSDLevels)
1137+ return false ;
12711138
1272- if (DepResult->isScalar (CurLoopLevel, true ) && !DepResult->isAnti ()) {
1273- LLVM_DEBUG (dbgs () << " Safe to fuse due to a loop-invariant non-anti "
1274- " dependency\n " );
1139+ // Iterating over the outer levels.
1140+ for (unsigned Level = 1 ; Level <= std::min (CurLoopLevel - 1 , Levels);
1141+ ++Level) {
1142+ unsigned Direction = DepResult->getDirection (Level, false );
1143+
1144+ // Check if the direction vector does not include equality. If an outer
1145+ // loop has a non-equal direction, outer indices are different and it
1146+ // is safe to fuse.
1147+ if (!(Direction & Dependence::DVEntry::EQ)) {
1148+ LLVM_DEBUG (dbgs () << " Safe to fuse due to non-equal acceses in the "
1149+ " outer loops\n " );
12751150 NumDA++;
12761151 return true ;
12771152 }
1153+ }
12781154
1279- unsigned CurDir = DepResult->getDirection (CurLoopLevel, true );
1280-
1281- // Check if the direction vector does not include greater direction. In
1282- // that case, the dependency is not a backward loop-carried and is legal
1283- // to fuse. For example here we have a forward dependency
1284- // for (int i = 0; i < n; i++)
1285- // A[i] = ...;
1286- // for (int i = 0; i < n; i++)
1287- // ... = A[i-1];
1288- if (!(CurDir & Dependence::DVEntry::GT)) {
1289- LLVM_DEBUG (dbgs () << " Safe to fuse with no backward loop-carried "
1290- " dependency\n " );
1291- NumDA++;
1292- return true ;
1293- }
1155+ assert (CurLoopLevel > Levels && " Fusion candidates are not separated" );
12941156
1295- if (DepResult->getNextPredecessor () || DepResult->getNextSuccessor ())
1296- LLVM_DEBUG (
1297- dbgs () << " TODO: Implement pred/succ dependence handling! \n " );
1298-
1299- return false ;
1157+ if (DepResult->isScalar (CurLoopLevel, true ) && ! DepResult->isAnti ()) {
1158+ LLVM_DEBUG (dbgs () << " Safe to fuse due to a loop-invariant non-anti "
1159+ " dependency \n " );
1160+ NumDA++;
1161+ return true ;
13001162 }
13011163
1302- case FUSION_DEPENDENCE_ANALYSIS_ALL:
1303- return dependencesAllowFusion (FC0, FC1, I0, I1, AnyDep,
1304- FUSION_DEPENDENCE_ANALYSIS_SCEV) ||
1305- dependencesAllowFusion (FC0, FC1, I0, I1, AnyDep,
1306- FUSION_DEPENDENCE_ANALYSIS_DA);
1164+ unsigned CurDir = DepResult->getDirection (CurLoopLevel, true );
1165+
1166+ // Check if the direction vector does not include the greater direction.
1167+ // In that case, the dependency is not backward loop-carried and it is
1168+ // legal to fuse. For example, here we have a forward dependency:
1169+ // for (int i = 0; i < n; i++)
1170+ // A[i] = ...;
1171+ // for (int i = 0; i < n; i++)
1172+ // ... = A[i-1];
1173+ if (!(CurDir & Dependence::DVEntry::GT)) {
1174+ LLVM_DEBUG (dbgs () << " Safe to fuse with no backward loop-carried "
1175+ " dependency\n " );
1176+ NumDA++;
1177+ return true ;
13071178 }
13081179
1309- llvm_unreachable (" Unknown fusion dependence analysis choice!" );
1180+ if (DepResult->getNextPredecessor () || DepResult->getNextSuccessor ())
1181+ LLVM_DEBUG (dbgs () << " TODO: Implement pred/succ dependence handling!\n " );
1182+
1183+ return false ;
13101184 }
13111185
13121186 // / Perform a dependence check and return if @p FC0 and @p FC1 can be fused.
@@ -1319,30 +1193,22 @@ struct LoopFuser {
13191193
13201194 for (Instruction *WriteL0 : FC0.MemWrites ) {
13211195 for (Instruction *WriteL1 : FC1.MemWrites )
1322- if (!dependencesAllowFusion (FC0, FC1, *WriteL0, *WriteL1,
1323- /* AnyDep */ false ,
1324- FusionDependenceAnalysis)) {
1196+ if (!dependencesAllowFusion (FC0, FC1, *WriteL0, *WriteL1)) {
13251197 return false ;
13261198 }
13271199 for (Instruction *ReadL1 : FC1.MemReads )
1328- if (!dependencesAllowFusion (FC0, FC1, *WriteL0, *ReadL1,
1329- /* AnyDep */ false ,
1330- FusionDependenceAnalysis)) {
1200+ if (!dependencesAllowFusion (FC0, FC1, *WriteL0, *ReadL1)) {
13311201 return false ;
13321202 }
13331203 }
13341204
13351205 for (Instruction *WriteL1 : FC1.MemWrites ) {
13361206 for (Instruction *WriteL0 : FC0.MemWrites )
1337- if (!dependencesAllowFusion (FC0, FC1, *WriteL0, *WriteL1,
1338- /* AnyDep */ false ,
1339- FusionDependenceAnalysis)) {
1207+ if (!dependencesAllowFusion (FC0, FC1, *WriteL0, *WriteL1)) {
13401208 return false ;
13411209 }
13421210 for (Instruction *ReadL0 : FC0.MemReads )
1343- if (!dependencesAllowFusion (FC0, FC1, *ReadL0, *WriteL1,
1344- /* AnyDep */ false ,
1345- FusionDependenceAnalysis)) {
1211+ if (!dependencesAllowFusion (FC0, FC1, *ReadL0, *WriteL1)) {
13461212 return false ;
13471213 }
13481214 }
0 commit comments