diff --git a/src/coreclr/jit/bitsetasshortlong.h b/src/coreclr/jit/bitsetasshortlong.h index 5a2e315673cbe0..2ef293820fd264 100644 --- a/src/coreclr/jit/bitsetasshortlong.h +++ b/src/coreclr/jit/bitsetasshortlong.h @@ -561,6 +561,124 @@ class BitSetOps + static bool VisitBits(Env env, BitSetShortLongRep bs, TFunc func) + { +#ifdef HOST_64BIT +#define BitScanForwardSizeT BitScanForward64 +#else +#define BitScanForwardSizeT BitScanForward +#endif + + if (BitSetOps::IsShort(env)) + { + size_t bits = reinterpret_cast(bs); + DWORD index; + while (BitScanForwardSizeT(&index, bits)) + { + if (!func(index)) + return false; + + bits ^= size_t(1) << index; + } + } + else + { + unsigned len = BitSetTraits::GetArrSize(env); + for (unsigned i = 0; i < len; i++) + { + size_t bits = bs[i]; + DWORD index; + while (BitScanForwardSizeT(&index, bits)) + { + if (!func(i * BitsInSizeT + index)) + return false; + + bits ^= size_t(1) << index; + } + } + } + + return true; +#undef BitScanForwardSizeT + } + + //------------------------------------------------------------------------ + // VisitBitsReverse: Invoke a callback for each index that is set in the + // bit vector, in descending order of indices. + // + // Type parameters: + // TFunc - Type of callback functor + // + // Arguments: + // env - The traits + // bs - The bit vector + // func - The functor callback. Return true to continue to the next bit, + // and false to abort. + // + // Returns: + // True if all bits were iterated; false if the callback returned false + // and iteration was aborted. 
+ // + template + static bool VisitBitsReverse(Env env, BitSetShortLongRep bs, TFunc func) + { +#ifdef HOST_64BIT +#define BitScanReverseSizeT BitScanReverse64 +#else +#define BitScanReverseSizeT BitScanReverse +#endif + + if (BitSetOps::IsShort(env)) + { + size_t bits = reinterpret_cast(bs); + DWORD index; + while (BitScanReverseSizeT(&index, bits)) + { + if (!func(index)) + return false; + + bits ^= size_t(1) << index; + } + } + else + { + unsigned len = BitSetTraits::GetArrSize(env); + for (unsigned i = len; i != 0; i--) + { + size_t bits = bs[i - 1]; + DWORD index; + while (BitScanReverseSizeT(&index, bits)) + { + if (!func((i - 1) * BitsInSizeT + index)) + return false; + + bits ^= size_t(1) << index; + } + } + } + + return true; +#undef BitScanReverseSizeT + } + typedef const BitSetShortLongRep& ValArgType; typedef BitSetShortLongRep RetValType; }; diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index 307b3580f92942..9a7df5016c49d9 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -2097,7 +2097,7 @@ class AllSuccessorEnumerator } // Returns the next available successor or `nullptr` if there are no more successors. - BasicBlock* NextSuccessor(Compiler* comp) + BasicBlock* NextSuccessor() { m_curSucc++; if (m_curSucc >= m_numSuccs) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index d3eabe34466c4d..745372a33b02e2 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1957,6 +1957,197 @@ inline LoopFlags& operator&=(LoopFlags& a, LoopFlags b) return a = (LoopFlags)((unsigned short)a & (unsigned short)b); } +// Represents a depth-first search tree of the flow graph. 
+class FlowGraphDfsTree +{ + Compiler* m_comp; + BasicBlock** m_postOrder; + unsigned m_postOrderCount; + +public: + FlowGraphDfsTree(Compiler* comp, BasicBlock** postOrder, unsigned postOrderCount) + : m_comp(comp) + , m_postOrder(postOrder) + , m_postOrderCount(postOrderCount) + { + } + + Compiler* GetCompiler() const + { + return m_comp; + } + + BasicBlock** GetPostOrder() const + { + return m_postOrder; + } + + unsigned GetPostOrderCount() const + { + return m_postOrderCount; + } + + BitVecTraits PostOrderTraits() const + { + return BitVecTraits(m_postOrderCount, m_comp); + } + + bool Contains(BasicBlock* block) const; + bool IsAncestor(BasicBlock* ancestor, BasicBlock* descendant) const; +}; + +class FlowGraphNaturalLoop +{ + friend class FlowGraphNaturalLoops; + + const FlowGraphDfsTree* m_tree; + BasicBlock* m_header; + FlowGraphNaturalLoop* m_parent = nullptr; + // Bit vector of blocks in the loop; each index is the RPO index of a block, + // with the head block's RPO index subtracted. 
+ BitVec m_blocks; + unsigned m_blocksSize = 0; + jitstd::vector m_backEdges; + jitstd::vector m_entryEdges; + jitstd::vector m_exitEdges; + unsigned m_index = 0; + + FlowGraphNaturalLoop(const FlowGraphDfsTree* tree, BasicBlock* head); + + unsigned LoopBlockBitVecIndex(BasicBlock* block); + bool TryGetLoopBlockBitVecIndex(BasicBlock* block, unsigned* pIndex); + + BitVecTraits LoopBlockTraits(); +public: + BasicBlock* GetHeader() const + { + return m_header; + } + + const FlowGraphDfsTree* GetDfsTree() const + { + return m_tree; + } + + FlowGraphNaturalLoop* GetParent() const + { + return m_parent; + } + + unsigned GetIndex() const + { + return m_index; + } + + const jitstd::vector& BackEdges() + { + return m_backEdges; + } + + const jitstd::vector& EntryEdges() + { + return m_entryEdges; + } + + const jitstd::vector& ExitEdges() + { + return m_exitEdges; + } + + bool ContainsBlock(BasicBlock* block); + + template + BasicBlockVisit VisitLoopBlocksReversePostOrder(TFunc func); + + template + BasicBlockVisit VisitLoopBlocksPostOrder(TFunc func); + + template + BasicBlockVisit VisitLoopBlocks(TFunc func); +}; + +class FlowGraphNaturalLoops +{ + const FlowGraphDfsTree* m_dfs; + jitstd::vector m_loops; + unsigned m_improperLoopHeaders = 0; + + FlowGraphNaturalLoops(const FlowGraphDfsTree* dfs); + + static bool FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, jitstd::list& worklist); +public: + size_t NumLoops() + { + return m_loops.size(); + } + + bool HaveNonNaturalLoopCycles() + { + return m_improperLoopHeaders > 0; + } + + FlowGraphNaturalLoop* GetLoopFromHeader(BasicBlock* header); + + bool IsLoopBackEdge(FlowEdge* edge); + bool IsLoopExitEdge(FlowEdge* edge); + + class LoopsPostOrderIter + { + jitstd::vector* m_loops; + + public: + LoopsPostOrderIter(jitstd::vector* loops) + : m_loops(loops) + { + } + + jitstd::vector::reverse_iterator begin() + { + return m_loops->rbegin(); + } + + jitstd::vector::reverse_iterator end() + { + return m_loops->rend(); + } + }; + + 
class LoopsReversePostOrderIter + { + jitstd::vector* m_loops; + + public: + LoopsReversePostOrderIter(jitstd::vector* loops) + : m_loops(loops) + { + } + + jitstd::vector::iterator begin() + { + return m_loops->begin(); + } + + jitstd::vector::iterator end() + { + return m_loops->end(); + } + }; + + // Iterate the loops in post order (child loops before parent loops) + LoopsPostOrderIter InPostOrder() + { + return LoopsPostOrderIter(&m_loops); + } + + // Iterate the loops in reverse post order (parent loops before child loops) + LoopsReversePostOrderIter InReversePostOrder() + { + return LoopsReversePostOrderIter(&m_loops); + } + + static FlowGraphNaturalLoops* Find(const FlowGraphDfsTree* dfs); +}; + // The following holds information about instr offsets in terms of generated code. enum class IPmappingDscKind @@ -2052,6 +2243,7 @@ class Compiler friend class LocalsUseVisitor; friend class Promotion; friend class ReplaceVisitor; + friend class FlowGraphNaturalLoop; #ifdef FEATURE_HW_INTRINSICS friend struct HWIntrinsicInfo; @@ -4493,8 +4685,7 @@ class Compiler unsigned fgBBNumMax; // The max bbNum that has been assigned to basic blocks unsigned fgDomBBcount; // # of BBs for which we have dominator and reachability information BasicBlock** fgBBReversePostorder; // Blocks in reverse postorder - BasicBlock** fgSSAPostOrder; // Blocks in postorder, computed during SSA - unsigned fgSSAPostOrderCount; // Number of blocks in fgSSAPostOrder + FlowGraphDfsTree* m_dfs; // After the dominance tree is computed, we cache a DFS preorder number and DFS postorder number to compute // dominance queries in O(1). 
fgDomTreePreOrder and fgDomTreePostOrder are arrays giving the block's preorder and @@ -5588,6 +5779,8 @@ class Compiler PhaseStatus fgSetBlockOrder(); + FlowGraphDfsTree* fgComputeDfs(); + void fgRemoveReturnBlock(BasicBlock* block); void fgConvertBBToThrowBB(BasicBlock* block); diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 26f4392326a0dd..e8b58faf4dfaa3 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -591,9 +591,14 @@ BasicBlockVisit BasicBlock::VisitAllSuccs(Compiler* comp, TFunc func) switch (bbJumpKind) { case BBJ_EHFINALLYRET: - for (unsigned i = 0; i < bbJumpEhf->bbeCount; i++) + // This can run before import, in which case we haven't converted + // LEAVE into callfinally yet, and haven't added return successors. + if (bbJumpEhf != nullptr) { - RETURN_ON_ABORT(func(bbJumpEhf->bbeSuccs[i])); + for (unsigned i = 0; i < bbJumpEhf->bbeCount; i++) + { + RETURN_ON_ABORT(func(bbJumpEhf->bbeSuccs[i])); + } } return VisitEHSuccs(comp, func); @@ -673,9 +678,14 @@ BasicBlockVisit BasicBlock::VisitRegularSuccs(Compiler* comp, TFunc func) switch (bbJumpKind) { case BBJ_EHFINALLYRET: - for (unsigned i = 0; i < bbJumpEhf->bbeCount; i++) + // This can run before import, in which case we haven't converted + // LEAVE into callfinally yet, and haven't added return successors. + if (bbJumpEhf != nullptr) { - RETURN_ON_ABORT(func(bbJumpEhf->bbeSuccs[i])); + for (unsigned i = 0; i < bbJumpEhf->bbeCount; i++) + { + RETURN_ON_ABORT(func(bbJumpEhf->bbeSuccs[i])); + } } return BasicBlockVisit::Continue; @@ -4936,6 +4946,86 @@ inline bool Compiler::compCanHavePatchpoints(const char** reason) return whyNot == nullptr; } +//------------------------------------------------------------------------------ +// FlowGraphNaturalLoop::VisitLoopBlocksReversePostOrder: Visit all of the +// loop's blocks in reverse post order. 
+// +// Type parameters: +// TFunc - Callback functor type +// +// Arguments: +// func - Callback functor that takes a BasicBlock* and returns a +// BasicBlockVisit. +// +// Returns: +// BasicBlockVisit that indicated whether the visit was aborted by the +// callback or whether all blocks were visited. +// +template +BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksReversePostOrder(TFunc func) +{ + BitVecTraits traits(m_blocksSize, m_tree->GetCompiler()); + bool result = BitVecOps::VisitBits(&traits, m_blocks, [=](unsigned index) { + // head block rpo index = PostOrderCount - 1 - headPostOrderIndex + // loop block rpo index = head block rpoIndex + index + // loop block po index = PostOrderCount - 1 - loop block rpo index + // = headPostOrderIndex - index + unsigned poIndex = m_header->bbPostorderNum - index; + assert(poIndex < m_tree->GetPostOrderCount()); + return func(m_tree->GetPostOrder()[poIndex]) == BasicBlockVisit::Continue; + }); + + return result ? BasicBlockVisit::Continue : BasicBlockVisit::Abort; +} + +//------------------------------------------------------------------------------ +// FlowGraphNaturalLoop::VisitLoopBlocksPostOrder: Visit all of the loop's +// blocks in post order. +// +// Type parameters: +// TFunc - Callback functor type +// +// Arguments: +// func - Callback functor that takes a BasicBlock* and returns a +// BasicBlockVisit. +// +// Returns: +// BasicBlockVisit that indicated whether the visit was aborted by the +// callback or whether all blocks were visited. +// +template +BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksPostOrder(TFunc func) +{ + BitVecTraits traits(m_blocksSize, m_tree->GetCompiler()); + bool result = BitVecOps::VisitBitsReverse(&traits, m_blocks, [=](unsigned index) { + unsigned poIndex = m_header->bbPostorderNum - index; + assert(poIndex < m_tree->GetPostOrderCount()); + return func(m_tree->GetPostOrder()[poIndex]) == BasicBlockVisit::Continue; + }); + + return result ? 
BasicBlockVisit::Continue : BasicBlockVisit::Abort; +} + +//------------------------------------------------------------------------------ +// FlowGraphNaturalLoop::VisitLoopBlocks: Visit all of the loop's blocks. +// +// Type parameters: +// TFunc - Callback functor type +// +// Arguments: +// func - Callback functor that takes a BasicBlock* and returns a +// BasicBlockVisit. +// +// Returns: +// BasicBlockVisit that indicated whether the visit was aborted by the +// callback or whether all blocks were visited. +// +template +BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocks(TFunc func) +{ + return VisitLoopBlocksReversePostOrder(func); +} + /*****************************************************************************/ #endif //_COMPILER_HPP_ /*****************************************************************************/ diff --git a/src/coreclr/jit/compmemkind.h b/src/coreclr/jit/compmemkind.h index 645a6b44f80ee2..3112cba822d1bc 100644 --- a/src/coreclr/jit/compmemkind.h +++ b/src/coreclr/jit/compmemkind.h @@ -17,6 +17,8 @@ CompMemKindMacro(ImpStack) CompMemKindMacro(BasicBlock) CompMemKindMacro(CallArgs) CompMemKindMacro(FlowEdge) +CompMemKindMacro(DepthFirstSearch) +CompMemKindMacro(Loops) CompMemKindMacro(TreeStatementList) CompMemKindMacro(SiScope) CompMemKindMacro(DominatorMemory) diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index 486a6de7ce5f83..0473b7e4da90b2 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -77,7 +77,6 @@ CompPhaseNameMacro(PHASE_SWITCH_RECOGNITION, "Recognize Switch", CompPhaseNameMacro(PHASE_FIND_OPER_ORDER, "Find oper order", false, -1, false) CompPhaseNameMacro(PHASE_SET_BLOCK_ORDER, "Set block order", false, -1, true) CompPhaseNameMacro(PHASE_BUILD_SSA, "Build SSA representation", true, -1, false) -CompPhaseNameMacro(PHASE_BUILD_SSA_TOPOSORT, "SSA: topological sort", false, PHASE_BUILD_SSA, false) CompPhaseNameMacro(PHASE_BUILD_SSA_DOMS, "SSA: Doms1", false, 
PHASE_BUILD_SSA, false) CompPhaseNameMacro(PHASE_BUILD_SSA_LIVENESS, "SSA: liveness", false, PHASE_BUILD_SSA, false) CompPhaseNameMacro(PHASE_BUILD_SSA_DF, "SSA: DF", false, PHASE_BUILD_SSA, false) diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 34ca63d39a2cd4..fd665d64543b52 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -68,6 +68,8 @@ void Compiler::fgInit() fgBBVarSetsInited = false; fgReturnCount = 0; + m_dfs = nullptr; + // Initialize BlockSet data. fgCurBBEpoch = 0; fgCurBBEpochSize = 0; diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index 8ddc9aecf84121..cf703b04869269 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -2970,6 +2970,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef bool allNodesLinked = (fgNodeThreading == NodeThreading::AllTrees) || (fgNodeThreading == NodeThreading::LIR); unsigned numBlocks = 0; + unsigned maxBBNum = 0; for (BasicBlock* const block : Blocks()) { @@ -2981,6 +2982,8 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef assert(block->IsLast() || (block->bbNum + 1 == block->Next()->bbNum)); } + maxBBNum = max(maxBBNum, block->bbNum); + // Check that all the successors have the current traversal stamp. Use the 'Compiler*' version of the // iterator, but not for BBJ_SWITCH: we don't want to end up calling GetDescriptorForSwitch(), which will // dynamically create the unique switch list. @@ -3184,6 +3187,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef } assert(fgBBcount == numBlocks); + assert(fgBBNumMax >= maxBBNum); // Make sure the one return BB is not changed. 
if (genReturnBB != nullptr) diff --git a/src/coreclr/jit/fgprofilesynthesis.cpp b/src/coreclr/jit/fgprofilesynthesis.cpp index c9204c1e971e6f..6b3b2fff2dee1d 100644 --- a/src/coreclr/jit/fgprofilesynthesis.cpp +++ b/src/coreclr/jit/fgprofilesynthesis.cpp @@ -33,8 +33,8 @@ // void ProfileSynthesis::Run(ProfileSynthesisOption option) { - BuildReversePostorder(); - FindLoops(); + m_dfs = m_comp->fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfs); // Retain or compute edge likelihood information // @@ -176,111 +176,6 @@ void ProfileSynthesis::AssignLikelihoods() } } -//------------------------------------------------------------------------ -// IsDfsAncestor: see if block `x` is ancestor of block `y` in the depth -// first spanning tree -// -// Arguments: -// x -- block that is possible ancestor -// y -- block that is possible descendant -// -// Returns: -// True if x is ancestor of y in the depth first spanning tree. -// -// Notes: -// If return value is false, then x does not dominate y. -// -bool ProfileSynthesis::IsDfsAncestor(BasicBlock* x, BasicBlock* y) -{ - return ((x->bbPreorderNum <= y->bbPreorderNum) && (y->bbPostorderNum <= x->bbPostorderNum)); -} - -//------------------------------------------------------------------------ -// GetLoopFromHeader: see if a block is a loop header, and if so return -// the associated loop. -// -// Arguments: -// block - block in question -// -// Returns: -// loop headed by block, or nullptr -// -SimpleLoop* ProfileSynthesis::GetLoopFromHeader(BasicBlock* block) -{ - for (SimpleLoop* loop : *m_loops) - { - if (loop->m_head == block) - { - return loop; - } - } - - return nullptr; -} - -//------------------------------------------------------------------------ -// IsLoopBackEdge: see if an edge is a loop back edge -// -// Arguments: -// edge - edge in question -// -// Returns: -// True if edge is a backedge in some recognized loop. 
-// -// Notes: -// Different than asking IsDfsAncestor since we disqualify some -// natural backedges for complex loop strctures. -// -// Todo: -// Annotate the edge directly -// -bool ProfileSynthesis::IsLoopBackEdge(FlowEdge* edge) -{ - for (SimpleLoop* loop : *m_loops) - { - for (FlowEdge* loopBackEdge : loop->m_backEdges) - { - if (loopBackEdge == edge) - { - return true; - } - } - } - - return false; -} - -//------------------------------------------------------------------------ -// IsLoopExitEdge: see if a flow edge is a loop exit edge -// -// Arguments: -// edge - edge in question -// -// Returns: -// True if edge is an exit edge in some recognized loop -// -// Todo: -// Annotate the edge directly -// -// Decide if we want to report that the edge exits -// multiple loops. - -bool ProfileSynthesis::IsLoopExitEdge(FlowEdge* edge) -{ - for (SimpleLoop* loop : *m_loops) - { - for (FlowEdge* loopExitEdge : loop->m_exitEdges) - { - if (loopExitEdge == edge) - { - return true; - } - } - } - - return false; -} - //------------------------------------------------------------------------ // AssignLikelihoodNext: update edge likelihood for block that always // transfers control to bbNext @@ -353,8 +248,8 @@ void ProfileSynthesis::AssignLikelihoodCond(BasicBlock* block) // LOOP BACK EDGE heuristic // - bool const isJumpEdgeBackEdge = IsLoopBackEdge(jumpEdge); - bool const isNextEdgeBackEdge = IsLoopBackEdge(nextEdge); + bool const isJumpEdgeBackEdge = m_loops->IsLoopBackEdge(jumpEdge); + bool const isNextEdgeBackEdge = m_loops->IsLoopBackEdge(nextEdge); if (isJumpEdgeBackEdge != isNextEdgeBackEdge) { @@ -379,8 +274,8 @@ void ProfileSynthesis::AssignLikelihoodCond(BasicBlock* block) // Consider: adjust probability if loop has multiple exit edges, so that // overall exit probability is around 0.1. 
// - bool const isJumpEdgeExitEdge = IsLoopExitEdge(jumpEdge); - bool const isNextEdgeExitEdge = IsLoopExitEdge(nextEdge); + bool const isJumpEdgeExitEdge = m_loops->IsLoopExitEdge(jumpEdge); + bool const isNextEdgeExitEdge = m_loops->IsLoopExitEdge(nextEdge); if (isJumpEdgeExitEdge != isNextEdgeExitEdge) { @@ -783,263 +678,22 @@ void ProfileSynthesis::RandomizeLikelihoods() #endif // DEBUG } -//------------------------------------------------------------------------ -// fgBuildReversePostorder: compute depth first spanning tree and pre -// and post numbers for the blocks -// -void ProfileSynthesis::BuildReversePostorder() -{ - m_comp->EnsureBasicBlockEpoch(); - m_comp->fgDfsReversePostorder(); - - // Build map from bbNum to Block*. - // - m_bbNumToBlockMap = new (m_comp, CMK_Pgo) BasicBlock*[m_comp->fgBBNumMax + 1]{}; - for (BasicBlock* const block : m_comp->Blocks()) - { - m_bbNumToBlockMap[block->bbNum] = block; - } - -#ifdef DEBUG - if (m_comp->verbose) - { - printf("\nAfter doing a post order traversal of the BB graph, this is the ordering:\n"); - for (unsigned i = 1; i <= m_comp->fgBBNumMax; ++i) - { - BasicBlock* const block = m_comp->fgBBReversePostorder[i]; - printf("%02u -> " FMT_BB "[%u, %u]\n", i, block->bbNum, block->bbPreorderNum, block->bbPostorderNum); - } - printf("\n"); - } -#endif // DEBUG -} - -//------------------------------------------------------------------------ -// FindLoops: locate and classify loops -// -void ProfileSynthesis::FindLoops() -{ - CompAllocator allocator = m_comp->getAllocator(CMK_Pgo); - m_loops = new (allocator) LoopVector(allocator); - - // Identify loops - // - for (unsigned i = 1; i <= m_comp->fgBBNumMax; i++) - { - BasicBlock* const block = m_comp->fgBBReversePostorder[i]; - - // If a block is a DFS ancestor of one if its predecessors then the block is a loop header. 
- // - SimpleLoop* loop = nullptr; - - for (FlowEdge* predEdge : block->PredEdges()) - { - if (IsDfsAncestor(block, predEdge->getSourceBlock())) - { - if (loop == nullptr) - { - loop = new (allocator) SimpleLoop(block, allocator); - JITDUMP("\n"); - } - - JITDUMP(FMT_BB " -> " FMT_BB " is a backedge\n", predEdge->getSourceBlock()->bbNum, block->bbNum); - loop->m_backEdges.push_back(predEdge); - } - } - - if (loop == nullptr) - { - continue; - } - - JITDUMP(FMT_BB " is head of a DFS loop with %d back edges\n", block->bbNum, loop->m_backEdges.size()); - - // Now walk back in flow along the back edges from block to determine if - // this is a natural loop and to find all the blocks in the loop. - // - loop->m_blocks = BlockSetOps::MakeEmpty(m_comp); - BlockSetOps::AddElemD(m_comp, loop->m_blocks, block->bbNum); - - // todo: hoist this out and just do a reset here - jitstd::list worklist(allocator); - - // Seed the worklist - // - for (FlowEdge* backEdge : loop->m_backEdges) - { - BasicBlock* const backEdgeSource = backEdge->getSourceBlock(); - - if (BlockSetOps::IsMember(m_comp, loop->m_blocks, backEdgeSource->bbNum)) - { - continue; - } - - worklist.push_back(backEdgeSource); - } - - bool isNaturalLoop = true; - - // Work back through flow to loop head or to another pred - // that is clearly outside the loop. - // - // TODO: verify that we can indeed get back to the loop head - // and not get stopped somewhere (eg loop through EH). - // - while (!worklist.empty() & isNaturalLoop) - { - BasicBlock* const loopBlock = worklist.back(); - worklist.pop_back(); - BlockSetOps::AddElemD(m_comp, loop->m_blocks, loopBlock->bbNum); - - for (FlowEdge* const predEdge : loopBlock->PredEdges()) - { - BasicBlock* const predBlock = predEdge->getSourceBlock(); - - // `block` cannot dominate `predBlock` unless it is a DFS ancestor. - // - if (!IsDfsAncestor(block, predBlock)) - { - // Does this represent flow out of some handler? - // If so we will ignore it. 
- // - // Might want to vet that handler's try region entry - // is a dfs ancestor...? - // - if (!BasicBlock::sameHndRegion(block, predBlock)) - { - continue; - } - - JITDUMP("Loop is not natural; witness " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum, - loopBlock->bbNum); - - isNaturalLoop = false; - m_improperLoopHeaders++; - break; - } - - if (BlockSetOps::IsMember(m_comp, loop->m_blocks, predBlock->bbNum)) - { - continue; - } - - worklist.push_back(predBlock); - } - } - - if (!isNaturalLoop) - { - continue; - } - - JITDUMP("Loop has %d blocks\n", BlockSetOps::Count(m_comp, loop->m_blocks)); - - // Find the exit edges - // - BlockSetOps::Iter iter(m_comp, loop->m_blocks); - unsigned bbNum = 0; - while (iter.NextElem(&bbNum)) - { - BasicBlock* const loopBlock = m_bbNumToBlockMap[bbNum]; - - for (BasicBlock* const succBlock : loopBlock->Succs(m_comp)) - { - if (!BlockSetOps::IsMember(m_comp, loop->m_blocks, succBlock->bbNum)) - { - FlowEdge* const exitEdge = m_comp->fgGetPredForBlock(succBlock, loopBlock); - JITDUMP(FMT_BB " -> " FMT_BB " is an exit edge\n", loopBlock->bbNum, succBlock->bbNum); - loop->m_exitEdges.push_back(exitEdge); - } - } - } - - // Find the entry edges - // - // Note if fgEntryBB is a loop head we won't have an entry edge. - // So it needs to be special cased later on when processing - // entry edges. - // - for (FlowEdge* const predEdge : loop->m_head->PredEdges()) - { - if (!IsDfsAncestor(block, predEdge->getSourceBlock())) - { - JITDUMP(FMT_BB " -> " FMT_BB " is an entry edge\n", predEdge->getSourceBlock()->bbNum, - loop->m_head->bbNum); - loop->m_entryEdges.push_back(predEdge); - } - } - - // Search for parent loop, validate proper nesting. - // - // Since loops record in outer->inner order the parent will be the - // most recently recorded loop that contains this loop's header. 
- // - for (auto it = m_loops->rbegin(), itEnd = m_loops->rend(); it != itEnd; ++it) - { - SimpleLoop* const otherLoop = *it; - - if (BlockSetOps::IsMember(m_comp, otherLoop->m_blocks, block->bbNum)) - { - // Ancestor loop; should contain all blocks of this loop - // - assert(BlockSetOps::IsSubset(m_comp, loop->m_blocks, otherLoop->m_blocks)); - - if (loop->m_parent == nullptr) - { - loop->m_parent = otherLoop; - loop->m_depth = otherLoop->m_depth + 1; - JITDUMP("at depth %u, nested within loop starting at " FMT_BB "\n", loop->m_depth, - otherLoop->m_head->bbNum); - - // Note we could break here but that would bypass the non-overlap check - // just below, so for now we check against all known loops. - } - } - else - { - // Non-ancestor loop; should have no blocks in common with current loop - // - assert(BlockSetOps::IsEmptyIntersection(m_comp, loop->m_blocks, otherLoop->m_blocks)); - } - } - - if (loop->m_parent == nullptr) - { - JITDUMP("top-level loop\n"); - loop->m_depth = 1; - } - - // Record this loop - // - m_loops->push_back(loop); - } - - if (m_loops->size() > 0) - { - JITDUMP("\nFound %d loops\n", m_loops->size()); - } - - if (m_improperLoopHeaders > 0) - { - JITDUMP("Rejected %d loop headers\n", m_improperLoopHeaders); - } -} - //------------------------------------------------------------------------ // FindCyclicProbabilities: for each loop, compute how much flow returns // to the loop head given one external count. // void ProfileSynthesis::ComputeCyclicProbabilities() { - // We found loop walking in reverse postorder, so the loop vector - // is naturally organized with outer loops before inner. - // - // Walk it backwards here so we compute inner loop cyclic probabilities - // first. We rely on that when processing outer loops. 
- // - for (auto it = m_loops->rbegin(), itEnd = m_loops->rend(); it != itEnd; ++it) + m_cyclicProbabilities = nullptr; + if (m_loops->NumLoops() == 0) + { + return; + } + + m_cyclicProbabilities = new (m_comp, CMK_Pgo) weight_t[m_loops->NumLoops()]; + // Walk loops in post order to visit inner loops before outer loops. + for (FlowGraphNaturalLoop* loop : m_loops->InPostOrder()) { - SimpleLoop* const loop = *it; ComputeCyclicProbabilities(loop); } } @@ -1048,52 +702,41 @@ void ProfileSynthesis::ComputeCyclicProbabilities() // FindCyclicProbabilities: for a given loop, compute how much flow returns // to the loop head given one external count. // -void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop) +void ProfileSynthesis::ComputeCyclicProbabilities(FlowGraphNaturalLoop* loop) { // Initialize // - BlockSetOps::Iter iter(m_comp, loop->m_blocks); - unsigned bbNum = 0; - while (iter.NextElem(&bbNum)) - { - BasicBlock* const loopBlock = m_bbNumToBlockMap[bbNum]; - loopBlock->bbWeight = 0.0; - } + loop->VisitLoopBlocks([](BasicBlock* loopBlock) { + loopBlock->bbWeight = 0.0; + return BasicBlockVisit::Continue; + }); // Process loop blocks in RPO. Just takes one pass through the loop blocks // as any cyclic contributions are handled by cyclic probabilities. // - for (unsigned int i = 1; i <= m_comp->fgBBNumMax; i++) - { - BasicBlock* const block = m_comp->fgBBReversePostorder[i]; - - if (!BlockSetOps::IsMember(m_comp, loop->m_blocks, block->bbNum)) - { - continue; - } - + loop->VisitLoopBlocksReversePostOrder([=](BasicBlock* block) { // Loop head gets external count of 1 // - if (block == loop->m_head) + if (block == loop->GetHeader()) { JITDUMP("ccp: " FMT_BB " :: 1.0\n", block->bbNum); block->bbWeight = 1.0; } else { - SimpleLoop* const nestedLoop = GetLoopFromHeader(block); + FlowGraphNaturalLoop* const nestedLoop = m_loops->GetLoopFromHeader(block); if (nestedLoop != nullptr) { // We should have figured this out already. 
// - assert(nestedLoop->m_cyclicProbability != 0); + assert(m_cyclicProbabilities[nestedLoop->GetIndex()] != 0); // Sum entry edges, multply by Cp // weight_t newWeight = 0.0; - for (FlowEdge* const edge : nestedLoop->m_entryEdges) + for (FlowEdge* const edge : nestedLoop->EntryEdges()) { if (BasicBlock::sameHndRegion(block, edge->getSourceBlock())) { @@ -1101,7 +744,7 @@ void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop) } } - newWeight *= nestedLoop->m_cyclicProbability; + newWeight *= m_cyclicProbabilities[nestedLoop->GetIndex()]; block->bbWeight = newWeight; JITDUMP("ccp (nested header): " FMT_BB " :: " FMT_WT "\n", block->bbNum, newWeight); @@ -1123,17 +766,19 @@ void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop) JITDUMP("ccp: " FMT_BB " :: " FMT_WT "\n", block->bbNum, newWeight); } } - } + + return BasicBlockVisit::Continue; + }); // Now look at cyclic flow back to the head block. // weight_t cyclicWeight = 0; bool capped = false; - for (FlowEdge* const edge : loop->m_backEdges) + for (FlowEdge* const edge : loop->BackEdges()) { JITDUMP("ccp backedge " FMT_BB " (" FMT_WT ") -> " FMT_BB " likelihood " FMT_WT "\n", - edge->getSourceBlock()->bbNum, edge->getSourceBlock()->bbWeight, loop->m_head->bbNum, + edge->getSourceBlock()->bbNum, edge->getSourceBlock()->bbWeight, loop->GetHeader()->bbNum, edge->getLikelihood()); cyclicWeight += edge->getLikelyWeight(); @@ -1157,22 +802,22 @@ void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop) weight_t const cyclicProbability = 1.0 / (1.0 - cyclicWeight); JITDUMP("For loop at " FMT_BB " cyclic weight is " FMT_WT " cyclic probability is " FMT_WT "%s\n", - loop->m_head->bbNum, cyclicWeight, cyclicProbability, capped ? " [capped]" : ""); + loop->GetHeader()->bbNum, cyclicWeight, cyclicProbability, capped ? 
" [capped]" : ""); - loop->m_cyclicProbability = cyclicProbability; + m_cyclicProbabilities[loop->GetIndex()] = cyclicProbability; // Try and adjust loop exit likelihood to reflect capping. // If there are multiple exits we just adjust the first one we can. This is somewhat arbitrary. // If there are no exits, there's nothing we can do. // - if (capped && (loop->m_exitEdges.size() > 0)) + if (capped && (loop->ExitEdges().size() > 0)) { // Figure out how much flow exits the loop with the capped probablility // and current block frequencies and exit likelihoods. // weight_t cappedExitWeight = 0.0; - for (FlowEdge* const exitEdge : loop->m_exitEdges) + for (FlowEdge* const exitEdge : loop->ExitEdges()) { BasicBlock* const exitBlock = exitEdge->getSourceBlock(); weight_t const exitBlockFrequency = exitBlock->bbWeight; @@ -1198,7 +843,7 @@ void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop) bool adjustedExit = false; - for (FlowEdge* const exitEdge : loop->m_exitEdges) + for (FlowEdge* const exitEdge : loop->ExitEdges()) { // Does this block have enough weight that it can supply all the missing weight? // @@ -1354,70 +999,13 @@ void ProfileSynthesis::AssignInputWeights(ProfileSynthesisOption option) // ComputeBlockWeights: compute weights for all blocks // based on input weights, edge likelihoods, and cyclic probabilities // -// Notes: -// We want to first walk the main method body, then any finally -// handers from outermost to innermost. -// -// The depth first walk we did to kick off synthesis has split the -// graph into a forest of depth first spanning trees. We leverage -// this and the EH table structure to accomplish the visiting order above. -// -// We might be able to avoid all this if during the DFS walk we -// walked from try entries to filter or handlers, so that a -// single DFST encompassed all the reachable blocks in the right order. 
-// void ProfileSynthesis::ComputeBlockWeights() { JITDUMP("Computing block weights\n"); - // Main method body - // - ComputeBlockWeightsSubgraph(m_comp->fgFirstBB); - - // All finally and fault handlers from outer->inner - // (walk EH table backwards) - // - if (!m_comp->compIsForInlining()) - { - for (unsigned i = 0; i < m_comp->compHndBBtabCount; i++) - { - unsigned const XTnum = m_comp->compHndBBtabCount - i - 1; - EHblkDsc* const HBtab = &m_comp->compHndBBtab[XTnum]; - if (HBtab->HasFilter()) - { - // Filter subtree includes handler - // - ComputeBlockWeightsSubgraph(HBtab->ebdFilter); - } - else - { - ComputeBlockWeightsSubgraph(HBtab->ebdHndBeg); - } - } - } - - // Anything else is unreachable and will have zero count -} - -//------------------------------------------------------------------------ -// ComputeBlockWeights: compute weights for all blocks in a particular DFST -// -// Arguments: -// entry - root node of a DFST -// -void ProfileSynthesis::ComputeBlockWeightsSubgraph(BasicBlock* entry) -{ - // Determine the range of indices for this DFST in the overall RPO. 
- // - const unsigned firstIndex = m_comp->fgBBNumMax - entry->bbPostorderNum + 1; - assert(m_comp->fgBBReversePostorder[firstIndex] == entry); - - assert(entry->bbPostorderNum >= entry->bbPreorderNum); - const unsigned lastIndex = firstIndex + entry->bbPostorderNum - entry->bbPreorderNum; - - for (unsigned int i = firstIndex; i <= lastIndex; i++) + for (unsigned i = m_dfs->GetPostOrderCount(); i != 0; i--) { - BasicBlock* const block = m_comp->fgBBReversePostorder[i]; + BasicBlock* block = m_dfs->GetPostOrder()[i - 1]; ComputeBlockWeight(block); } } @@ -1430,15 +1018,15 @@ void ProfileSynthesis::ComputeBlockWeightsSubgraph(BasicBlock* entry) // void ProfileSynthesis::ComputeBlockWeight(BasicBlock* block) { - SimpleLoop* const loop = GetLoopFromHeader(block); - weight_t newWeight = block->bbWeight; - const char* kind = ""; + FlowGraphNaturalLoop* const loop = m_loops->GetLoopFromHeader(block); + weight_t newWeight = block->bbWeight; + const char* kind = ""; if (loop != nullptr) { // Sum all entry edges that aren't EH flow // - for (FlowEdge* const edge : loop->m_entryEdges) + for (FlowEdge* const edge : loop->EntryEdges()) { if (BasicBlock::sameHndRegion(block, edge->getSourceBlock())) { @@ -1448,7 +1036,7 @@ void ProfileSynthesis::ComputeBlockWeight(BasicBlock* block) // Scale by cyclic probability // - newWeight *= loop->m_cyclicProbability; + newWeight *= m_cyclicProbabilities[loop->GetIndex()]; kind = " (loop head)"; } else diff --git a/src/coreclr/jit/fgprofilesynthesis.h b/src/coreclr/jit/fgprofilesynthesis.h index a5d5f7d827f43d..ab82fffe5e37cd 100644 --- a/src/coreclr/jit/fgprofilesynthesis.h +++ b/src/coreclr/jit/fgprofilesynthesis.h @@ -9,34 +9,7 @@ // Flowgraph Profile Synthesis -typedef jitstd::vector EdgeVector; -typedef jitstd::vector WeightVector; - -struct SimpleLoop -{ - SimpleLoop(BasicBlock* head, CompAllocator allocator) - : m_head(head) - , m_parent(nullptr) - , m_blocks(BlockSetOps::UninitVal()) - , m_entryEdges(allocator) - , 
m_backEdges(allocator) - , m_exitEdges(allocator) - , m_cyclicProbability(0) - , m_depth(0) - { - } - - BasicBlock* m_head; - SimpleLoop* m_parent; - BlockSet m_blocks; - EdgeVector m_entryEdges; - EdgeVector m_backEdges; - EdgeVector m_exitEdges; - weight_t m_cyclicProbability; - unsigned m_depth; -}; - -typedef jitstd::vector LoopVector; +typedef jitstd::vector WeightVector; //------------------------------------------------------------------------ // ProfileSynthesisOption: specify behavior of profile synthesis @@ -68,11 +41,7 @@ class ProfileSynthesis private: ProfileSynthesis(Compiler* compiler) - : m_comp(compiler) - , m_loops(nullptr) - , m_bbNumToBlockMap(nullptr) - , m_improperLoopHeaders(0) - , m_cappedCyclicProbabilities(0) + : m_comp(compiler), m_loops(nullptr), m_improperLoopHeaders(0), m_cappedCyclicProbabilities(0) { } @@ -86,14 +55,6 @@ class ProfileSynthesis void Run(ProfileSynthesisOption option); - void BuildReversePostorder(); - static bool IsDfsAncestor(BasicBlock* x, BasicBlock* y); - bool IsLoopBackEdge(FlowEdge* edge); - bool IsLoopExitEdge(FlowEdge* edge); - - void FindLoops(); - SimpleLoop* GetLoopFromHeader(BasicBlock* block); - weight_t SumOutgoingLikelihoods(BasicBlock* block, WeightVector* likelihoods = nullptr); void AssignLikelihoods(); @@ -108,20 +69,20 @@ class ProfileSynthesis void RandomizeLikelihoods(); void ComputeCyclicProbabilities(); - void ComputeCyclicProbabilities(SimpleLoop* loop); + void ComputeCyclicProbabilities(FlowGraphNaturalLoop* loop); void AssignInputWeights(ProfileSynthesisOption option); void ComputeBlockWeights(); - void ComputeBlockWeightsSubgraph(BasicBlock* block); void ComputeBlockWeight(BasicBlock* block); private: - Compiler* const m_comp; - LoopVector* m_loops; - BasicBlock** m_bbNumToBlockMap; - unsigned m_improperLoopHeaders; - unsigned m_cappedCyclicProbabilities; + Compiler* const m_comp; + FlowGraphDfsTree* m_dfs; + FlowGraphNaturalLoops* m_loops; + weight_t* m_cyclicProbabilities; + unsigned 
m_improperLoopHeaders; + unsigned m_cappedCyclicProbabilities; }; #endif // !_FGPROFILESYNTHESIS_H_ diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index a237cfea0ff44c..f81c55b004a043 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -4083,3 +4083,560 @@ void Compiler::fgLclFldAssign(unsigned lclNum) lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::LocalField)); } } + +//------------------------------------------------------------------------ +// FlowGraphDfsTree::Contains: Check if a block is contained in the DFS tree; +// i.e., if it is reachable. +// +// Arguments: +// block - The block +// +// Return Value: +// True if the block is reachable from the root. +// +bool FlowGraphDfsTree::Contains(BasicBlock* block) const +{ + return (block->bbPostorderNum < m_postOrderCount) && (m_postOrder[block->bbPostorderNum] == block); +} + +//------------------------------------------------------------------------ +// FlowGraphDfsTree::IsAncestor: Check if block `ancestor` is an ancestor of +// block `descendant` +// +// Arguments: +// ancestor -- block that is possible ancestor +// descendant -- block that is possible descendant +// +// Returns: +// True if `ancestor` is ancestor of `descendant` in the depth first spanning +// tree. +// +// Notes: +// If return value is false, then `ancestor` does not dominate `descendant`. +// +bool FlowGraphDfsTree::IsAncestor(BasicBlock* ancestor, BasicBlock* descendant) const +{ + assert(Contains(ancestor) && Contains(descendant)); + return (ancestor->bbPreorderNum <= descendant->bbPreorderNum) && + (descendant->bbPostorderNum <= ancestor->bbPostorderNum); +} + +//------------------------------------------------------------------------ +// fgComputeDfs: Compute a depth-first search tree for the flow graph. +// +// Returns: +// The tree. +// +// Notes: +// Preorder and postorder numbers are assigned into the BasicBlock structure. 
+// The tree returned contains a postorder of the basic blocks. +// +FlowGraphDfsTree* Compiler::fgComputeDfs() +{ + BasicBlock** postOrder = new (this, CMK_DepthFirstSearch) BasicBlock*[fgBBcount]; + BitVecTraits traits(fgBBNumMax + 1, this); + + BitVec visited(BitVecOps::MakeEmpty(&traits)); + + unsigned preOrderIndex = 0; + unsigned postOrderIndex = 0; + + ArrayStack blocks(getAllocator(CMK_DepthFirstSearch)); + + auto dfsFrom = [&, postOrder](BasicBlock* firstBB) { + + BitVecOps::AddElemD(&traits, visited, firstBB->bbNum); + blocks.Emplace(this, firstBB); + firstBB->bbPreorderNum = preOrderIndex++; + + while (!blocks.Empty()) + { + BasicBlock* block = blocks.TopRef().Block(); + BasicBlock* succ = blocks.TopRef().NextSuccessor(); + + if (succ != nullptr) + { + if (BitVecOps::TryAddElemD(&traits, visited, succ->bbNum)) + { + blocks.Emplace(this, succ); + succ->bbPreorderNum = preOrderIndex++; + } + } + else + { + blocks.Pop(); + postOrder[postOrderIndex] = block; + block->bbPostorderNum = postOrderIndex++; + } + } + + }; + + dfsFrom(fgFirstBB); + + if ((fgEntryBB != nullptr) && !BitVecOps::IsMember(&traits, visited, fgEntryBB->bbNum)) + { + // OSR methods will early on create flow that looks like it goes to the + // patchpoint, but during morph we may transform to something that + // requires the original entry (fgEntryBB). + assert(opts.IsOSR()); + assert((fgEntryBB->bbRefs == 1) && (fgEntryBB->bbPreds == nullptr)); + dfsFrom(fgEntryBB); + } + + if ((genReturnBB != nullptr) && !BitVecOps::IsMember(&traits, visited, genReturnBB->bbNum) && !fgGlobalMorphDone) + { + // We introduce the merged return BB before morph and will redirect + // other returns to it as part of morph; keep it reachable. 
+ dfsFrom(genReturnBB); + } + + return new (this, CMK_DepthFirstSearch) FlowGraphDfsTree(this, postOrder, postOrderIndex); +} + +//------------------------------------------------------------------------ +// FlowGraphNaturalLoop::FlowGraphNaturalLoop: Initialize a new loop instance. +// +// Parameters: +// tree - The DFS tree +// header - The loop header +// +FlowGraphNaturalLoop::FlowGraphNaturalLoop(const FlowGraphDfsTree* tree, BasicBlock* header) + : m_tree(tree) + , m_header(header) + , m_blocks(BitVecOps::UninitVal()) + , m_backEdges(tree->GetCompiler()->getAllocator(CMK_Loops)) + , m_entryEdges(tree->GetCompiler()->getAllocator(CMK_Loops)) + , m_exitEdges(tree->GetCompiler()->getAllocator(CMK_Loops)) +{ +} + +//------------------------------------------------------------------------ +// LoopBlockBitVecIndex: Convert a basic block to an index into the bit vector +// used to store the set of loop blocks. +// +// Parameters: +// block - The block +// +// Returns: +// Index into the bit vector +// +// Remarks: +// The bit vector is stored with the base index of the loop header since we +// know the header is an ancestor of all loop blocks. Thus we do not need to +// waste space on previous blocks. +// +// This function should only be used when it is known that the block has an +// index in the loop bit vector. +// +unsigned FlowGraphNaturalLoop::LoopBlockBitVecIndex(BasicBlock* block) +{ + assert(m_tree->Contains(block)); + + unsigned index = m_header->bbPostorderNum - block->bbPostorderNum; + assert(index < m_blocksSize); + return index; +} + +//------------------------------------------------------------------------ +// TryGetLoopBlockBitVecIndex: Convert a basic block to an index into the bit +// vector used to store the set of loop blocks. +// +// Parameters: +// block - The block +// pIndex - [out] Index into the bit vector, if this function returns true. +// +// Returns: +// True if the block has an index in the loop bit vector.
+// +// Remarks: +// See LoopBlockBitVecIndex for more information. This function can be +// used when it is not known whether the block has an index in the loop bit +// vector. +// +bool FlowGraphNaturalLoop::TryGetLoopBlockBitVecIndex(BasicBlock* block, unsigned* pIndex) +{ + if (block->bbPostorderNum > m_header->bbPostorderNum) + { + return false; + } + + unsigned index = m_header->bbPostorderNum - block->bbPostorderNum; + if (index >= m_blocksSize) + { + return false; + } + + *pIndex = index; + return true; +} + +//------------------------------------------------------------------------ +// LoopBlockTraits: Get traits for a bit vector for blocks in this loop. +// +// Returns: +// Bit vector traits. +// +BitVecTraits FlowGraphNaturalLoop::LoopBlockTraits() +{ + return BitVecTraits(m_blocksSize, m_tree->GetCompiler()); +} + +//------------------------------------------------------------------------ +// ContainsBlock: Returns true if this loop contains the specified block. +// +// Parameters: +// block - A block +// +// Returns: +// True if the block is contained in the loop. +// +// Remarks: +// Containment here means that the block is in the SCC of the loop; i.e. it +// is in a cycle with the header block. Note that EH successors are taken +// into account; for example, a BBJ_RETURN may still be a loop block provided +// that its handler can reach the loop header. +// +bool FlowGraphNaturalLoop::ContainsBlock(BasicBlock* block) +{ + unsigned index; + if (!TryGetLoopBlockBitVecIndex(block, &index)) + { + return false; + } + + BitVecTraits traits = LoopBlockTraits(); + return BitVecOps::IsMember(&traits, m_blocks, index); +} + +//------------------------------------------------------------------------ +// FlowGraphNaturalLoops::FlowGraphNaturalLoops: Initialize a new instance to +// track a set of loops over the flow graph. +// +// Parameters: +// dfs - A DFS tree.
+// +FlowGraphNaturalLoops::FlowGraphNaturalLoops(const FlowGraphDfsTree* dfs) + : m_dfs(dfs), m_loops(m_dfs->GetCompiler()->getAllocator(CMK_Loops)) +{ +} + +// GetLoopFromHeader: See if a block is a loop header, and if so return the +// associated loop. +// +// Parameters: +// block - block in question +// +// Returns: +// Loop headed by block, or nullptr +// +FlowGraphNaturalLoop* FlowGraphNaturalLoops::GetLoopFromHeader(BasicBlock* block) +{ + // TODO-TP: This can use binary search based on post order number. + for (FlowGraphNaturalLoop* loop : m_loops) + { + if (loop->m_header == block) + { + return loop; + } + } + + return nullptr; +} + +//------------------------------------------------------------------------ +// IsLoopBackEdge: See if an edge is a loop back edge +// +// Parameters: +// edge - edge in question +// +// Returns: +// True if edge is a backedge in some recognized loop. +// +bool FlowGraphNaturalLoops::IsLoopBackEdge(FlowEdge* edge) +{ + for (FlowGraphNaturalLoop* loop : m_loops) + { + for (FlowEdge* loopBackEdge : loop->m_backEdges) + { + if (loopBackEdge == edge) + { + return true; + } + } + } + + return false; +} + +//------------------------------------------------------------------------ +// IsLoopExitEdge: see if a flow edge is a loop exit edge +// +// Parameters: +// edge - edge in question +// +// Returns: +// True if edge is an exit edge in some recognized loop. Note that a single +// edge may exit multiple loops. +// +bool FlowGraphNaturalLoops::IsLoopExitEdge(FlowEdge* edge) +{ + for (FlowGraphNaturalLoop* loop : m_loops) + { + for (FlowEdge* loopExitEdge : loop->m_exitEdges) + { + if (loopExitEdge == edge) + { + return true; + } + } + } + + return false; +} + +//------------------------------------------------------------------------ +// FlowGraphNaturalLoops::Find: Find natural loops in the specified DFS tree +// constructed for the flow graph. +// +// Parameters: +// dfs - The DFS tree +// +// Returns: +// Identified natural loops. 
+// +FlowGraphNaturalLoops* FlowGraphNaturalLoops::Find(const FlowGraphDfsTree* dfs) +{ + Compiler* comp = dfs->GetCompiler(); + comp->m_blockToEHPreds = nullptr; + +#ifdef DEBUG + JITDUMP("Identifying loops in DFS tree with following reverse post order:\n"); + for (unsigned i = dfs->GetPostOrderCount(); i != 0; i--) + { + unsigned rpoNum = dfs->GetPostOrderCount() - i; + BasicBlock* const block = dfs->GetPostOrder()[i - 1]; + JITDUMP("%02u -> " FMT_BB "[%u, %u]\n", rpoNum + 1, block->bbNum, block->bbPreorderNum + 1, + block->bbPostorderNum + 1); + } +#endif + + FlowGraphNaturalLoops* loops = new (comp, CMK_Loops) FlowGraphNaturalLoops(dfs); + + jitstd::list worklist(comp->getAllocator(CMK_Loops)); + + for (unsigned i = dfs->GetPostOrderCount(); i != 0; i--) + { + BasicBlock* const header = dfs->GetPostOrder()[i - 1]; + + // If a block is a DFS ancestor of one of its predecessors then the block is a loop header. + // + FlowGraphNaturalLoop* loop = nullptr; + + for (FlowEdge* predEdge : header->PredEdges()) + { + BasicBlock* predBlock = predEdge->getSourceBlock(); + if (dfs->Contains(predBlock) && dfs->IsAncestor(header, predBlock)) + { + if (loop == nullptr) + { + loop = new (comp, CMK_Loops) FlowGraphNaturalLoop(dfs, header); + JITDUMP("\n"); + } + + JITDUMP(FMT_BB " -> " FMT_BB " is a backedge\n", predBlock->bbNum, header->bbNum); + loop->m_backEdges.push_back(predEdge); + } + } + + if (loop == nullptr) + { + continue; + } + + JITDUMP(FMT_BB " is the header of a DFS loop with %zu back edges\n", header->bbNum, loop->m_backEdges.size()); + + // Now walk back in flow along the back edges from head to determine if + // this is a natural loop and to find all the blocks in the loop.
+ // + + worklist.clear(); + loop->m_blocksSize = loop->m_header->bbPostorderNum + 1; + + BitVecTraits loopTraits = loop->LoopBlockTraits(); + loop->m_blocks = BitVecOps::MakeEmpty(&loopTraits); + + if (!FindNaturalLoopBlocks(loop, worklist)) + { + loops->m_improperLoopHeaders++; + continue; + } + + JITDUMP("Loop has %u blocks\n", BitVecOps::Count(&loopTraits, loop->m_blocks)); + + // Find the exit edges + // + loop->VisitLoopBlocks([=](BasicBlock* loopBlock) { + loopBlock->VisitRegularSuccs(comp, [=](BasicBlock* succBlock) { + if (!loop->ContainsBlock(succBlock)) + { + FlowEdge* const exitEdge = comp->fgGetPredForBlock(succBlock, loopBlock); + JITDUMP(FMT_BB " -> " FMT_BB " is an exit edge\n", loopBlock->bbNum, succBlock->bbNum); + loop->m_exitEdges.push_back(exitEdge); + } + + return BasicBlockVisit::Continue; + }); + + return BasicBlockVisit::Continue; + }); + + // Find the entry edges + // + // Note if fgEntryBB is a loop head we won't have an entry edge. + // So it needs to be special cased later on when processing + // entry edges. + // + for (FlowEdge* const predEdge : loop->m_header->PredEdges()) + { + BasicBlock* predBlock = predEdge->getSourceBlock(); + if (dfs->Contains(predBlock) && !dfs->IsAncestor(header, predEdge->getSourceBlock())) + { + JITDUMP(FMT_BB " -> " FMT_BB " is an entry edge\n", predEdge->getSourceBlock()->bbNum, + loop->m_header->bbNum); + loop->m_entryEdges.push_back(predEdge); + } + } + + // Search for parent loop. + // + // Since loops record in outer->inner order the parent will be the + // most recently recorded loop that contains this loop's header. + // + for (FlowGraphNaturalLoop* const otherLoop : loops->InPostOrder()) + { + if (otherLoop->ContainsBlock(header)) + { + loop->m_parent = otherLoop; + JITDUMP("Nested within loop starting at " FMT_BB "\n", otherLoop->GetHeader()->bbNum); + break; + } + } + +#ifdef DEBUG + // In debug, validate nestedness versus other loops. 
+ // + for (FlowGraphNaturalLoop* const otherLoop : loops->InPostOrder()) + { + if (otherLoop->ContainsBlock(header)) + { + // Ancestor loop; should contain all blocks of this loop + // + loop->VisitLoopBlocks([otherLoop](BasicBlock* loopBlock) { + assert(otherLoop->ContainsBlock(loopBlock)); + return BasicBlockVisit::Continue; + }); + } + else + { + // Non-ancestor loop; should have no blocks in common with current loop + // + loop->VisitLoopBlocks([otherLoop](BasicBlock* loopBlock) { + assert(!otherLoop->ContainsBlock(loopBlock)); + return BasicBlockVisit::Continue; + }); + } + } +#endif + + // Record this loop + // + loop->m_index = (unsigned)loops->m_loops.size(); + loops->m_loops.push_back(loop); + + JITDUMP("Added loop " FMT_LP " with header " FMT_BB "\n", loop->GetIndex(), loop->GetHeader()->bbNum); + } + + if (loops->m_loops.size() > 0) + { + JITDUMP("\nFound %zu loops\n", loops->m_loops.size()); + } + + if (loops->m_improperLoopHeaders > 0) + { + JITDUMP("Rejected %u loop headers\n", loops->m_improperLoopHeaders); + } + + return loops; +} + +//------------------------------------------------------------------------ +// FlowGraphNaturalLoops::FindNaturalLoopBlocks: Find the loop blocks for a +// specified loop. +// +// Parameters: +// loop - The natural loop we are constructing +// worklist - Scratch worklist to use for the search +// +// Returns: +// True if the loop is natural; marks the loop blocks into 'loop' as part of +// the search. 
+// +bool FlowGraphNaturalLoops::FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, jitstd::list& worklist) +{ + const FlowGraphDfsTree* tree = loop->m_tree; + Compiler* comp = tree->GetCompiler(); + BitVecTraits loopTraits = loop->LoopBlockTraits(); + BitVecOps::AddElemD(&loopTraits, loop->m_blocks, 0); + + // Seed the worklist + // + worklist.clear(); + for (FlowEdge* backEdge : loop->m_backEdges) + { + BasicBlock* const backEdgeSource = backEdge->getSourceBlock(); + if (backEdgeSource == loop->GetHeader()) + { + continue; + } + + assert(!BitVecOps::IsMember(&loopTraits, loop->m_blocks, loop->LoopBlockBitVecIndex(backEdgeSource))); + worklist.push_back(backEdgeSource); + BitVecOps::AddElemD(&loopTraits, loop->m_blocks, loop->LoopBlockBitVecIndex(backEdgeSource)); + } + + // Work back through flow to loop head or to another pred + // that is clearly outside the loop. + // + while (!worklist.empty()) + { + BasicBlock* const loopBlock = worklist.back(); + worklist.pop_back(); + + for (FlowEdge* predEdge = comp->BlockPredsWithEH(loopBlock); predEdge != nullptr; + predEdge = predEdge->getNextPredEdge()) + { + BasicBlock* const predBlock = predEdge->getSourceBlock(); + + if (!tree->Contains(predBlock)) + { + continue; + } + + // Head cannot dominate `predBlock` unless it is a DFS ancestor. 
+ // + if (!tree->IsAncestor(loop->GetHeader(), predBlock)) + { + JITDUMP("Loop is not natural; witness " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum, loopBlock->bbNum); + return false; + } + + if (BitVecOps::TryAddElemD(&loopTraits, loop->m_blocks, loop->LoopBlockBitVecIndex(predBlock))) + { + worklist.push_back(predBlock); + } + } + } + + return true; +} diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 73a8474324d395..d8f2e472b7efcc 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -14124,8 +14124,9 @@ PhaseStatus Compiler::fgMorphBlocks() // We are done with the global morphing phase // - fgGlobalMorph = false; - compCurBB = nullptr; + fgGlobalMorph = false; + fgGlobalMorphDone = true; + compCurBB = nullptr; #ifdef DEBUG if (optLocalAssertionProp) diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 9bedef7bc937e5..cab35f7ae654a7 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -61,6 +61,9 @@ PhaseStatus Compiler::fgSsaBuild() fgResetForSsa(); } + // Reset BlockPredsWithEH cache. + m_blockToEHPreds = nullptr; + SsaBuilder builder(this); builder.Build(); fgSsaPassesCompleted++; @@ -69,8 +72,6 @@ PhaseStatus Compiler::fgSsaBuild() JitTestCheckSSA(); #endif // DEBUG - fgSSAPostOrder = builder.GetPostOrder(&fgSSAPostOrderCount); - return PhaseStatus::MODIFIED_EVERYTHING; } @@ -136,88 +137,6 @@ SsaBuilder::SsaBuilder(Compiler* pCompiler) { } -//------------------------------------------------------------------------ -// TopologicalSort: Topologically sort the graph and return the number of nodes visited. -// -// Arguments: -// postOrder - The array in which the arranged basic blocks have to be returned. -// count - The size of the postOrder array. -// -// Return Value: -// The number of nodes visited while performing DFS on the graph. 
-// -unsigned SsaBuilder::TopologicalSort(BasicBlock** postOrder, int count) -{ - Compiler* comp = m_pCompiler; - - // TopologicalSort is called first so m_visited should already be empty - assert(BitVecOps::IsEmpty(&m_visitedTraits, m_visited)); - - // Display basic blocks. - DBEXEC(VERBOSE, comp->fgDispBasicBlocks()); - DBEXEC(VERBOSE, comp->fgDispHandlerTab()); - - auto DumpBlockAndSuccessors = [](Compiler* comp, BasicBlock* block) { -#ifdef DEBUG - if (comp->verboseSsa) - { - printf("[SsaBuilder::TopologicalSort] Pushing " FMT_BB ": [", block->bbNum); - AllSuccessorEnumerator successors(comp, block); - unsigned index = 0; - while (true) - { - BasicBlock* succ = successors.NextSuccessor(comp); - - if (succ == nullptr) - { - break; - } - - printf("%s" FMT_BB, (index++ ? ", " : ""), succ->bbNum); - } - printf("]\n"); - } -#endif - }; - - // Compute order. - unsigned postIndex = 0; - BasicBlock* block = comp->fgFirstBB; - BitVecOps::AddElemD(&m_visitedTraits, m_visited, block->bbNum); - - ArrayStack blocks(m_allocator); - blocks.Emplace(comp, block); - DumpBlockAndSuccessors(comp, block); - - while (!blocks.Empty()) - { - BasicBlock* block = blocks.TopRef().Block(); - BasicBlock* succ = blocks.TopRef().NextSuccessor(comp); - - if (succ != nullptr) - { - // if the block on TOS still has unreached successors, visit them - if (BitVecOps::TryAddElemD(&m_visitedTraits, m_visited, succ->bbNum)) - { - blocks.Emplace(comp, succ); - DumpBlockAndSuccessors(comp, succ); - } - } - else - { - // all successors have been visited - blocks.Pop(); - - DBG_SSA_JITDUMP("[SsaBuilder::TopologicalSort] postOrder[%u] = " FMT_BB "\n", postIndex, block->bbNum); - postOrder[postIndex] = block; - block->bbPostorderNum = postIndex; - postIndex++; - } - } - - return postIndex; -} - /** * Computes the immediate dominator IDom for each block iteratively. * @@ -226,10 +145,14 @@ unsigned SsaBuilder::TopologicalSort(BasicBlock** postOrder, int count) * * @see "A simple, fast dominance algorithm." 
paper. */ -void SsaBuilder::ComputeImmediateDom(BasicBlock** postOrder, int count) +void SsaBuilder::ComputeImmediateDom() { JITDUMP("[SsaBuilder::ComputeImmediateDom]\n"); + FlowGraphDfsTree* dfs = m_pCompiler->m_dfs; + BasicBlock** postOrder = dfs->GetPostOrder(); + unsigned count = dfs->GetPostOrderCount(); + // Add entry point to visited as its IDom is NULL. assert(postOrder[count - 1] == m_pCompiler->fgFirstBB); @@ -604,14 +527,15 @@ void SsaBuilder::AddPhiArg( * * To do so, the function computes liveness, dominance frontier and inserts a phi node, * if we have var v in def(b) and live-in(l) and l is in DF(b). - * - * @param postOrder The array of basic blocks arranged in postOrder. - * @param count The size of valid elements in the postOrder array. */ -void SsaBuilder::InsertPhiFunctions(BasicBlock** postOrder, int count) +void SsaBuilder::InsertPhiFunctions() { JITDUMP("*************** In SsaBuilder::InsertPhiFunctions()\n"); + FlowGraphDfsTree* dfs = m_pCompiler->m_dfs; + BasicBlock** postOrder = dfs->GetPostOrder(); + unsigned count = dfs->GetPostOrderCount(); + // Compute dominance frontier. BlkToBlkVectorMap mapDF(m_allocator); ComputeDominanceFrontiers(postOrder, count, &mapDF); @@ -622,7 +546,7 @@ void SsaBuilder::InsertPhiFunctions(BasicBlock** postOrder, int count) JITDUMP("Inserting phi functions:\n"); - for (int i = 0; i < count; ++i) + for (unsigned i = 0; i < count; ++i) { BasicBlock* block = postOrder[i]; DBG_SSA_JITDUMP("Considering dominance frontier of block " FMT_BB ":\n", block->bbNum); @@ -1494,8 +1418,6 @@ void SsaBuilder::Build() // Allocate the postOrder array for the graph. - m_postOrder = new (m_allocator) BasicBlock*[blockCount]; - m_visitedTraits = BitVecTraits(blockCount, m_pCompiler); m_visited = BitVecOps::MakeEmpty(&m_visitedTraits); @@ -1511,13 +1433,10 @@ void SsaBuilder::Build() block->bbPostorderNum = 0; } - // Topologically sort the graph. 
- m_postOrderCount = TopologicalSort(m_postOrder, blockCount); - JITDUMP("[SsaBuilder] Topologically sorted the graph.\n"); - EndPhase(PHASE_BUILD_SSA_TOPOSORT); + m_pCompiler->m_dfs = m_pCompiler->fgComputeDfs(); // Compute IDom(b). - ComputeImmediateDom(m_postOrder, m_postOrderCount); + ComputeImmediateDom(); m_pCompiler->fgSsaDomTree = m_pCompiler->fgBuildDomTree(); EndPhase(PHASE_BUILD_SSA_DOMS); @@ -1536,7 +1455,7 @@ void SsaBuilder::Build() } // Insert phi functions. - InsertPhiFunctions(m_postOrder, m_postOrderCount); + InsertPhiFunctions(); // Rename local variables and collect UD information for each ssa var. RenameVariables(); diff --git a/src/coreclr/jit/ssabuilder.h b/src/coreclr/jit/ssabuilder.h index 92faf0f21d05ef..9dd405d774cfc7 100644 --- a/src/coreclr/jit/ssabuilder.h +++ b/src/coreclr/jit/ssabuilder.h @@ -33,29 +33,15 @@ class SsaBuilder // variable are stored in the "per SSA data" on the local descriptor. void Build(); - BasicBlock** GetPostOrder(unsigned* count) - { - *count = m_postOrderCount; - return m_postOrder; - } - private: // Ensures that the basic block graph has a root for the dominator graph, by ensuring // that there is a first block that is not in a try region (adding an empty block for that purpose // if necessary). Eventually should move to Compiler. void SetupBBRoot(); - // Requires "postOrder" to be an array of size "count". Requires "count" to at least - // be the size of the flow graph. Sorts the current compiler's flow-graph and places - // the blocks in post order (i.e., a node's children first) in the array. Returns the - // number of nodes visited while sorting the graph. In other words, valid entries in - // the output array. - unsigned TopologicalSort(BasicBlock** postOrder, int count); - - // Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted - // order. Requires count to be the valid entries in the "postOrder" array. 
Computes - // each block's immediate dominator and records it in the BasicBlock in bbIDom. - void ComputeImmediateDom(BasicBlock** postOrder, int count); + // Computes each block's immediate dominator and records it in the + // BasicBlock in bbIDom. + void ComputeImmediateDom(); // Compute flow graph dominance frontiers. void ComputeDominanceFrontiers(BasicBlock** postOrder, int count, BlkToBlkVectorMap* mapDF); @@ -73,7 +59,7 @@ class SsaBuilder // Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted order. Requires // count to be the valid entries in the "postOrder" array. Inserts GT_PHI nodes at the beginning // of basic blocks that require them. - void InsertPhiFunctions(BasicBlock** postOrder, int count); + void InsertPhiFunctions(); // Rename all definitions and uses within the compiled method. void RenameVariables(); @@ -110,6 +96,4 @@ class SsaBuilder BitVec m_visited; SsaRenameState m_renameStack; - BasicBlock** m_postOrder = nullptr; - unsigned m_postOrderCount = 0; }; diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 774ddd877c0bed..433df197a7d92d 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -9668,8 +9668,7 @@ class ValueNumberState // bool IsReachable(BasicBlock* bb) { - return (bb->bbPostorderNum < m_comp->fgSSAPostOrderCount) && - (m_comp->fgSSAPostOrder[bb->bbPostorderNum] == bb) && + return m_comp->m_dfs->Contains(bb) && !BitVecOps::IsMember(&m_blockTraits, m_provenUnreachableBlocks, bb->bbNum); } @@ -9850,9 +9849,11 @@ PhaseStatus Compiler::fgValueNumber() // SSA has already computed a post-order taking EH successors into account. // Visiting that in reverse will ensure we visit a block's predecessors // before itself whenever possible. 
- for (unsigned i = fgSSAPostOrderCount; i != 0; i--) + BasicBlock** postOrder = m_dfs->GetPostOrder(); + unsigned postOrderCount = m_dfs->GetPostOrderCount(); + for (unsigned i = postOrderCount; i != 0; i--) { - BasicBlock* block = fgSSAPostOrder[i - 1]; + BasicBlock* block = postOrder[i - 1]; JITDUMP("Visiting " FMT_BB "\n", block->bbNum); if (block != fgFirstBB)