From f4a156931dd0ddc6fad45366fdb8b8ca812cc11f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 26 Nov 2023 22:54:25 +0100 Subject: [PATCH 1/4] JIT: Factor SSA's DFS and profile synthesis's loop finding Factor out SSA's general DFS (that takes EH into account) and encapsulate it in a `FlowGraphDfsTree` class. Factor out profile synthesis's loop finding and encapsulate it in a `FlowGraphNaturalLoops` class. Switch construction of it to use the general DFS instead of the restricted one (that does not account for exceptional flow). Optimize a few things in the process: * Avoid storing loop blocks in a larger than necessary bit vector; store them starting from the loop header's postorder index instead. * Provide post-order and reverse post-order visitors for the loop blocks; switch profile synthesis to use this in one place. No diffs are expected with profile synthesis disabled. A small number of diffs are expected when profile synthesis is enabled, due to the modelling of exceptional flow and also from handling unreachable predecessors (which would reject some loops as unnatural loops before). My future plans are to proceed to replace the existing representation of loops with this factored version, removing the lexicality requirement in the process, and hopefully fixing some of our deficiencies. 
--- src/coreclr/jit/bitsetasshortlong.h | 118 ++++++ src/coreclr/jit/block.h | 2 +- src/coreclr/jit/compiler.h | 197 ++++++++- src/coreclr/jit/compiler.hpp | 98 ++++- src/coreclr/jit/compmemkind.h | 2 + src/coreclr/jit/fgbasic.cpp | 2 + src/coreclr/jit/fgdiagnostic.cpp | 4 + src/coreclr/jit/fgprofilesynthesis.cpp | 497 ++-------------------- src/coreclr/jit/fgprofilesynthesis.h | 57 +-- src/coreclr/jit/flowgraph.cpp | 557 +++++++++++++++++++++++++ src/coreclr/jit/morph.cpp | 5 +- src/coreclr/jit/ssabuilder.cpp | 115 +---- src/coreclr/jit/ssabuilder.h | 24 +- src/coreclr/jit/valuenum.cpp | 9 +- 14 files changed, 1050 insertions(+), 637 deletions(-) diff --git a/src/coreclr/jit/bitsetasshortlong.h b/src/coreclr/jit/bitsetasshortlong.h index 5a2e315673cbe0..2ef293820fd264 100644 --- a/src/coreclr/jit/bitsetasshortlong.h +++ b/src/coreclr/jit/bitsetasshortlong.h @@ -561,6 +561,124 @@ class BitSetOps + static bool VisitBits(Env env, BitSetShortLongRep bs, TFunc func) + { +#ifdef HOST_64BIT +#define BitScanForwardSizeT BitScanForward64 +#else +#define BitScanForwardSizeT BitScanForward +#endif + + if (BitSetOps::IsShort(env)) + { + size_t bits = reinterpret_cast(bs); + DWORD index; + while (BitScanForwardSizeT(&index, bits)) + { + if (!func(index)) + return false; + + bits ^= size_t(1) << index; + } + } + else + { + unsigned len = BitSetTraits::GetArrSize(env); + for (unsigned i = 0; i < len; i++) + { + size_t bits = bs[i]; + DWORD index; + while (BitScanForwardSizeT(&index, bits)) + { + if (!func(i * BitsInSizeT + index)) + return false; + + bits ^= size_t(1) << index; + } + } + } + + return true; +#undef BitScanForwardSizeT + } + + //------------------------------------------------------------------------ + // VisitBitsReverse: Invoke a callback for each index that is set in the + // bit vector, in descending order of indices. 
+ // + // Type parameters: + // TFunc - Type of callback functor + // + // Arguments: + // env - The traits + // bs - The bit vector + // func - The functor callback. Return true to continue to the next bit, + // and false to abort. + // + // Returns: + // True if all bits were iterated; false if the callback returned false + // and iteration was aborted. + // + template + static bool VisitBitsReverse(Env env, BitSetShortLongRep bs, TFunc func) + { +#ifdef HOST_64BIT +#define BitScanReverseSizeT BitScanReverse64 +#else +#define BitScanReverseSizeT BitScanReverse +#endif + + if (BitSetOps::IsShort(env)) + { + size_t bits = reinterpret_cast(bs); + DWORD index; + while (BitScanReverseSizeT(&index, bits)) + { + if (!func(index)) + return false; + + bits ^= size_t(1) << index; + } + } + else + { + unsigned len = BitSetTraits::GetArrSize(env); + for (unsigned i = len; i != 0; i--) + { + size_t bits = bs[i - 1]; + DWORD index; + while (BitScanReverseSizeT(&index, bits)) + { + if (!func((i - 1) * BitsInSizeT + index)) + return false; + + bits ^= size_t(1) << index; + } + } + } + + return true; +#undef BitScanReverseSizeT + } + typedef const BitSetShortLongRep& ValArgType; typedef BitSetShortLongRep RetValType; }; diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index 307b3580f92942..9a7df5016c49d9 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -2097,7 +2097,7 @@ class AllSuccessorEnumerator } // Returns the next available successor or `nullptr` if there are no more successors. 
- BasicBlock* NextSuccessor(Compiler* comp) + BasicBlock* NextSuccessor() { m_curSucc++; if (m_curSucc >= m_numSuccs) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index d3eabe34466c4d..ba8603de78debb 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1957,6 +1957,197 @@ inline LoopFlags& operator&=(LoopFlags& a, LoopFlags b) return a = (LoopFlags)((unsigned short)a & (unsigned short)b); } +// Represents a depth-first search tree of the flow graph. +class FlowGraphDfsTree +{ + Compiler* m_comp; + BasicBlock** m_postOrder; + unsigned m_postOrderCount; + +public: + FlowGraphDfsTree(Compiler* comp, BasicBlock** postOrder, unsigned postOrderCount) + : m_comp(comp) + , m_postOrder(postOrder) + , m_postOrderCount(postOrderCount) + { + } + + Compiler* GetCompiler() const + { + return m_comp; + } + + BasicBlock** GetPostOrder() const + { + return m_postOrder; + } + + unsigned GetPostOrderCount() const + { + return m_postOrderCount; + } + + BitVecTraits PostOrderTraits() const + { + return BitVecTraits(m_postOrderCount, m_comp); + } + + bool Contains(BasicBlock* block) const; + bool IsAncestor(BasicBlock* ancestor, BasicBlock* descendant) const; +}; + +class FlowGraphNaturalLoop +{ + friend class FlowGraphNaturalLoops; + + const FlowGraphDfsTree* m_tree; + BasicBlock* m_header; + FlowGraphNaturalLoop* m_parent = nullptr; + // Bit vector of blocks in the loop; each index is the RPO index a block, + // with the head block's RPO index subtracted. 
+ BitVec m_blocks; + unsigned m_blocksSize = 0; + jitstd::vector m_backEdges; + jitstd::vector m_entryEdges; + jitstd::vector m_exitEdges; + unsigned m_index = 0; + + FlowGraphNaturalLoop(const FlowGraphDfsTree* tree, BasicBlock* head); + + unsigned LoopBlockBitVecIndex(BasicBlock* block); + bool TryGetLoopBlockBitVecIndex(BasicBlock* block, unsigned* pIndex); + + BitVecTraits LoopBlockTraits(); +public: + BasicBlock* GetHeader() const + { + return m_header; + } + + const FlowGraphDfsTree* GetDfsTree() const + { + return m_tree; + } + + FlowGraphNaturalLoop* GetParent() const + { + return m_parent; + } + + unsigned GetIndex() const + { + return m_index; + } + + const jitstd::vector& BackEdges() + { + return m_backEdges; + } + + const jitstd::vector& EntryEdges() + { + return m_entryEdges; + } + + const jitstd::vector& ExitEdges() + { + return m_exitEdges; + } + + bool ContainsBlock(BasicBlock* block); + + template + BasicBlockVisit VisitLoopBlocksReversePostOrder(TFunc func); + + template + BasicBlockVisit VisitLoopBlocksPostOrder(TFunc func); + + template + BasicBlockVisit VisitLoopBlocks(TFunc func); +}; + +class FlowGraphNaturalLoops +{ + const FlowGraphDfsTree* m_dfs; + jitstd::vector m_loops; + unsigned m_improperLoopHeaders = 0; + + FlowGraphNaturalLoops(const FlowGraphDfsTree* dfs); + +public: + size_t NumLoops() + { + return m_loops.size(); + } + + bool HasNonNaturalLoopCycles() + { + return m_improperLoopHeaders> 0; + } + + FlowGraphNaturalLoop* GetLoopFromHeader(BasicBlock* header); + + bool IsLoopBackEdge(FlowEdge* edge); + bool IsLoopExitEdge(FlowEdge* edge); + + class LoopsPostOrderIter + { + jitstd::vector* m_loops; + + public: + LoopsPostOrderIter(jitstd::vector* loops) + : m_loops(loops) + { + } + + jitstd::vector::reverse_iterator begin() + { + return m_loops->rbegin(); + } + + jitstd::vector::reverse_iterator end() + { + return m_loops->rend(); + } + }; + + class LoopsReversePostOrderIter + { + jitstd::vector* m_loops; + + public: + 
LoopsReversePostOrderIter(jitstd::vector* loops) + : m_loops(loops) + { + } + + jitstd::vector::iterator begin() + { + return m_loops->begin(); + } + + jitstd::vector::iterator end() + { + return m_loops->end(); + } + }; + + // Iterate the loops in post order (child loops before parent loops) + LoopsPostOrderIter InPostOrder() + { + return LoopsPostOrderIter(&m_loops); + } + + // Iterate the loops in reverse post order (parent loops before child loops) + LoopsReversePostOrderIter InReversePostOrder() + { + return LoopsReversePostOrderIter(&m_loops); + } + + static FlowGraphNaturalLoops* Find(const FlowGraphDfsTree* dfs); + static bool FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, jitstd::list& worklist); +}; + // The following holds information about instr offsets in terms of generated code. enum class IPmappingDscKind @@ -2052,6 +2243,7 @@ class Compiler friend class LocalsUseVisitor; friend class Promotion; friend class ReplaceVisitor; + friend class FlowGraphNaturalLoop; #ifdef FEATURE_HW_INTRINSICS friend struct HWIntrinsicInfo; @@ -4493,8 +4685,7 @@ class Compiler unsigned fgBBNumMax; // The max bbNum that has been assigned to basic blocks unsigned fgDomBBcount; // # of BBs for which we have dominator and reachability information BasicBlock** fgBBReversePostorder; // Blocks in reverse postorder - BasicBlock** fgSSAPostOrder; // Blocks in postorder, computed during SSA - unsigned fgSSAPostOrderCount; // Number of blocks in fgSSAPostOrder + FlowGraphDfsTree* m_dfs; // After the dominance tree is computed, we cache a DFS preorder number and DFS postorder number to compute // dominance queries in O(1). 
fgDomTreePreOrder and fgDomTreePostOrder are arrays giving the block's preorder and @@ -5588,6 +5779,8 @@ class Compiler PhaseStatus fgSetBlockOrder(); + FlowGraphDfsTree* fgComputeDfs(); + void fgRemoveReturnBlock(BasicBlock* block); void fgConvertBBToThrowBB(BasicBlock* block); diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 26f4392326a0dd..e8b58faf4dfaa3 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -591,9 +591,14 @@ BasicBlockVisit BasicBlock::VisitAllSuccs(Compiler* comp, TFunc func) switch (bbJumpKind) { case BBJ_EHFINALLYRET: - for (unsigned i = 0; i < bbJumpEhf->bbeCount; i++) + // This can run before import, in which case we haven't converted + // LEAVE into callfinally yet, and haven't added return successors. + if (bbJumpEhf != nullptr) { - RETURN_ON_ABORT(func(bbJumpEhf->bbeSuccs[i])); + for (unsigned i = 0; i < bbJumpEhf->bbeCount; i++) + { + RETURN_ON_ABORT(func(bbJumpEhf->bbeSuccs[i])); + } } return VisitEHSuccs(comp, func); @@ -673,9 +678,14 @@ BasicBlockVisit BasicBlock::VisitRegularSuccs(Compiler* comp, TFunc func) switch (bbJumpKind) { case BBJ_EHFINALLYRET: - for (unsigned i = 0; i < bbJumpEhf->bbeCount; i++) + // This can run before import, in which case we haven't converted + // LEAVE into callfinally yet, and haven't added return successors. + if (bbJumpEhf != nullptr) { - RETURN_ON_ABORT(func(bbJumpEhf->bbeSuccs[i])); + for (unsigned i = 0; i < bbJumpEhf->bbeCount; i++) + { + RETURN_ON_ABORT(func(bbJumpEhf->bbeSuccs[i])); + } } return BasicBlockVisit::Continue; @@ -4936,6 +4946,86 @@ inline bool Compiler::compCanHavePatchpoints(const char** reason) return whyNot == nullptr; } +//------------------------------------------------------------------------------ +// FlowGraphNaturalLoop::VisitLoopBlocksReversePostOrder: Visit all of the +// loop's blocks in reverse post order. 
+// +// Type parameters: +// TFunc - Callback functor type +// +// Arguments: +// func - Callback functor that takes a BasicBlock* and returns a +// BasicBlockVisit. +// +// Returns: +// BasicBlockVisit that indicated whether the visit was aborted by the +// callback or whether all blocks were visited. +// +template +BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksReversePostOrder(TFunc func) +{ + BitVecTraits traits(m_blocksSize, m_tree->GetCompiler()); + bool result = BitVecOps::VisitBits(&traits, m_blocks, [=](unsigned index) { + // head block rpo index = PostOrderCount - 1 - headPreOrderIndex + // loop block rpo index = head block rpoIndex + index + // loop block po index = PostOrderCount - 1 - loop block rpo index + // = headPreOrderIndex - index + unsigned poIndex = m_header->bbPostorderNum - index; + assert(poIndex < m_tree->GetPostOrderCount()); + return func(m_tree->GetPostOrder()[poIndex]) == BasicBlockVisit::Continue; + }); + + return result ? BasicBlockVisit::Continue : BasicBlockVisit::Abort; +} + +//------------------------------------------------------------------------------ +// FlowGraphNaturalLoop::VisitLoopBlocksPostOrder: Visit all of the loop's +// blocks in post order. +// +// Type parameters: +// TFunc - Callback functor type +// +// Arguments: +// func - Callback functor that takes a BasicBlock* and returns a +// BasicBlockVisit. +// +// Returns: +// BasicBlockVisit that indicated whether the visit was aborted by the +// callback or whether all blocks were visited. +// +template +BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocksPostOrder(TFunc func) +{ + BitVecTraits traits(m_blocksSize, m_tree->GetCompiler()); + bool result = BitVecOps::VisitBitsReverse(&traits, m_blocks, [=](unsigned index) { + unsigned poIndex = m_header->bbPostorderNum - index; + assert(poIndex < m_tree->GetPostOrderCount()); + return func(m_tree->GetPostOrder()[poIndex]) == BasicBlockVisit::Continue; + }); + + return result ? 
BasicBlockVisit::Continue : BasicBlockVisit::Abort; +} + +//------------------------------------------------------------------------------ +// FlowGraphNaturalLoop::VisitLoopBlocks: Visit all of the loop's blocks. +// +// Type parameters: +// TFunc - Callback functor type +// +// Arguments: +// func - Callback functor that takes a BasicBlock* and returns a +// BasicBlockVisit. +// +// Returns: +// BasicBlockVisit that indicated whether the visit was aborted by the +// callback or whether all blocks were visited. +// +template +BasicBlockVisit FlowGraphNaturalLoop::VisitLoopBlocks(TFunc func) +{ + return VisitLoopBlocksReversePostOrder(func); +} + /*****************************************************************************/ #endif //_COMPILER_HPP_ /*****************************************************************************/ diff --git a/src/coreclr/jit/compmemkind.h b/src/coreclr/jit/compmemkind.h index 645a6b44f80ee2..3112cba822d1bc 100644 --- a/src/coreclr/jit/compmemkind.h +++ b/src/coreclr/jit/compmemkind.h @@ -17,6 +17,8 @@ CompMemKindMacro(ImpStack) CompMemKindMacro(BasicBlock) CompMemKindMacro(CallArgs) CompMemKindMacro(FlowEdge) +CompMemKindMacro(DepthFirstSearch) +CompMemKindMacro(Loops) CompMemKindMacro(TreeStatementList) CompMemKindMacro(SiScope) CompMemKindMacro(DominatorMemory) diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 34ca63d39a2cd4..fd665d64543b52 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -68,6 +68,8 @@ void Compiler::fgInit() fgBBVarSetsInited = false; fgReturnCount = 0; + m_dfs = nullptr; + // Initialize BlockSet data. 
fgCurBBEpoch = 0; fgCurBBEpochSize = 0; diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index 8ddc9aecf84121..cf703b04869269 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -2970,6 +2970,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef bool allNodesLinked = (fgNodeThreading == NodeThreading::AllTrees) || (fgNodeThreading == NodeThreading::LIR); unsigned numBlocks = 0; + unsigned maxBBNum = 0; for (BasicBlock* const block : Blocks()) { @@ -2981,6 +2982,8 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef assert(block->IsLast() || (block->bbNum + 1 == block->Next()->bbNum)); } + maxBBNum = max(maxBBNum, block->bbNum); + // Check that all the successors have the current traversal stamp. Use the 'Compiler*' version of the // iterator, but not for BBJ_SWITCH: we don't want to end up calling GetDescriptorForSwitch(), which will // dynamically create the unique switch list. @@ -3184,6 +3187,7 @@ void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */, bool checkBBRef } assert(fgBBcount == numBlocks); + assert(fgBBNumMax >= maxBBNum); // Make sure the one return BB is not changed. 
if (genReturnBB != nullptr) diff --git a/src/coreclr/jit/fgprofilesynthesis.cpp b/src/coreclr/jit/fgprofilesynthesis.cpp index c9204c1e971e6f..d048caa504cb78 100644 --- a/src/coreclr/jit/fgprofilesynthesis.cpp +++ b/src/coreclr/jit/fgprofilesynthesis.cpp @@ -33,8 +33,8 @@ // void ProfileSynthesis::Run(ProfileSynthesisOption option) { - BuildReversePostorder(); - FindLoops(); + m_dfs = m_comp->fgComputeDfs(); + m_loops = FlowGraphNaturalLoops::Find(m_dfs); // Retain or compute edge likelihood information // @@ -176,111 +176,6 @@ void ProfileSynthesis::AssignLikelihoods() } } -//------------------------------------------------------------------------ -// IsDfsAncestor: see if block `x` is ancestor of block `y` in the depth -// first spanning tree -// -// Arguments: -// x -- block that is possible ancestor -// y -- block that is possible descendant -// -// Returns: -// True if x is ancestor of y in the depth first spanning tree. -// -// Notes: -// If return value is false, then x does not dominate y. -// -bool ProfileSynthesis::IsDfsAncestor(BasicBlock* x, BasicBlock* y) -{ - return ((x->bbPreorderNum <= y->bbPreorderNum) && (y->bbPostorderNum <= x->bbPostorderNum)); -} - -//------------------------------------------------------------------------ -// GetLoopFromHeader: see if a block is a loop header, and if so return -// the associated loop. -// -// Arguments: -// block - block in question -// -// Returns: -// loop headed by block, or nullptr -// -SimpleLoop* ProfileSynthesis::GetLoopFromHeader(BasicBlock* block) -{ - for (SimpleLoop* loop : *m_loops) - { - if (loop->m_head == block) - { - return loop; - } - } - - return nullptr; -} - -//------------------------------------------------------------------------ -// IsLoopBackEdge: see if an edge is a loop back edge -// -// Arguments: -// edge - edge in question -// -// Returns: -// True if edge is a backedge in some recognized loop. 
-// -// Notes: -// Different than asking IsDfsAncestor since we disqualify some -// natural backedges for complex loop strctures. -// -// Todo: -// Annotate the edge directly -// -bool ProfileSynthesis::IsLoopBackEdge(FlowEdge* edge) -{ - for (SimpleLoop* loop : *m_loops) - { - for (FlowEdge* loopBackEdge : loop->m_backEdges) - { - if (loopBackEdge == edge) - { - return true; - } - } - } - - return false; -} - -//------------------------------------------------------------------------ -// IsLoopExitEdge: see if a flow edge is a loop exit edge -// -// Arguments: -// edge - edge in question -// -// Returns: -// True if edge is an exit edge in some recognized loop -// -// Todo: -// Annotate the edge directly -// -// Decide if we want to report that the edge exits -// multiple loops. - -bool ProfileSynthesis::IsLoopExitEdge(FlowEdge* edge) -{ - for (SimpleLoop* loop : *m_loops) - { - for (FlowEdge* loopExitEdge : loop->m_exitEdges) - { - if (loopExitEdge == edge) - { - return true; - } - } - } - - return false; -} - //------------------------------------------------------------------------ // AssignLikelihoodNext: update edge likelihood for block that always // transfers control to bbNext @@ -353,8 +248,8 @@ void ProfileSynthesis::AssignLikelihoodCond(BasicBlock* block) // LOOP BACK EDGE heuristic // - bool const isJumpEdgeBackEdge = IsLoopBackEdge(jumpEdge); - bool const isNextEdgeBackEdge = IsLoopBackEdge(nextEdge); + bool const isJumpEdgeBackEdge = m_loops->IsLoopBackEdge(jumpEdge); + bool const isNextEdgeBackEdge = m_loops->IsLoopBackEdge(nextEdge); if (isJumpEdgeBackEdge != isNextEdgeBackEdge) { @@ -379,8 +274,8 @@ void ProfileSynthesis::AssignLikelihoodCond(BasicBlock* block) // Consider: adjust probability if loop has multiple exit edges, so that // overall exit probability is around 0.1. 
// - bool const isJumpEdgeExitEdge = IsLoopExitEdge(jumpEdge); - bool const isNextEdgeExitEdge = IsLoopExitEdge(nextEdge); + bool const isJumpEdgeExitEdge = m_loops->IsLoopExitEdge(jumpEdge); + bool const isNextEdgeExitEdge = m_loops->IsLoopExitEdge(nextEdge); if (isJumpEdgeExitEdge != isNextEdgeExitEdge) { @@ -783,263 +678,22 @@ void ProfileSynthesis::RandomizeLikelihoods() #endif // DEBUG } -//------------------------------------------------------------------------ -// fgBuildReversePostorder: compute depth first spanning tree and pre -// and post numbers for the blocks -// -void ProfileSynthesis::BuildReversePostorder() -{ - m_comp->EnsureBasicBlockEpoch(); - m_comp->fgDfsReversePostorder(); - - // Build map from bbNum to Block*. - // - m_bbNumToBlockMap = new (m_comp, CMK_Pgo) BasicBlock*[m_comp->fgBBNumMax + 1]{}; - for (BasicBlock* const block : m_comp->Blocks()) - { - m_bbNumToBlockMap[block->bbNum] = block; - } - -#ifdef DEBUG - if (m_comp->verbose) - { - printf("\nAfter doing a post order traversal of the BB graph, this is the ordering:\n"); - for (unsigned i = 1; i <= m_comp->fgBBNumMax; ++i) - { - BasicBlock* const block = m_comp->fgBBReversePostorder[i]; - printf("%02u -> " FMT_BB "[%u, %u]\n", i, block->bbNum, block->bbPreorderNum, block->bbPostorderNum); - } - printf("\n"); - } -#endif // DEBUG -} - -//------------------------------------------------------------------------ -// FindLoops: locate and classify loops -// -void ProfileSynthesis::FindLoops() -{ - CompAllocator allocator = m_comp->getAllocator(CMK_Pgo); - m_loops = new (allocator) LoopVector(allocator); - - // Identify loops - // - for (unsigned i = 1; i <= m_comp->fgBBNumMax; i++) - { - BasicBlock* const block = m_comp->fgBBReversePostorder[i]; - - // If a block is a DFS ancestor of one if its predecessors then the block is a loop header. 
- // - SimpleLoop* loop = nullptr; - - for (FlowEdge* predEdge : block->PredEdges()) - { - if (IsDfsAncestor(block, predEdge->getSourceBlock())) - { - if (loop == nullptr) - { - loop = new (allocator) SimpleLoop(block, allocator); - JITDUMP("\n"); - } - - JITDUMP(FMT_BB " -> " FMT_BB " is a backedge\n", predEdge->getSourceBlock()->bbNum, block->bbNum); - loop->m_backEdges.push_back(predEdge); - } - } - - if (loop == nullptr) - { - continue; - } - - JITDUMP(FMT_BB " is head of a DFS loop with %d back edges\n", block->bbNum, loop->m_backEdges.size()); - - // Now walk back in flow along the back edges from block to determine if - // this is a natural loop and to find all the blocks in the loop. - // - loop->m_blocks = BlockSetOps::MakeEmpty(m_comp); - BlockSetOps::AddElemD(m_comp, loop->m_blocks, block->bbNum); - - // todo: hoist this out and just do a reset here - jitstd::list worklist(allocator); - - // Seed the worklist - // - for (FlowEdge* backEdge : loop->m_backEdges) - { - BasicBlock* const backEdgeSource = backEdge->getSourceBlock(); - - if (BlockSetOps::IsMember(m_comp, loop->m_blocks, backEdgeSource->bbNum)) - { - continue; - } - - worklist.push_back(backEdgeSource); - } - - bool isNaturalLoop = true; - - // Work back through flow to loop head or to another pred - // that is clearly outside the loop. - // - // TODO: verify that we can indeed get back to the loop head - // and not get stopped somewhere (eg loop through EH). - // - while (!worklist.empty() & isNaturalLoop) - { - BasicBlock* const loopBlock = worklist.back(); - worklist.pop_back(); - BlockSetOps::AddElemD(m_comp, loop->m_blocks, loopBlock->bbNum); - - for (FlowEdge* const predEdge : loopBlock->PredEdges()) - { - BasicBlock* const predBlock = predEdge->getSourceBlock(); - - // `block` cannot dominate `predBlock` unless it is a DFS ancestor. - // - if (!IsDfsAncestor(block, predBlock)) - { - // Does this represent flow out of some handler? - // If so we will ignore it. 
- // - // Might want to vet that handler's try region entry - // is a dfs ancestor...? - // - if (!BasicBlock::sameHndRegion(block, predBlock)) - { - continue; - } - - JITDUMP("Loop is not natural; witness " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum, - loopBlock->bbNum); - - isNaturalLoop = false; - m_improperLoopHeaders++; - break; - } - - if (BlockSetOps::IsMember(m_comp, loop->m_blocks, predBlock->bbNum)) - { - continue; - } - - worklist.push_back(predBlock); - } - } - - if (!isNaturalLoop) - { - continue; - } - - JITDUMP("Loop has %d blocks\n", BlockSetOps::Count(m_comp, loop->m_blocks)); - - // Find the exit edges - // - BlockSetOps::Iter iter(m_comp, loop->m_blocks); - unsigned bbNum = 0; - while (iter.NextElem(&bbNum)) - { - BasicBlock* const loopBlock = m_bbNumToBlockMap[bbNum]; - - for (BasicBlock* const succBlock : loopBlock->Succs(m_comp)) - { - if (!BlockSetOps::IsMember(m_comp, loop->m_blocks, succBlock->bbNum)) - { - FlowEdge* const exitEdge = m_comp->fgGetPredForBlock(succBlock, loopBlock); - JITDUMP(FMT_BB " -> " FMT_BB " is an exit edge\n", loopBlock->bbNum, succBlock->bbNum); - loop->m_exitEdges.push_back(exitEdge); - } - } - } - - // Find the entry edges - // - // Note if fgEntryBB is a loop head we won't have an entry edge. - // So it needs to be special cased later on when processing - // entry edges. - // - for (FlowEdge* const predEdge : loop->m_head->PredEdges()) - { - if (!IsDfsAncestor(block, predEdge->getSourceBlock())) - { - JITDUMP(FMT_BB " -> " FMT_BB " is an entry edge\n", predEdge->getSourceBlock()->bbNum, - loop->m_head->bbNum); - loop->m_entryEdges.push_back(predEdge); - } - } - - // Search for parent loop, validate proper nesting. - // - // Since loops record in outer->inner order the parent will be the - // most recently recorded loop that contains this loop's header. 
- // - for (auto it = m_loops->rbegin(), itEnd = m_loops->rend(); it != itEnd; ++it) - { - SimpleLoop* const otherLoop = *it; - - if (BlockSetOps::IsMember(m_comp, otherLoop->m_blocks, block->bbNum)) - { - // Ancestor loop; should contain all blocks of this loop - // - assert(BlockSetOps::IsSubset(m_comp, loop->m_blocks, otherLoop->m_blocks)); - - if (loop->m_parent == nullptr) - { - loop->m_parent = otherLoop; - loop->m_depth = otherLoop->m_depth + 1; - JITDUMP("at depth %u, nested within loop starting at " FMT_BB "\n", loop->m_depth, - otherLoop->m_head->bbNum); - - // Note we could break here but that would bypass the non-overlap check - // just below, so for now we check against all known loops. - } - } - else - { - // Non-ancestor loop; should have no blocks in common with current loop - // - assert(BlockSetOps::IsEmptyIntersection(m_comp, loop->m_blocks, otherLoop->m_blocks)); - } - } - - if (loop->m_parent == nullptr) - { - JITDUMP("top-level loop\n"); - loop->m_depth = 1; - } - - // Record this loop - // - m_loops->push_back(loop); - } - - if (m_loops->size() > 0) - { - JITDUMP("\nFound %d loops\n", m_loops->size()); - } - - if (m_improperLoopHeaders > 0) - { - JITDUMP("Rejected %d loop headers\n", m_improperLoopHeaders); - } -} - //------------------------------------------------------------------------ // FindCyclicProbabilities: for each loop, compute how much flow returns // to the loop head given one external count. // void ProfileSynthesis::ComputeCyclicProbabilities() { - // We found loop walking in reverse postorder, so the loop vector - // is naturally organized with outer loops before inner. - // - // Walk it backwards here so we compute inner loop cyclic probabilities - // first. We rely on that when processing outer loops. 
- // - for (auto it = m_loops->rbegin(), itEnd = m_loops->rend(); it != itEnd; ++it) + m_cyclicProbabilities = nullptr; + if (m_loops->NumLoops() == 0) + { + return; + } + + m_cyclicProbabilities = new (m_comp, CMK_Pgo) weight_t[m_loops->NumLoops()]; + // Walk loops in post order to visit inner loops before outer loops. + for (FlowGraphNaturalLoop* loop : m_loops->InPostOrder()) { - SimpleLoop* const loop = *it; ComputeCyclicProbabilities(loop); } } @@ -1048,52 +702,34 @@ void ProfileSynthesis::ComputeCyclicProbabilities() // FindCyclicProbabilities: for a given loop, compute how much flow returns // to the loop head given one external count. // -void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop) +void ProfileSynthesis::ComputeCyclicProbabilities(FlowGraphNaturalLoop* loop) { - // Initialize - // - BlockSetOps::Iter iter(m_comp, loop->m_blocks); - unsigned bbNum = 0; - while (iter.NextElem(&bbNum)) - { - BasicBlock* const loopBlock = m_bbNumToBlockMap[bbNum]; - loopBlock->bbWeight = 0.0; - } - // Process loop blocks in RPO. Just takes one pass through the loop blocks // as any cyclic contributions are handled by cyclic probabilities. // - for (unsigned int i = 1; i <= m_comp->fgBBNumMax; i++) - { - BasicBlock* const block = m_comp->fgBBReversePostorder[i]; - - if (!BlockSetOps::IsMember(m_comp, loop->m_blocks, block->bbNum)) - { - continue; - } - + loop->VisitLoopBlocksReversePostOrder([=](BasicBlock* block) { // Loop head gets external count of 1 // - if (block == loop->m_head) + if (block == loop->GetHeader()) { JITDUMP("ccp: " FMT_BB " :: 1.0\n", block->bbNum); block->bbWeight = 1.0; } else { - SimpleLoop* const nestedLoop = GetLoopFromHeader(block); + FlowGraphNaturalLoop* const nestedLoop = m_loops->GetLoopFromHeader(block); if (nestedLoop != nullptr) { // We should have figured this out already. 
// - assert(nestedLoop->m_cyclicProbability != 0); + assert(m_cyclicProbabilities[nestedLoop->GetIndex()] != 0); // Sum entry edges, multply by Cp // weight_t newWeight = 0.0; - for (FlowEdge* const edge : nestedLoop->m_entryEdges) + for (FlowEdge* const edge : nestedLoop->EntryEdges()) { if (BasicBlock::sameHndRegion(block, edge->getSourceBlock())) { @@ -1101,7 +737,7 @@ void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop) } } - newWeight *= nestedLoop->m_cyclicProbability; + newWeight *= m_cyclicProbabilities[nestedLoop->GetIndex()]; block->bbWeight = newWeight; JITDUMP("ccp (nested header): " FMT_BB " :: " FMT_WT "\n", block->bbNum, newWeight); @@ -1123,17 +759,19 @@ void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop) JITDUMP("ccp: " FMT_BB " :: " FMT_WT "\n", block->bbNum, newWeight); } } - } + + return BasicBlockVisit::Continue; + }); // Now look at cyclic flow back to the head block. // weight_t cyclicWeight = 0; bool capped = false; - for (FlowEdge* const edge : loop->m_backEdges) + for (FlowEdge* const edge : loop->BackEdges()) { JITDUMP("ccp backedge " FMT_BB " (" FMT_WT ") -> " FMT_BB " likelihood " FMT_WT "\n", - edge->getSourceBlock()->bbNum, edge->getSourceBlock()->bbWeight, loop->m_head->bbNum, + edge->getSourceBlock()->bbNum, edge->getSourceBlock()->bbWeight, loop->GetHeader()->bbNum, edge->getLikelihood()); cyclicWeight += edge->getLikelyWeight(); @@ -1157,22 +795,22 @@ void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop) weight_t const cyclicProbability = 1.0 / (1.0 - cyclicWeight); JITDUMP("For loop at " FMT_BB " cyclic weight is " FMT_WT " cyclic probability is " FMT_WT "%s\n", - loop->m_head->bbNum, cyclicWeight, cyclicProbability, capped ? " [capped]" : ""); + loop->GetHeader()->bbNum, cyclicWeight, cyclicProbability, capped ? 
" [capped]" : ""); - loop->m_cyclicProbability = cyclicProbability; + m_cyclicProbabilities[loop->GetIndex()] = cyclicProbability; // Try and adjust loop exit likelihood to reflect capping. // If there are multiple exits we just adjust the first one we can. This is somewhat arbitrary. // If there are no exits, there's nothing we can do. // - if (capped && (loop->m_exitEdges.size() > 0)) + if (capped && (loop->ExitEdges().size() > 0)) { // Figure out how much flow exits the loop with the capped probablility // and current block frequencies and exit likelihoods. // weight_t cappedExitWeight = 0.0; - for (FlowEdge* const exitEdge : loop->m_exitEdges) + for (FlowEdge* const exitEdge : loop->ExitEdges()) { BasicBlock* const exitBlock = exitEdge->getSourceBlock(); weight_t const exitBlockFrequency = exitBlock->bbWeight; @@ -1198,7 +836,7 @@ void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop) bool adjustedExit = false; - for (FlowEdge* const exitEdge : loop->m_exitEdges) + for (FlowEdge* const exitEdge : loop->ExitEdges()) { // Does this block have enough weight that it can supply all the missing weight? // @@ -1354,70 +992,13 @@ void ProfileSynthesis::AssignInputWeights(ProfileSynthesisOption option) // ComputeBlockWeights: compute weights for all blocks // based on input weights, edge likelihoods, and cyclic probabilities // -// Notes: -// We want to first walk the main method body, then any finally -// handers from outermost to innermost. -// -// The depth first walk we did to kick off synthesis has split the -// graph into a forest of depth first spanning trees. We leverage -// this and the EH table structure to accomplish the visiting order above. -// -// We might be able to avoid all this if during the DFS walk we -// walked from try entries to filter or handlers, so that a -// single DFST encompassed all the reachable blocks in the right order. 
-// void ProfileSynthesis::ComputeBlockWeights() { JITDUMP("Computing block weights\n"); - // Main method body - // - ComputeBlockWeightsSubgraph(m_comp->fgFirstBB); - - // All finally and fault handlers from outer->inner - // (walk EH table backwards) - // - if (!m_comp->compIsForInlining()) - { - for (unsigned i = 0; i < m_comp->compHndBBtabCount; i++) - { - unsigned const XTnum = m_comp->compHndBBtabCount - i - 1; - EHblkDsc* const HBtab = &m_comp->compHndBBtab[XTnum]; - if (HBtab->HasFilter()) - { - // Filter subtree includes handler - // - ComputeBlockWeightsSubgraph(HBtab->ebdFilter); - } - else - { - ComputeBlockWeightsSubgraph(HBtab->ebdHndBeg); - } - } - } - - // Anything else is unreachable and will have zero count -} - -//------------------------------------------------------------------------ -// ComputeBlockWeights: compute weights for all blocks in a particular DFST -// -// Arguments: -// entry - root node of a DFST -// -void ProfileSynthesis::ComputeBlockWeightsSubgraph(BasicBlock* entry) -{ - // Determine the range of indices for this DFST in the overall RPO. 
- // - const unsigned firstIndex = m_comp->fgBBNumMax - entry->bbPostorderNum + 1; - assert(m_comp->fgBBReversePostorder[firstIndex] == entry); - - assert(entry->bbPostorderNum >= entry->bbPreorderNum); - const unsigned lastIndex = firstIndex + entry->bbPostorderNum - entry->bbPreorderNum; - - for (unsigned int i = firstIndex; i <= lastIndex; i++) + for (unsigned i = m_dfs->GetPostOrderCount(); i != 0; i--) { - BasicBlock* const block = m_comp->fgBBReversePostorder[i]; + BasicBlock* block = m_dfs->GetPostOrder()[i - 1]; ComputeBlockWeight(block); } } @@ -1430,15 +1011,15 @@ void ProfileSynthesis::ComputeBlockWeightsSubgraph(BasicBlock* entry) // void ProfileSynthesis::ComputeBlockWeight(BasicBlock* block) { - SimpleLoop* const loop = GetLoopFromHeader(block); - weight_t newWeight = block->bbWeight; - const char* kind = ""; + FlowGraphNaturalLoop* const loop = m_loops->GetLoopFromHeader(block); + weight_t newWeight = block->bbWeight; + const char* kind = ""; if (loop != nullptr) { // Sum all entry edges that aren't EH flow // - for (FlowEdge* const edge : loop->m_entryEdges) + for (FlowEdge* const edge : loop->EntryEdges()) { if (BasicBlock::sameHndRegion(block, edge->getSourceBlock())) { @@ -1448,7 +1029,7 @@ void ProfileSynthesis::ComputeBlockWeight(BasicBlock* block) // Scale by cyclic probability // - newWeight *= loop->m_cyclicProbability; + newWeight *= m_cyclicProbabilities[loop->GetIndex()]; kind = " (loop head)"; } else diff --git a/src/coreclr/jit/fgprofilesynthesis.h b/src/coreclr/jit/fgprofilesynthesis.h index a5d5f7d827f43d..ab82fffe5e37cd 100644 --- a/src/coreclr/jit/fgprofilesynthesis.h +++ b/src/coreclr/jit/fgprofilesynthesis.h @@ -9,34 +9,7 @@ // Flowgraph Profile Synthesis -typedef jitstd::vector EdgeVector; -typedef jitstd::vector WeightVector; - -struct SimpleLoop -{ - SimpleLoop(BasicBlock* head, CompAllocator allocator) - : m_head(head) - , m_parent(nullptr) - , m_blocks(BlockSetOps::UninitVal()) - , m_entryEdges(allocator) - , 
m_backEdges(allocator) - , m_exitEdges(allocator) - , m_cyclicProbability(0) - , m_depth(0) - { - } - - BasicBlock* m_head; - SimpleLoop* m_parent; - BlockSet m_blocks; - EdgeVector m_entryEdges; - EdgeVector m_backEdges; - EdgeVector m_exitEdges; - weight_t m_cyclicProbability; - unsigned m_depth; -}; - -typedef jitstd::vector LoopVector; +typedef jitstd::vector WeightVector; //------------------------------------------------------------------------ // ProfileSynthesisOption: specify behavior of profile synthesis @@ -68,11 +41,7 @@ class ProfileSynthesis private: ProfileSynthesis(Compiler* compiler) - : m_comp(compiler) - , m_loops(nullptr) - , m_bbNumToBlockMap(nullptr) - , m_improperLoopHeaders(0) - , m_cappedCyclicProbabilities(0) + : m_comp(compiler), m_loops(nullptr), m_improperLoopHeaders(0), m_cappedCyclicProbabilities(0) { } @@ -86,14 +55,6 @@ class ProfileSynthesis void Run(ProfileSynthesisOption option); - void BuildReversePostorder(); - static bool IsDfsAncestor(BasicBlock* x, BasicBlock* y); - bool IsLoopBackEdge(FlowEdge* edge); - bool IsLoopExitEdge(FlowEdge* edge); - - void FindLoops(); - SimpleLoop* GetLoopFromHeader(BasicBlock* block); - weight_t SumOutgoingLikelihoods(BasicBlock* block, WeightVector* likelihoods = nullptr); void AssignLikelihoods(); @@ -108,20 +69,20 @@ class ProfileSynthesis void RandomizeLikelihoods(); void ComputeCyclicProbabilities(); - void ComputeCyclicProbabilities(SimpleLoop* loop); + void ComputeCyclicProbabilities(FlowGraphNaturalLoop* loop); void AssignInputWeights(ProfileSynthesisOption option); void ComputeBlockWeights(); - void ComputeBlockWeightsSubgraph(BasicBlock* block); void ComputeBlockWeight(BasicBlock* block); private: - Compiler* const m_comp; - LoopVector* m_loops; - BasicBlock** m_bbNumToBlockMap; - unsigned m_improperLoopHeaders; - unsigned m_cappedCyclicProbabilities; + Compiler* const m_comp; + FlowGraphDfsTree* m_dfs; + FlowGraphNaturalLoops* m_loops; + weight_t* m_cyclicProbabilities; + unsigned 
m_improperLoopHeaders; + unsigned m_cappedCyclicProbabilities; }; #endif // !_FGPROFILESYNTHESIS_H_ diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index a237cfea0ff44c..f81c55b004a043 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -4083,3 +4083,560 @@ void Compiler::fgLclFldAssign(unsigned lclNum) lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::LocalField)); } } + +//------------------------------------------------------------------------ +// FlowGraphDfsTree::Contains: Check if a block is contained in the DFS tree; +// i.e., if it is reachable. +// +// Arguments: +// block - The block +// +// Return Value: +// True if the block is reachable from the root. +// +bool FlowGraphDfsTree::Contains(BasicBlock* block) const +{ + return (block->bbPostorderNum < m_postOrderCount) && (m_postOrder[block->bbPostorderNum] == block); +} + +//------------------------------------------------------------------------ +// FlowGraphDfsTree::IsAncestor: Check if block `ancestor` is an ancestor of +// block `descendant` +// +// Arguments: +// ancestor -- block that is possible ancestor +// descendant -- block that is possible descendant +// +// Returns: +// True if `ancestor` is ancestor of `descendant` in the depth first spanning +// tree. +// +// Notes: +// If return value is false, then `ancestor` does not dominate `descendant`. +// +bool FlowGraphDfsTree::IsAncestor(BasicBlock* ancestor, BasicBlock* descendant) const +{ + assert(Contains(ancestor) && Contains(descendant)); + return (ancestor->bbPreorderNum <= descendant->bbPreorderNum) && + (descendant->bbPostorderNum <= ancestor->bbPostorderNum); +} + +//------------------------------------------------------------------------ +// fgComputeDfs: Compute a depth-first search tree for the flow graph. +// +// Returns: +// The tree. +// +// Notes: +// Preorder and postorder numbers are assigned into the BasicBlock structure. 
+// The tree returned contains a postorder of the basic blocks. +// +FlowGraphDfsTree* Compiler::fgComputeDfs() +{ + BasicBlock** postOrder = new (this, CMK_DepthFirstSearch) BasicBlock*[fgBBcount]; + BitVecTraits traits(fgBBNumMax + 1, this); + + BitVec visited(BitVecOps::MakeEmpty(&traits)); + + unsigned preOrderIndex = 0; + unsigned postOrderIndex = 0; + + ArrayStack blocks(getAllocator(CMK_DepthFirstSearch)); + + auto dfsFrom = [&, postOrder](BasicBlock* firstBB) { + + BitVecOps::AddElemD(&traits, visited, firstBB->bbNum); + blocks.Emplace(this, firstBB); + firstBB->bbPreorderNum = preOrderIndex++; + + while (!blocks.Empty()) + { + BasicBlock* block = blocks.TopRef().Block(); + BasicBlock* succ = blocks.TopRef().NextSuccessor(); + + if (succ != nullptr) + { + if (BitVecOps::TryAddElemD(&traits, visited, succ->bbNum)) + { + blocks.Emplace(this, succ); + succ->bbPreorderNum = preOrderIndex++; + } + } + else + { + blocks.Pop(); + postOrder[postOrderIndex] = block; + block->bbPostorderNum = postOrderIndex++; + } + } + + }; + + dfsFrom(fgFirstBB); + + if ((fgEntryBB != nullptr) && !BitVecOps::IsMember(&traits, visited, fgEntryBB->bbNum)) + { + // OSR methods will early on create flow that looks like it goes to the + // patchpoint, but during morph we may transform to something that + // requires the original entry (fgEntryBB). + assert(opts.IsOSR()); + assert((fgEntryBB->bbRefs == 1) && (fgEntryBB->bbPreds == nullptr)); + dfsFrom(fgEntryBB); + } + + if ((genReturnBB != nullptr) && !BitVecOps::IsMember(&traits, visited, genReturnBB->bbNum) && !fgGlobalMorphDone) + { + // We introduce the merged return BB before morph and will redirect + // other returns to it as part of morph; keep it reachable. 
+ dfsFrom(genReturnBB);
+ }
+
+ return new (this, CMK_DepthFirstSearch) FlowGraphDfsTree(this, postOrder, postOrderIndex);
+}
+
+//------------------------------------------------------------------------
+// FlowGraphNaturalLoop::FlowGraphNaturalLoop: Initialize a new loop instance.
+//
+// Parameters:
+// tree - The DFS tree
+// header - The loop header
+//
+FlowGraphNaturalLoop::FlowGraphNaturalLoop(const FlowGraphDfsTree* tree, BasicBlock* header)
+ : m_tree(tree)
+ , m_header(header)
+ , m_blocks(BitVecOps::UninitVal())
+ , m_backEdges(tree->GetCompiler()->getAllocator(CMK_Loops))
+ , m_entryEdges(tree->GetCompiler()->getAllocator(CMK_Loops))
+ , m_exitEdges(tree->GetCompiler()->getAllocator(CMK_Loops))
+{
+}
+
+//------------------------------------------------------------------------
+// LoopBlockBitVecIndex: Convert a basic block to an index into the bit vector
+// used to store the set of loop blocks.
+//
+// Parameters:
+// block - The block
+//
+// Returns:
+// Index into the bit vector
+//
+// Remarks:
+// The bit vector is stored with the base index of the loop header since we
+// know the header is an ancestor of all loop blocks. Thus we do not need to
+// waste space on previous blocks.
+//
+// This function should only be used when it is known that the block has an
+// index in the loop bit vector.
+//
+unsigned FlowGraphNaturalLoop::LoopBlockBitVecIndex(BasicBlock* block)
+{
+ assert(m_tree->Contains(block));
+
+ unsigned index = m_header->bbPostorderNum - block->bbPostorderNum;
+ assert(index < m_blocksSize);
+ return index;
+}
+
+//------------------------------------------------------------------------
+// TryGetLoopBlockBitVecIndex: Convert a basic block to an index into the bit
+// vector used to store the set of loop blocks.
+//
+// Parameters:
+// block - The block
+// pIndex - [out] Index into the bit vector, if this function returns true.
+//
+// Returns:
+// True if the block has an index in the loop bit vector.
+//
+// Remarks:
+// See LoopBlockBitVecIndex for more information. This function can be
+// used when it is not known whether the block has an index in the loop bit
+// vector.
+//
+bool FlowGraphNaturalLoop::TryGetLoopBlockBitVecIndex(BasicBlock* block, unsigned* pIndex)
+{
+ if (block->bbPostorderNum > m_header->bbPostorderNum)
+ {
+ return false;
+ }
+
+ unsigned index = m_header->bbPostorderNum - block->bbPostorderNum;
+ if (index >= m_blocksSize)
+ {
+ return false;
+ }
+
+ *pIndex = index;
+ return true;
+}
+
+//------------------------------------------------------------------------
+// LoopBlockTraits: Get traits for a bit vector for blocks in this loop.
+//
+// Returns:
+// Bit vector traits.
+//
+BitVecTraits FlowGraphNaturalLoop::LoopBlockTraits()
+{
+ return BitVecTraits(m_blocksSize, m_tree->GetCompiler());
+}
+
+//------------------------------------------------------------------------
+// ContainsBlock: Returns true if this loop contains the specified block.
+//
+// Parameters:
+// block - A block
+//
+// Returns:
+// True if the block is contained in the loop.
+//
+// Remarks:
+// Containment here means that the block is in the SCC of the loop; i.e. it
+// is in a cycle with the header block. Note that EH successors are taken
+// into account; for example, a BBJ_RETURN may still be a loop block provided
+// that its handler can reach the loop header.
+//
+bool FlowGraphNaturalLoop::ContainsBlock(BasicBlock* block)
+{
+ unsigned index;
+ if (!TryGetLoopBlockBitVecIndex(block, &index))
+ {
+ return false;
+ }
+
+ BitVecTraits traits = LoopBlockTraits();
+ return BitVecOps::IsMember(&traits, m_blocks, index);
+}
+
+//------------------------------------------------------------------------
+// FlowGraphNaturalLoops::FlowGraphNaturalLoops: Initialize a new instance to
+// track a set of loops over the flow graph.
+//
+// Parameters:
+// dfs - A DFS tree.
+// +FlowGraphNaturalLoops::FlowGraphNaturalLoops(const FlowGraphDfsTree* dfs) + : m_dfs(dfs), m_loops(m_dfs->GetCompiler()->getAllocator(CMK_Loops)) +{ +} + +// GetLoopFromHeader: See if a block is a loop header, and if so return the +// associated loop. +// +// Parameters: +// block - block in question +// +// Returns: +// Loop headed by block, or nullptr +// +FlowGraphNaturalLoop* FlowGraphNaturalLoops::GetLoopFromHeader(BasicBlock* block) +{ + // TODO-TP: This can use binary search based on post order number. + for (FlowGraphNaturalLoop* loop : m_loops) + { + if (loop->m_header == block) + { + return loop; + } + } + + return nullptr; +} + +//------------------------------------------------------------------------ +// IsLoopBackEdge: See if an edge is a loop back edge +// +// Parameters: +// edge - edge in question +// +// Returns: +// True if edge is a backedge in some recognized loop. +// +bool FlowGraphNaturalLoops::IsLoopBackEdge(FlowEdge* edge) +{ + for (FlowGraphNaturalLoop* loop : m_loops) + { + for (FlowEdge* loopBackEdge : loop->m_backEdges) + { + if (loopBackEdge == edge) + { + return true; + } + } + } + + return false; +} + +//------------------------------------------------------------------------ +// IsLoopExitEdge: see if a flow edge is a loop exit edge +// +// Parameters: +// edge - edge in question +// +// Returns: +// True if edge is an exit edge in some recognized loop. Note that a single +// edge may exit multiple loops. +// +bool FlowGraphNaturalLoops::IsLoopExitEdge(FlowEdge* edge) +{ + for (FlowGraphNaturalLoop* loop : m_loops) + { + for (FlowEdge* loopExitEdge : loop->m_exitEdges) + { + if (loopExitEdge == edge) + { + return true; + } + } + } + + return false; +} + +//------------------------------------------------------------------------ +// FlowGraphNaturalLoops::Find: Find natural loops in the specified DFS tree +// constructed for the flow graph. +// +// Parameters: +// dfs - The DFS tree +// +// Returns: +// Identified natural loops. 
+//
+FlowGraphNaturalLoops* FlowGraphNaturalLoops::Find(const FlowGraphDfsTree* dfs)
+{
+ Compiler* comp = dfs->GetCompiler();
+ comp->m_blockToEHPreds = nullptr;
+
+#ifdef DEBUG
+ JITDUMP("Identifying loops in DFS tree with following reverse post order:\n");
+ for (unsigned i = dfs->GetPostOrderCount(); i != 0; i--)
+ {
+ unsigned rpoNum = dfs->GetPostOrderCount() - i;
+ BasicBlock* const block = dfs->GetPostOrder()[i - 1];
+ JITDUMP("%02u -> " FMT_BB "[%u, %u]\n", rpoNum + 1, block->bbNum, block->bbPreorderNum + 1,
+ block->bbPostorderNum + 1);
+ }
+#endif
+
+ FlowGraphNaturalLoops* loops = new (comp, CMK_Loops) FlowGraphNaturalLoops(dfs);
+
+ jitstd::list worklist(comp->getAllocator(CMK_Loops));
+
+ for (unsigned i = dfs->GetPostOrderCount(); i != 0; i--)
+ {
+ BasicBlock* const header = dfs->GetPostOrder()[i - 1];
+
+ // If a block is a DFS ancestor of one of its predecessors then the block is a loop header.
+ //
+ FlowGraphNaturalLoop* loop = nullptr;
+
+ for (FlowEdge* predEdge : header->PredEdges())
+ {
+ BasicBlock* predBlock = predEdge->getSourceBlock();
+ if (dfs->Contains(predBlock) && dfs->IsAncestor(header, predBlock))
+ {
+ if (loop == nullptr)
+ {
+ loop = new (comp, CMK_Loops) FlowGraphNaturalLoop(dfs, header);
+ JITDUMP("\n");
+ }
+
+ JITDUMP(FMT_BB " -> " FMT_BB " is a backedge\n", predBlock->bbNum, header->bbNum);
+ loop->m_backEdges.push_back(predEdge);
+ }
+ }
+
+ if (loop == nullptr)
+ {
+ continue;
+ }
+
+ JITDUMP(FMT_BB " is the header of a DFS loop with %zu back edges\n", header->bbNum, loop->m_backEdges.size());
+
+ // Now walk back in flow along the back edges from head to determine if
+ // this is a natural loop and to find all the blocks in the loop.
+ // + + worklist.clear(); + loop->m_blocksSize = loop->m_header->bbPostorderNum + 1; + + BitVecTraits loopTraits = loop->LoopBlockTraits(); + loop->m_blocks = BitVecOps::MakeEmpty(&loopTraits); + + if (!FindNaturalLoopBlocks(loop, worklist)) + { + loops->m_improperLoopHeaders++; + continue; + } + + JITDUMP("Loop has %u blocks\n", BitVecOps::Count(&loopTraits, loop->m_blocks)); + + // Find the exit edges + // + loop->VisitLoopBlocks([=](BasicBlock* loopBlock) { + loopBlock->VisitRegularSuccs(comp, [=](BasicBlock* succBlock) { + if (!loop->ContainsBlock(succBlock)) + { + FlowEdge* const exitEdge = comp->fgGetPredForBlock(succBlock, loopBlock); + JITDUMP(FMT_BB " -> " FMT_BB " is an exit edge\n", loopBlock->bbNum, succBlock->bbNum); + loop->m_exitEdges.push_back(exitEdge); + } + + return BasicBlockVisit::Continue; + }); + + return BasicBlockVisit::Continue; + }); + + // Find the entry edges + // + // Note if fgEntryBB is a loop head we won't have an entry edge. + // So it needs to be special cased later on when processing + // entry edges. + // + for (FlowEdge* const predEdge : loop->m_header->PredEdges()) + { + BasicBlock* predBlock = predEdge->getSourceBlock(); + if (dfs->Contains(predBlock) && !dfs->IsAncestor(header, predEdge->getSourceBlock())) + { + JITDUMP(FMT_BB " -> " FMT_BB " is an entry edge\n", predEdge->getSourceBlock()->bbNum, + loop->m_header->bbNum); + loop->m_entryEdges.push_back(predEdge); + } + } + + // Search for parent loop. + // + // Since loops record in outer->inner order the parent will be the + // most recently recorded loop that contains this loop's header. + // + for (FlowGraphNaturalLoop* const otherLoop : loops->InPostOrder()) + { + if (otherLoop->ContainsBlock(header)) + { + loop->m_parent = otherLoop; + JITDUMP("Nested within loop starting at " FMT_BB "\n", otherLoop->GetHeader()->bbNum); + break; + } + } + +#ifdef DEBUG + // In debug, validate nestedness versus other loops. 
+ // + for (FlowGraphNaturalLoop* const otherLoop : loops->InPostOrder()) + { + if (otherLoop->ContainsBlock(header)) + { + // Ancestor loop; should contain all blocks of this loop + // + loop->VisitLoopBlocks([otherLoop](BasicBlock* loopBlock) { + assert(otherLoop->ContainsBlock(loopBlock)); + return BasicBlockVisit::Continue; + }); + } + else + { + // Non-ancestor loop; should have no blocks in common with current loop + // + loop->VisitLoopBlocks([otherLoop](BasicBlock* loopBlock) { + assert(!otherLoop->ContainsBlock(loopBlock)); + return BasicBlockVisit::Continue; + }); + } + } +#endif + + // Record this loop + // + loop->m_index = (unsigned)loops->m_loops.size(); + loops->m_loops.push_back(loop); + + JITDUMP("Added loop " FMT_LP " with header " FMT_BB "\n", loop->GetIndex(), loop->GetHeader()->bbNum); + } + + if (loops->m_loops.size() > 0) + { + JITDUMP("\nFound %zu loops\n", loops->m_loops.size()); + } + + if (loops->m_improperLoopHeaders > 0) + { + JITDUMP("Rejected %u loop headers\n", loops->m_improperLoopHeaders); + } + + return loops; +} + +//------------------------------------------------------------------------ +// FlowGraphNaturalLoops::FindNaturalLoopBlocks: Find the loop blocks for a +// specified loop. +// +// Parameters: +// loop - The natural loop we are constructing +// worklist - Scratch worklist to use for the search +// +// Returns: +// True if the loop is natural; marks the loop blocks into 'loop' as part of +// the search. 
+// +bool FlowGraphNaturalLoops::FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, jitstd::list& worklist) +{ + const FlowGraphDfsTree* tree = loop->m_tree; + Compiler* comp = tree->GetCompiler(); + BitVecTraits loopTraits = loop->LoopBlockTraits(); + BitVecOps::AddElemD(&loopTraits, loop->m_blocks, 0); + + // Seed the worklist + // + worklist.clear(); + for (FlowEdge* backEdge : loop->m_backEdges) + { + BasicBlock* const backEdgeSource = backEdge->getSourceBlock(); + if (backEdgeSource == loop->GetHeader()) + { + continue; + } + + assert(!BitVecOps::IsMember(&loopTraits, loop->m_blocks, loop->LoopBlockBitVecIndex(backEdgeSource))); + worklist.push_back(backEdgeSource); + BitVecOps::AddElemD(&loopTraits, loop->m_blocks, loop->LoopBlockBitVecIndex(backEdgeSource)); + } + + // Work back through flow to loop head or to another pred + // that is clearly outside the loop. + // + while (!worklist.empty()) + { + BasicBlock* const loopBlock = worklist.back(); + worklist.pop_back(); + + for (FlowEdge* predEdge = comp->BlockPredsWithEH(loopBlock); predEdge != nullptr; + predEdge = predEdge->getNextPredEdge()) + { + BasicBlock* const predBlock = predEdge->getSourceBlock(); + + if (!tree->Contains(predBlock)) + { + continue; + } + + // Head cannot dominate `predBlock` unless it is a DFS ancestor. 
+ // + if (!tree->IsAncestor(loop->GetHeader(), predBlock)) + { + JITDUMP("Loop is not natural; witness " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum, loopBlock->bbNum); + return false; + } + + if (BitVecOps::TryAddElemD(&loopTraits, loop->m_blocks, loop->LoopBlockBitVecIndex(predBlock))) + { + worklist.push_back(predBlock); + } + } + } + + return true; +} diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 73a8474324d395..d8f2e472b7efcc 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -14124,8 +14124,9 @@ PhaseStatus Compiler::fgMorphBlocks() // We are done with the global morphing phase // - fgGlobalMorph = false; - compCurBB = nullptr; + fgGlobalMorph = false; + fgGlobalMorphDone = true; + compCurBB = nullptr; #ifdef DEBUG if (optLocalAssertionProp) diff --git a/src/coreclr/jit/ssabuilder.cpp b/src/coreclr/jit/ssabuilder.cpp index 9bedef7bc937e5..cab35f7ae654a7 100644 --- a/src/coreclr/jit/ssabuilder.cpp +++ b/src/coreclr/jit/ssabuilder.cpp @@ -61,6 +61,9 @@ PhaseStatus Compiler::fgSsaBuild() fgResetForSsa(); } + // Reset BlockPredsWithEH cache. + m_blockToEHPreds = nullptr; + SsaBuilder builder(this); builder.Build(); fgSsaPassesCompleted++; @@ -69,8 +72,6 @@ PhaseStatus Compiler::fgSsaBuild() JitTestCheckSSA(); #endif // DEBUG - fgSSAPostOrder = builder.GetPostOrder(&fgSSAPostOrderCount); - return PhaseStatus::MODIFIED_EVERYTHING; } @@ -136,88 +137,6 @@ SsaBuilder::SsaBuilder(Compiler* pCompiler) { } -//------------------------------------------------------------------------ -// TopologicalSort: Topologically sort the graph and return the number of nodes visited. -// -// Arguments: -// postOrder - The array in which the arranged basic blocks have to be returned. -// count - The size of the postOrder array. -// -// Return Value: -// The number of nodes visited while performing DFS on the graph. 
-// -unsigned SsaBuilder::TopologicalSort(BasicBlock** postOrder, int count) -{ - Compiler* comp = m_pCompiler; - - // TopologicalSort is called first so m_visited should already be empty - assert(BitVecOps::IsEmpty(&m_visitedTraits, m_visited)); - - // Display basic blocks. - DBEXEC(VERBOSE, comp->fgDispBasicBlocks()); - DBEXEC(VERBOSE, comp->fgDispHandlerTab()); - - auto DumpBlockAndSuccessors = [](Compiler* comp, BasicBlock* block) { -#ifdef DEBUG - if (comp->verboseSsa) - { - printf("[SsaBuilder::TopologicalSort] Pushing " FMT_BB ": [", block->bbNum); - AllSuccessorEnumerator successors(comp, block); - unsigned index = 0; - while (true) - { - BasicBlock* succ = successors.NextSuccessor(comp); - - if (succ == nullptr) - { - break; - } - - printf("%s" FMT_BB, (index++ ? ", " : ""), succ->bbNum); - } - printf("]\n"); - } -#endif - }; - - // Compute order. - unsigned postIndex = 0; - BasicBlock* block = comp->fgFirstBB; - BitVecOps::AddElemD(&m_visitedTraits, m_visited, block->bbNum); - - ArrayStack blocks(m_allocator); - blocks.Emplace(comp, block); - DumpBlockAndSuccessors(comp, block); - - while (!blocks.Empty()) - { - BasicBlock* block = blocks.TopRef().Block(); - BasicBlock* succ = blocks.TopRef().NextSuccessor(comp); - - if (succ != nullptr) - { - // if the block on TOS still has unreached successors, visit them - if (BitVecOps::TryAddElemD(&m_visitedTraits, m_visited, succ->bbNum)) - { - blocks.Emplace(comp, succ); - DumpBlockAndSuccessors(comp, succ); - } - } - else - { - // all successors have been visited - blocks.Pop(); - - DBG_SSA_JITDUMP("[SsaBuilder::TopologicalSort] postOrder[%u] = " FMT_BB "\n", postIndex, block->bbNum); - postOrder[postIndex] = block; - block->bbPostorderNum = postIndex; - postIndex++; - } - } - - return postIndex; -} - /** * Computes the immediate dominator IDom for each block iteratively. * @@ -226,10 +145,14 @@ unsigned SsaBuilder::TopologicalSort(BasicBlock** postOrder, int count) * * @see "A simple, fast dominance algorithm." 
paper. */ -void SsaBuilder::ComputeImmediateDom(BasicBlock** postOrder, int count) +void SsaBuilder::ComputeImmediateDom() { JITDUMP("[SsaBuilder::ComputeImmediateDom]\n"); + FlowGraphDfsTree* dfs = m_pCompiler->m_dfs; + BasicBlock** postOrder = dfs->GetPostOrder(); + unsigned count = dfs->GetPostOrderCount(); + // Add entry point to visited as its IDom is NULL. assert(postOrder[count - 1] == m_pCompiler->fgFirstBB); @@ -604,14 +527,15 @@ void SsaBuilder::AddPhiArg( * * To do so, the function computes liveness, dominance frontier and inserts a phi node, * if we have var v in def(b) and live-in(l) and l is in DF(b). - * - * @param postOrder The array of basic blocks arranged in postOrder. - * @param count The size of valid elements in the postOrder array. */ -void SsaBuilder::InsertPhiFunctions(BasicBlock** postOrder, int count) +void SsaBuilder::InsertPhiFunctions() { JITDUMP("*************** In SsaBuilder::InsertPhiFunctions()\n"); + FlowGraphDfsTree* dfs = m_pCompiler->m_dfs; + BasicBlock** postOrder = dfs->GetPostOrder(); + unsigned count = dfs->GetPostOrderCount(); + // Compute dominance frontier. BlkToBlkVectorMap mapDF(m_allocator); ComputeDominanceFrontiers(postOrder, count, &mapDF); @@ -622,7 +546,7 @@ void SsaBuilder::InsertPhiFunctions(BasicBlock** postOrder, int count) JITDUMP("Inserting phi functions:\n"); - for (int i = 0; i < count; ++i) + for (unsigned i = 0; i < count; ++i) { BasicBlock* block = postOrder[i]; DBG_SSA_JITDUMP("Considering dominance frontier of block " FMT_BB ":\n", block->bbNum); @@ -1494,8 +1418,6 @@ void SsaBuilder::Build() // Allocate the postOrder array for the graph. - m_postOrder = new (m_allocator) BasicBlock*[blockCount]; - m_visitedTraits = BitVecTraits(blockCount, m_pCompiler); m_visited = BitVecOps::MakeEmpty(&m_visitedTraits); @@ -1511,13 +1433,10 @@ void SsaBuilder::Build() block->bbPostorderNum = 0; } - // Topologically sort the graph. 
- m_postOrderCount = TopologicalSort(m_postOrder, blockCount); - JITDUMP("[SsaBuilder] Topologically sorted the graph.\n"); - EndPhase(PHASE_BUILD_SSA_TOPOSORT); + m_pCompiler->m_dfs = m_pCompiler->fgComputeDfs(); // Compute IDom(b). - ComputeImmediateDom(m_postOrder, m_postOrderCount); + ComputeImmediateDom(); m_pCompiler->fgSsaDomTree = m_pCompiler->fgBuildDomTree(); EndPhase(PHASE_BUILD_SSA_DOMS); @@ -1536,7 +1455,7 @@ void SsaBuilder::Build() } // Insert phi functions. - InsertPhiFunctions(m_postOrder, m_postOrderCount); + InsertPhiFunctions(); // Rename local variables and collect UD information for each ssa var. RenameVariables(); diff --git a/src/coreclr/jit/ssabuilder.h b/src/coreclr/jit/ssabuilder.h index 92faf0f21d05ef..9dd405d774cfc7 100644 --- a/src/coreclr/jit/ssabuilder.h +++ b/src/coreclr/jit/ssabuilder.h @@ -33,29 +33,15 @@ class SsaBuilder // variable are stored in the "per SSA data" on the local descriptor. void Build(); - BasicBlock** GetPostOrder(unsigned* count) - { - *count = m_postOrderCount; - return m_postOrder; - } - private: // Ensures that the basic block graph has a root for the dominator graph, by ensuring // that there is a first block that is not in a try region (adding an empty block for that purpose // if necessary). Eventually should move to Compiler. void SetupBBRoot(); - // Requires "postOrder" to be an array of size "count". Requires "count" to at least - // be the size of the flow graph. Sorts the current compiler's flow-graph and places - // the blocks in post order (i.e., a node's children first) in the array. Returns the - // number of nodes visited while sorting the graph. In other words, valid entries in - // the output array. - unsigned TopologicalSort(BasicBlock** postOrder, int count); - - // Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted - // order. Requires count to be the valid entries in the "postOrder" array. 
Computes - // each block's immediate dominator and records it in the BasicBlock in bbIDom. - void ComputeImmediateDom(BasicBlock** postOrder, int count); + // Computes each block's immediate dominator and records it in the + // BasicBlock in bbIDom. + void ComputeImmediateDom(); // Compute flow graph dominance frontiers. void ComputeDominanceFrontiers(BasicBlock** postOrder, int count, BlkToBlkVectorMap* mapDF); @@ -73,7 +59,7 @@ class SsaBuilder // Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted order. Requires // count to be the valid entries in the "postOrder" array. Inserts GT_PHI nodes at the beginning // of basic blocks that require them. - void InsertPhiFunctions(BasicBlock** postOrder, int count); + void InsertPhiFunctions(); // Rename all definitions and uses within the compiled method. void RenameVariables(); @@ -110,6 +96,4 @@ class SsaBuilder BitVec m_visited; SsaRenameState m_renameStack; - BasicBlock** m_postOrder = nullptr; - unsigned m_postOrderCount = 0; }; diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 774ddd877c0bed..433df197a7d92d 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -9668,8 +9668,7 @@ class ValueNumberState // bool IsReachable(BasicBlock* bb) { - return (bb->bbPostorderNum < m_comp->fgSSAPostOrderCount) && - (m_comp->fgSSAPostOrder[bb->bbPostorderNum] == bb) && + return m_comp->m_dfs->Contains(bb) && !BitVecOps::IsMember(&m_blockTraits, m_provenUnreachableBlocks, bb->bbNum); } @@ -9850,9 +9849,11 @@ PhaseStatus Compiler::fgValueNumber() // SSA has already computed a post-order taking EH successors into account. // Visiting that in reverse will ensure we visit a block's predecessors // before itself whenever possible. 
- for (unsigned i = fgSSAPostOrderCount; i != 0; i--) + BasicBlock** postOrder = m_dfs->GetPostOrder(); + unsigned postOrderCount = m_dfs->GetPostOrderCount(); + for (unsigned i = postOrderCount; i != 0; i--) { - BasicBlock* block = fgSSAPostOrder[i - 1]; + BasicBlock* block = postOrder[i - 1]; JITDUMP("Visiting " FMT_BB "\n", block->bbNum); if (block != fgFirstBB) From 66269abdddb514efd78b852daa0bada92e944241 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 26 Nov 2023 23:50:28 +0100 Subject: [PATCH 2/4] Nit --- src/coreclr/jit/compiler.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ba8603de78debb..d0d8f05c37e146 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2080,9 +2080,9 @@ class FlowGraphNaturalLoops return m_loops.size(); } - bool HasNonNaturalLoopCycles() + bool HaveNonNaturalLoopCycles() { - return m_improperLoopHeaders> 0; + return m_improperLoopHeaders > 0; } FlowGraphNaturalLoop* GetLoopFromHeader(BasicBlock* header); From bdfa7af14471e80337e1d7f10d35e0695847439c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 27 Nov 2023 15:05:33 +0100 Subject: [PATCH 3/4] Fix a source of diffs With more general cycles the loop below that looks at predecessors could see uninitialized weights, so this is definitely needed. 
--- src/coreclr/jit/fgprofilesynthesis.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/coreclr/jit/fgprofilesynthesis.cpp b/src/coreclr/jit/fgprofilesynthesis.cpp index d048caa504cb78..400be0f16107dd 100644 --- a/src/coreclr/jit/fgprofilesynthesis.cpp +++ b/src/coreclr/jit/fgprofilesynthesis.cpp @@ -704,6 +704,13 @@ void ProfileSynthesis::ComputeCyclicProbabilities() // void ProfileSynthesis::ComputeCyclicProbabilities(FlowGraphNaturalLoop* loop) { + // Initialize + // + loop->VisitLoopBlocks([](BasicBlock* loopBlock) { + loopBlock->bbWeight = 0.0; + return BasicBlockVisit::Continue; + }); + // Process loop blocks in RPO. Just takes one pass through the loop blocks // as any cyclic contributions are handled by cyclic probabilities. // From 883b5caec8bae3c46b08493c2d6aa7b97af83898 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 27 Nov 2023 15:07:16 +0100 Subject: [PATCH 4/4] Clean ups --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/compphases.h | 1 - src/coreclr/jit/fgprofilesynthesis.cpp | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index d0d8f05c37e146..745372a33b02e2 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2074,6 +2074,7 @@ class FlowGraphNaturalLoops FlowGraphNaturalLoops(const FlowGraphDfsTree* dfs); + static bool FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, jitstd::list& worklist); public: size_t NumLoops() { @@ -2145,7 +2146,6 @@ class FlowGraphNaturalLoops } static FlowGraphNaturalLoops* Find(const FlowGraphDfsTree* dfs); - static bool FindNaturalLoopBlocks(FlowGraphNaturalLoop* loop, jitstd::list& worklist); }; // The following holds information about instr offsets in terms of generated code. 
diff --git a/src/coreclr/jit/compphases.h b/src/coreclr/jit/compphases.h index 486a6de7ce5f83..0473b7e4da90b2 100644 --- a/src/coreclr/jit/compphases.h +++ b/src/coreclr/jit/compphases.h @@ -77,7 +77,6 @@ CompPhaseNameMacro(PHASE_SWITCH_RECOGNITION, "Recognize Switch", CompPhaseNameMacro(PHASE_FIND_OPER_ORDER, "Find oper order", false, -1, false) CompPhaseNameMacro(PHASE_SET_BLOCK_ORDER, "Set block order", false, -1, true) CompPhaseNameMacro(PHASE_BUILD_SSA, "Build SSA representation", true, -1, false) -CompPhaseNameMacro(PHASE_BUILD_SSA_TOPOSORT, "SSA: topological sort", false, PHASE_BUILD_SSA, false) CompPhaseNameMacro(PHASE_BUILD_SSA_DOMS, "SSA: Doms1", false, PHASE_BUILD_SSA, false) CompPhaseNameMacro(PHASE_BUILD_SSA_LIVENESS, "SSA: liveness", false, PHASE_BUILD_SSA, false) CompPhaseNameMacro(PHASE_BUILD_SSA_DF, "SSA: DF", false, PHASE_BUILD_SSA, false) diff --git a/src/coreclr/jit/fgprofilesynthesis.cpp b/src/coreclr/jit/fgprofilesynthesis.cpp index 400be0f16107dd..6b3b2fff2dee1d 100644 --- a/src/coreclr/jit/fgprofilesynthesis.cpp +++ b/src/coreclr/jit/fgprofilesynthesis.cpp @@ -709,7 +709,7 @@ void ProfileSynthesis::ComputeCyclicProbabilities(FlowGraphNaturalLoop* loop) loop->VisitLoopBlocks([](BasicBlock* loopBlock) { loopBlock->bbWeight = 0.0; return BasicBlockVisit::Continue; - }); + }); // Process loop blocks in RPO. Just takes one pass through the loop blocks // as any cyclic contributions are handled by cyclic probabilities.